commit/galaxy-central: dan: Add GenomeSpace tools.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/7ab3012fe281/ changeset: 7ab3012fe281 user: dan date: 2012-03-29 16:24:28 summary: Add GenomeSpace tools. affected #: 13 files diff -r a2cd73dd4f7284b1d0ffa56235e478d013cc116f -r 7ab3012fe281f9219e2323e6c8c9694fc2b2628a tool_conf.xml.sample --- a/tool_conf.xml.sample +++ b/tool_conf.xml.sample @@ -27,14 +27,14 @@ <tool file="data_source/epigraph_import.xml" /><tool file="data_source/epigraph_import_test.xml" /><tool file="data_source/hbvar.xml" /> - <tool file="data_source/genomespace_file_browser_prod.xml" /> - <!-- <tool file="data_source/genomespace_file_browser_test.xml" /> - <tool file="data_source/genomespace_file_browser_dev.xml" /> --> + <tool file="genomespace/genomespace_file_browser_prod.xml" /> + <tool file="genomespace/genomespace_importer.xml" /><tool file="validation/fix_errors.xml" /></section><section name="Send Data" id="send"><tool file="data_destination/epigraph.xml" /><tool file="data_destination/epigraph_test.xml" /> + <tool file="genomespace/genomespace_exporter.xml" /></section><section name="ENCODE Tools" id="EncodeTools"><tool file="encode/gencode_partition.xml" /> diff -r a2cd73dd4f7284b1d0ffa56235e478d013cc116f -r 7ab3012fe281f9219e2323e6c8c9694fc2b2628a tools/data_source/genomespace_file_browser.py --- a/tools/data_source/genomespace_file_browser.py +++ /dev/null @@ -1,148 +0,0 @@ -#Dan Blankenberg - -import optparse, os, urllib2, cookielib - -from galaxy import eggs -import pkg_resources - -pkg_resources.require( "simplejson" ) -import simplejson - -GENOMESPACE_API_VERSION_STRING = "v1.0" -GENOMESPACE_SERVER_URL_PROPERTIES = "http://www.genomespace.org/sites/genomespacefiles/config/serverurl.propertie..." 
- -CHUNK_SIZE = 2**20 #1mb - -DEFAULT_GALAXY_EXT = "data" - -#genomespace format identifier is the URL -GENOMESPACE_FORMAT_IDENTIFIER_TO_GENOMESPACE_EXT = {} #TODO: fix this so it is not a global variable -#TODO: we should use a better way to set up this mapping -GENOMESPACE_EXT_TO_GALAXY_EXT = {'rifles': 'rifles', - 'lifes': 'lifes', - 'cn': 'cn', - 'GTF': 'gtf', - 'res': 'res', - 'xcn': 'xcn', - 'lowercasetxt': 'lowercasetxt', - 'bed': 'bed', - 'CBS': 'cbs', - 'genomicatab': 'genomicatab', - 'gxp': 'gxp', - 'reversedtxt': 'reversedtxt', - 'nowhitespace': 'nowhitespace', - 'unknown': 'unknown', - 'txt': 'txt', - 'uppercasetxt': 'uppercasetxt', - 'GISTIC': 'gistic', - 'GFF': 'gff', - 'gmt': 'gmt', - 'gct': 'gct'} - - -def chunk_write( source_stream, target_stream, source_method = "read", target_method="write" ): - source_method = getattr( source_stream, source_method ) - target_method = getattr( target_stream, target_method ) - while True: - chunk = source_method( CHUNK_SIZE ) - if chunk: - target_method( chunk ) - else: - break - -def get_cookie_opener( gs_username, gs_token ): - """ Create a GenomeSpace cookie opener """ - cj = cookielib.CookieJar() - for cookie_name, cookie_value in [ ( 'gs-token', gs_token ), ( 'gs-username', gs_username ) ]: - #create a super-cookie, valid for all domains - cookie = cookielib.Cookie(version=0, name=cookie_name, value=cookie_value, port=None, port_specified=False, domain='', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False ) - cj.set_cookie( cookie ) - cookie_opener = urllib2.build_opener( urllib2.HTTPCookieProcessor( cj ) ) - return cookie_opener - -def get_galaxy_ext_from_genomespace_format_url( url_opener, file_format_url ): - ext = GENOMESPACE_FORMAT_IDENTIFIER_TO_GENOMESPACE_EXT.get( file_format_url, None ) - if ext is not None: - ext = GENOMESPACE_EXT_TO_GALAXY_EXT.get( ext, None 
) - if ext is None: - #could check content type, etc here - ext = DEFAULT_GALAXY_EXT - return ext - -def get_genomespace_site_urls(): - genomespace_sites = {} - for line in urllib2.urlopen( GENOMESPACE_SERVER_URL_PROPERTIES ).read().split( '\n' ): - line = line.rstrip() - if not line or line.startswith( "#" ): - continue - server, line = line.split( '.', 1 ) - if server not in genomespace_sites: - genomespace_sites[server] = {} - line = line.split( "=", 1 ) - genomespace_sites[server][line[0]] = line[1] - return genomespace_sites - -def set_genomespace_format_identifiers( url_opener, dm_site ): - gs_request = urllib2.Request( "%s/%s/dataformat/list" % ( dm_site, GENOMESPACE_API_VERSION_STRING ) ) - gs_request.get_method = lambda: 'GET' - opened_gs_request = url_opener.open( gs_request ) - genomespace_formats = simplejson.loads( opened_gs_request.read() ) - for format in genomespace_formats: - GENOMESPACE_FORMAT_IDENTIFIER_TO_GENOMESPACE_EXT[ format['url'] ] = format['name'] - -def download_from_genomespace_file_browser( json_parameter_file, genomespace_site ): - json_params = simplejson.loads( open( json_parameter_file, 'r' ).read() ) - datasource_params = json_params.get( 'param_dict' ) - username = datasource_params.get( "gs-username", None ) - token = datasource_params.get( "gs-token", None ) - assert None not in [ username, token ], "Missing GenomeSpace username or token." 
- output_filename = datasource_params.get( "output", None ) - dataset_id = json_params['output_data'][0]['dataset_id'] - hda_id = json_params['output_data'][0]['hda_id'] - url_opener = get_cookie_opener( username, token ) - #load and set genomespace format ids to galaxy exts - genomespace_site_dict = get_genomespace_site_urls()[ genomespace_site ] - set_genomespace_format_identifiers( url_opener, genomespace_site_dict['dmServer'] ) - - file_url_prefix = "fileUrl" - file_type_prefix = "fileFormat" - metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' ) - file_numbers = [] - for name in datasource_params.keys(): - if name.startswith( file_url_prefix ): - name = name[len( file_url_prefix ):] - file_numbers.append( int( name ) ) - file_numbers.sort() - for file_num in file_numbers: - url_key = "%s%i" % ( file_url_prefix, file_num ) - download_url = datasource_params.get( url_key, None ) - if download_url is None: - break - filetype_key = "%s%i" % ( file_type_prefix, file_num ) - filetype_url = datasource_params.get( filetype_key, None ) - galaxy_ext = get_galaxy_ext_from_genomespace_format_url( url_opener, filetype_url ) - if output_filename is None: - output_filename = os.path.join( datasource_params['__new_file_path__'], 'primary_%i_output%i_visible_%s' % ( hda_id, file_num, galaxy_ext ) ) - else: - if dataset_id is not None: - metadata_parameter_file.write( "%s\n" % simplejson.dumps( dict( type = 'dataset', - dataset_id = dataset_id, - ext = galaxy_ext ) ) ) - output_file = open( output_filename, 'wb' ) - new_file_request = urllib2.Request( download_url ) - new_file_request.get_method = lambda: 'GET' - target_download_url = url_opener.open( new_file_request ) - chunk_write( target_download_url, output_file ) - output_file.close() - output_filename = None #only have one filename available - metadata_parameter_file.close() - return True - -if __name__ == '__main__': - #Parse Command Line - parser = optparse.OptionParser() 
- parser.add_option( '-p', '--json_parameter_file', dest='json_parameter_file', action='store', type="string", default=None, help='json_parameter_file' ) - parser.add_option( '-s', '--genomespace_site', dest='genomespace_site', action='store', type="string", default=None, help='genomespace_site' ) - (options, args) = parser.parse_args() - - download_from_genomespace_file_browser( options.json_parameter_file, options.genomespace_site ) diff -r a2cd73dd4f7284b1d0ffa56235e478d013cc116f -r 7ab3012fe281f9219e2323e6c8c9694fc2b2628a tools/data_source/genomespace_file_browser_dev.xml --- a/tools/data_source/genomespace_file_browser_dev.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0"?> -<tool name="GenomeSpace import" id="genomespace_file_browser_dev" tool_type="data_source" add_galaxy_url="False" force_history_refresh="True" version="0.0.1"> - <description>from file browser (development)</description> - <command interpreter="python">genomespace_file_browser.py --json_parameter_file "${output}" --genomespace_site "dev"</command> - <inputs action="https://dmdev.genomespace.org:8444/datamanager/defaultdirectory" check_values="False" method="post"> - <display>go to GenomeSpace Data Manager </display> - <param name="appCallbackUrl" type="baseurl" value="/tool_runner?tool_id=genomespace_file_browser_dev&runtool_btn=Execute" /> - <param name="appName" type="hidden" value="Galaxy" /> - </inputs> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="auto" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r a2cd73dd4f7284b1d0ffa56235e478d013cc116f -r 7ab3012fe281f9219e2323e6c8c9694fc2b2628a tools/data_source/genomespace_file_browser_prod.xml --- a/tools/data_source/genomespace_file_browser_prod.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0"?> -<tool name="GenomeSpace import" id="genomespace_file_browser_prod" tool_type="data_source" add_galaxy_url="False" force_history_refresh="True" version="0.0.1"> - <description>from file 
browser</description> - <command interpreter="python">genomespace_file_browser.py --json_parameter_file "${output}" --genomespace_site "prod"</command> - <inputs action="https://dm.genomespace.org/datamanager/defaultdirectory" check_values="False" method="post"> - <display>go to GenomeSpace Data Manager </display> - <param name="appCallbackUrl" type="baseurl" value="/tool_runner?tool_id=genomespace_file_browser_prod&runtool_btn=Execute" /> - <param name="appName" type="hidden" value="Galaxy" /> - </inputs> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="auto" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r a2cd73dd4f7284b1d0ffa56235e478d013cc116f -r 7ab3012fe281f9219e2323e6c8c9694fc2b2628a tools/data_source/genomespace_file_browser_test.xml --- a/tools/data_source/genomespace_file_browser_test.xml +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0"?> -<tool name="GenomeSpace import" id="genomespace_file_browser_test" tool_type="data_source" add_galaxy_url="False" force_history_refresh="True" version="0.0.1"> - <description>from file browser (test)</description> - <command interpreter="python">genomespace_file_browser.py --json_parameter_file "${output}" --genomespace_site "test"</command> - <inputs action="https://dmtest.genomespace.org:8444/datamanager/defaultdirectory" check_values="False" method="post"> - <display>go to GenomeSpace Data Manager </display> - <param name="appCallbackUrl" type="baseurl" value="/tool_runner?tool_id=genomespace_file_browser_test&runtool_btn=Execute" /> - <param name="appName" type="hidden" value="Galaxy" /> - </inputs> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="auto" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r a2cd73dd4f7284b1d0ffa56235e478d013cc116f -r 7ab3012fe281f9219e2323e6c8c9694fc2b2628a tools/genomespace/genomespace_exporter.py --- /dev/null +++ b/tools/genomespace/genomespace_exporter.py @@ -0,0 +1,208 @@ +#Dan 
Blankenberg + +import optparse, os, urllib2, urllib, cookielib, hashlib, base64, cgi, binascii + +from galaxy import eggs +import pkg_resources + +pkg_resources.require( "simplejson" ) +import simplejson + +GENOMESPACE_API_VERSION_STRING = "v1.0" +GENOMESPACE_SERVER_URL_PROPERTIES = "http://www.genomespace.org/sites/genomespacefiles/config/serverurl.propertie..." + +CHUNK_SIZE = 2**20 #1mb + + +def chunk_write( source_stream, target_stream, source_method = "read", target_method="write" ): + source_method = getattr( source_stream, source_method ) + target_method = getattr( target_stream, target_method ) + while True: + chunk = source_method( CHUNK_SIZE ) + if chunk: + target_method( chunk ) + else: + break + +def get_cookie_opener( gs_username, gs_token ): + """ Create a GenomeSpace cookie opener """ + cj = cookielib.CookieJar() + for cookie_name, cookie_value in [ ( 'gs-token', gs_token ), ( 'gs-username', gs_username ) ]: + #create a super-cookie, valid for all domains + cookie = cookielib.Cookie(version=0, name=cookie_name, value=cookie_value, port=None, port_specified=False, domain='', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False ) + cj.set_cookie( cookie ) + cookie_opener = urllib2.build_opener( urllib2.HTTPCookieProcessor( cj ) ) + return cookie_opener + +def get_genomespace_site_urls(): + genomespace_sites = {} + for line in urllib2.urlopen( GENOMESPACE_SERVER_URL_PROPERTIES ).read().split( '\n' ): + line = line.rstrip() + if not line or line.startswith( "#" ): + continue + server, line = line.split( '.', 1 ) + if server not in genomespace_sites: + genomespace_sites[server] = {} + line = line.split( "=", 1 ) + genomespace_sites[server][line[0]] = line[1] + return genomespace_sites + +def get_directory( url_opener, dm_url, path ): + url = dm_url + for sub_path in path: + url = "%s/%s" % ( url, sub_path ) + 
dir_request = urllib2.Request( url, headers = { 'Content-Type': 'application/json', 'Accept': 'application/json' } ) + dir_request.get_method = lambda: 'GET' + try: + dir_dict = simplejson.loads( url_opener.open( dir_request ).read() ) + except urllib2.HTTPError, e: + #print "e", e, url #punting, assuming lack of permissions at this low of a level... + continue + break + return dir_dict + +def get_default_directory( url_opener, dm_url ): + return get_directory( url_opener, dm_url, ["defaultdirectory"] ) + +def create_directory( url_opener, directory_dict, new_dir, dm_url ): + payload = { "isDirectory": True } + for dir_slice in new_dir: + if dir_slice in ( '', '/', None ): + continue + url = '/'.join( ( directory_dict['url'], urllib.quote( dir_slice.replace( '/', '_' ), safe='' ) ) ) + new_dir_request = urllib2.Request( url, headers = { 'Content-Type': 'application/json', 'Accept': 'application/json' }, data = simplejson.dumps( payload ) ) + new_dir_request.get_method = lambda: 'PUT' + directory_dict = simplejson.loads( url_opener.open( new_dir_request ).read() ) + return directory_dict + +def get_genome_space_launch_apps( atm_url, url_opener, file_url, file_type ): + gs_request = urllib2.Request( "%s/%s/webtool/descriptor" % ( atm_url, GENOMESPACE_API_VERSION_STRING ) ) + gs_request.get_method = lambda: 'GET' + opened_gs_request = url_opener.open( gs_request ) + webtool_descriptors = simplejson.loads( opened_gs_request.read() ) + webtools = [] + for webtool in webtool_descriptors: + webtool_name = webtool.get( 'name' ) + base_url = webtool.get( 'baseUrl' ) + use_tool = False + for param in webtool.get( 'fileParameters', [] ): + for format in param.get( 'formats', [] ): + if format.get( 'name' ) == file_type: + use_tool = True + break + if use_tool: + file_param_name = param.get( 'name' ) + #file_name_delimiters = param.get( 'nameDelimiters' ) + if '?' in base_url: + url_delimiter = "&" + else: + url_delimiter = "?" 
+ launch_url = "%s%s%s" % ( base_url, url_delimiter, urllib.urlencode( [ ( file_param_name, file_url ) ] ) ) + webtools.append( ( launch_url, webtool_name ) ) + break + return webtools + +def galaxy_code_get_genomespace_folders( genomespace_site='prod', trans=None, value=None, **kwd ): + if value: + value = value[0]#single select, only 1 value + def recurse_directory_dict( url_opener, cur_options, url ): + cur_directory = urllib2.Request( url )#, headers = { 'Content-Type': 'application/json', 'Accept': 'application/text' } ) #apparently http://www.genomespace.org/team/specs/updated-dm-rest-api:"Every HTTP request to the Data Manager should include the Accept header with a preference for the media types application/json and application/text." is not correct + cur_directory.get_method = lambda: 'GET' + #get url to upload to + cur_directory = url_opener.open( cur_directory ).read() + cur_directory = simplejson.loads( cur_directory ) + directory = cur_directory.get( 'directory', {} ) + contents = cur_directory.get( 'contents', [] ) + if directory.get( 'isDirectory', False ): + selected = directory.get( 'path' ) == value + cur_options.append( { 'name':directory.get( 'name' ), 'value': directory.get( 'path'), 'options':[], 'selected': selected } ) + for sub_dir in contents: + if sub_dir.get( 'isDirectory', False ): + recurse_directory_dict( url_opener, cur_options[-1]['options'], sub_dir.get( 'url' ) ) + rval = [] + if trans and trans.user: + username = trans.user.preferences.get( 'genomespace_username', None ) + token = trans.user.preferences.get( 'genomespace_token', None ) + if None in ( username, token ): + return [] + url_opener = get_cookie_opener( username, token ) + genomespace_site_dict = get_genomespace_site_urls()[ genomespace_site ] + dm_url = genomespace_site_dict['dmServer'] + #get default directory + directory_dict = get_default_directory( url_opener, dm_url )['directory'] + #what directory to stuff this in + recurse_directory_dict( url_opener, rval, 
directory_dict.get( 'url' ) ) + + return rval + + +def send_file_to_genomespace( genomespace_site, username, token, source_filename, target_directory, target_filename, file_type, content_type, log_filename ): + url_opener = get_cookie_opener( username, token ) + genomespace_site_dict = get_genomespace_site_urls()[ genomespace_site ] + dm_url = genomespace_site_dict['dmServer'] + #get default directory + if target_directory and target_directory[0] == '/': + directory_dict = get_directory( url_opener, dm_url, [ "%s/%s/%s" % ( GENOMESPACE_API_VERSION_STRING, 'file', target_directory[1] ) ] + target_directory[2:] )['directory'] + target_directory.pop(0) + else: + directory_dict = get_default_directory( url_opener, dm_url )['directory'] + #what directory to stuff this in + target_directory_dict = create_directory( url_opener, directory_dict, target_directory, dm_url ) + #get upload url + upload_url = "uploadurl" + content_length = os.path.getsize( source_filename ) + input_file = open( source_filename ) + content_md5 = hashlib.md5() + chunk_write( input_file, content_md5, target_method="update" ) + input_file.seek( 0 ) #back to start, for uploading + + upload_params = { 'Content-Length': content_length, 'Content-MD5': base64.standard_b64encode( content_md5.digest() ), 'Content-Type': content_type } + upload_url = "%s/%s/%s%s/%s?%s" % ( dm_url, GENOMESPACE_API_VERSION_STRING, upload_url, target_directory_dict['path'], urllib.quote( target_filename, safe='' ), urllib.urlencode( upload_params ) ) + new_file_request = urllib2.Request( upload_url )#, headers = { 'Content-Type': 'application/json', 'Accept': 'application/text' } ) #apparently http://www.genomespace.org/team/specs/updated-dm-rest-api:"Every HTTP request to the Data Manager should include the Accept header with a preference for the media types application/json and application/text." 
is not correct + new_file_request.get_method = lambda: 'GET' + #get url to upload to + target_upload_url = url_opener.open( new_file_request ).read() + #upload file to determined url + upload_headers = dict( upload_params ) + #upload_headers[ 'x-amz-meta-md5-hash' ] = content_md5.hexdigest() + upload_headers[ 'Accept' ] = 'application/json' + upload_file_request = urllib2.Request( target_upload_url, headers = upload_headers, data = input_file ) + upload_file_request.get_method = lambda: 'PUT' + upload_result = urllib2.urlopen( upload_file_request ).read() + + result_url = "%s/%s" % ( target_directory_dict['url'], urllib.quote( target_filename, safe='' ) ) + #determine available gs launch apps + web_tools = get_genome_space_launch_apps( genomespace_site_dict['atmServer'], url_opener, result_url, file_type ) + if log_filename: + log_file = open( log_filename, 'wb' ) + log_file.write( "<html><head><title>File uploaded to GenomeSpace from Galaxy</title></head><body>\n" ) + log_file.write( '<p>Uploaded <a href="%s">%s/%s</a> to GenomeSpace.</p>\n' % ( result_url, target_directory_dict['path'], target_filename ) ) + if web_tools: + log_file.write( "<p>You may open this file directly in the following applications:</p>\n" ) + log_file.write( '<p><ul>\n' ) + for web_tool in web_tools: + log_file.write( '<li><a href="%s">%s</a></li>\n' % ( web_tool ) ) + log_file.write( '</p></ul>\n' ) + else: + log_file.write( '<p>There are no GenomeSpace applications available for file type: %s</p>\n' % ( file_type ) ) + log_file.write( "</body></html>\n" ) + return upload_result + +if __name__ == '__main__': + #Parse Command Line + parser = optparse.OptionParser() + parser.add_option( '-s', '--genomespace_site', dest='genomespace_site', action='store', type="string", default=None, help='genomespace_site' ) + parser.add_option( '-t', '--token', dest='token', action='store', type="string", default=None, help='token' ) + parser.add_option( '-u', '--username', dest='username', action='store', 
type="string", default=None, help='username' ) + parser.add_option( '-d', '--dataset', dest='dataset', action='store', type="string", default=None, help='dataset' ) + parser.add_option( '-f', '--filename', dest='filename', action='store', type="string", default=None, help='filename' ) + parser.add_option( '-y', '--subdirectory', dest='subdirectory', action='append', type="string", default=None, help='subdirectory' ) + parser.add_option( '', '--file_type', dest='file_type', action='store', type="string", default=None, help='file_type' ) + parser.add_option( '-c', '--content_type', dest='content_type', action='store', type="string", default=None, help='content_type' ) + parser.add_option( '-l', '--log', dest='log', action='store', type="string", default=None, help='log' ) + + (options, args) = parser.parse_args() + + send_file_to_genomespace( options.genomespace_site, options.username, options.token, options.dataset, map( binascii.unhexlify, options.subdirectory ), options.filename, options.file_type, options.content_type, options.log ) + + diff -r a2cd73dd4f7284b1d0ffa56235e478d013cc116f -r 7ab3012fe281f9219e2323e6c8c9694fc2b2628a tools/genomespace/genomespace_exporter.xml --- /dev/null +++ b/tools/genomespace/genomespace_exporter.xml @@ -0,0 +1,51 @@ +<?xml version="1.0"?> +<tool name="GenomeSpace Exporter" id="genomespace_exporter" require_login="True" version="0.0.1"> + <description> - send data to GenomeSpace</description> + <command interpreter="python">genomespace_exporter.py + --genomespace_site "prod" + #assert $__user_id__ != 'Anonymous', Exception( 'You must be logged in to use this tool.' ) + #set $user = $__app__.model.User.get( $__user_id__ ) + #set $username = $user.preferences.get( 'genomespace_username', None ) + #set $token = $user.preferences.get( 'genomespace_token', None ) + #assert None not in ( $username, $token ), Exception( 'You must associate a GenomeSpace OpenID with your account and log in with it.' 
) + #import binascii + --username "${username}" + --token "${token}" + --dataset "${input1}" + #if $subdirectory: + #for $subd in str( $subdirectory ).split( '/' ): + #if not $subd: + --subdirectory "${ binascii.hexlify( '/' ) }" + #else: + --subdirectory "${ binascii.hexlify( $subd ) }" + #end if + #end for + #else: + --subdirectory "${ binascii.hexlify( 'galaxy_export' ) }" + --subdirectory "${ binascii.hexlify( str( $base_url ).split( '://', 1 )[-1] ) }" ##Protocol removed by request + #end if + #if $filename: + --filename "${filename}" + #else: + --filename "Galaxy History Item ${__app__.security.encode_id( $input1.id )} - ${input1.hid}: ${input1.name}.${input1.ext}" + #end if + --file_type "${input1.ext}" + --content_type "${input1.get_mime()}" + --log "${output_log}" + </command> + <inputs> + <param format="data" name="input1" type="data" label="Send this dataset to GenomeSpace" /> + <param name="base_url" type="baseurl" /> + <!-- <param name="subdirectory" type="text" size="80" help="Leave blank to generate automatically" /> --> + <param name="subdirectory" type="drill_down" display="radio" hierarchy="exact" multiple="False" label="Choose Target Directory" dynamic_options="galaxy_code_get_genomespace_folders( genomespace_site = 'prod', trans=__trans__, value=__value__, input_dataset=input1 )" help="Leave blank to generate automatically"/> + <param name="filename" type="text" size="80" help="Leave blank to generate automatically" /> + </inputs> + <outputs> + <data format="html" name="output_log" /> + </outputs> + <help> + This Tool allows you to export data to GenomeSpace. You must have logged in using your GenomeSpace OpenID. You can associate your OpenID credentials under the User Preferences panel. 
+ </help> + <options refresh="True"/> + <code file="genomespace_exporter.py" /> +</tool> \ No newline at end of file diff -r a2cd73dd4f7284b1d0ffa56235e478d013cc116f -r 7ab3012fe281f9219e2323e6c8c9694fc2b2628a tools/genomespace/genomespace_file_browser.py --- /dev/null +++ b/tools/genomespace/genomespace_file_browser.py @@ -0,0 +1,160 @@ +#Dan Blankenberg + +import optparse, os, urllib, urllib2, urlparse, cookielib + +from galaxy import eggs +import pkg_resources + +pkg_resources.require( "simplejson" ) +import simplejson + +GENOMESPACE_API_VERSION_STRING = "v1.0" +GENOMESPACE_SERVER_URL_PROPERTIES = "http://www.genomespace.org/sites/genomespacefiles/config/serverurl.propertie..." + +CHUNK_SIZE = 2**20 #1mb + +DEFAULT_GALAXY_EXT = "data" + +#genomespace format identifier is the URL +GENOMESPACE_FORMAT_IDENTIFIER_TO_GENOMESPACE_EXT = {} #TODO: fix this so it is not a global variable +#TODO: we should use a better way to set up this mapping +GENOMESPACE_EXT_TO_GALAXY_EXT = {'rifles': 'rifles', + 'lifes': 'lifes', + 'cn': 'cn', + 'GTF': 'gtf', + 'res': 'res', + 'xcn': 'xcn', + 'lowercasetxt': 'lowercasetxt', + 'bed': 'bed', + 'CBS': 'cbs', + 'genomicatab': 'genomicatab', + 'gxp': 'gxp', + 'reversedtxt': 'reversedtxt', + 'nowhitespace': 'nowhitespace', + 'unknown': 'unknown', + 'txt': 'txt', + 'uppercasetxt': 'uppercasetxt', + 'GISTIC': 'gistic', + 'GFF': 'gff', + 'gmt': 'gmt', + 'gct': 'gct'} + + +def chunk_write( source_stream, target_stream, source_method = "read", target_method="write" ): + source_method = getattr( source_stream, source_method ) + target_method = getattr( target_stream, target_method ) + while True: + chunk = source_method( CHUNK_SIZE ) + if chunk: + target_method( chunk ) + else: + break + +def get_cookie_opener( gs_username, gs_token ): + """ Create a GenomeSpace cookie opener """ + cj = cookielib.CookieJar() + for cookie_name, cookie_value in [ ( 'gs-token', gs_token ), ( 'gs-username', gs_username ) ]: + #create a super-cookie, valid for all 
domains + cookie = cookielib.Cookie(version=0, name=cookie_name, value=cookie_value, port=None, port_specified=False, domain='', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False ) + cj.set_cookie( cookie ) + cookie_opener = urllib2.build_opener( urllib2.HTTPCookieProcessor( cj ) ) + return cookie_opener + +def get_galaxy_ext_from_genomespace_format_url( url_opener, file_format_url ): + ext = GENOMESPACE_FORMAT_IDENTIFIER_TO_GENOMESPACE_EXT.get( file_format_url, None ) + if ext is not None: + ext = GENOMESPACE_EXT_TO_GALAXY_EXT.get( ext, None ) + if ext is None: + #could check content type, etc here + ext = DEFAULT_GALAXY_EXT + return ext + +def get_genomespace_site_urls(): + genomespace_sites = {} + for line in urllib2.urlopen( GENOMESPACE_SERVER_URL_PROPERTIES ).read().split( '\n' ): + line = line.rstrip() + if not line or line.startswith( "#" ): + continue + server, line = line.split( '.', 1 ) + if server not in genomespace_sites: + genomespace_sites[server] = {} + line = line.split( "=", 1 ) + genomespace_sites[server][line[0]] = line[1] + return genomespace_sites + +def set_genomespace_format_identifiers( url_opener, dm_site ): + gs_request = urllib2.Request( "%s/%s/dataformat/list" % ( dm_site, GENOMESPACE_API_VERSION_STRING ) ) + gs_request.get_method = lambda: 'GET' + opened_gs_request = url_opener.open( gs_request ) + genomespace_formats = simplejson.loads( opened_gs_request.read() ) + for format in genomespace_formats: + GENOMESPACE_FORMAT_IDENTIFIER_TO_GENOMESPACE_EXT[ format['url'] ] = format['name'] + +def download_from_genomespace_file_browser( json_parameter_file, genomespace_site ): + json_params = simplejson.loads( open( json_parameter_file, 'r' ).read() ) + datasource_params = json_params.get( 'param_dict' ) + username = datasource_params.get( "gs-username", None ) + token = datasource_params.get( 
"gs-token", None ) + assert None not in [ username, token ], "Missing GenomeSpace username or token." + output_filename = datasource_params.get( "output", None ) + dataset_id = json_params['output_data'][0]['dataset_id'] + hda_id = json_params['output_data'][0]['hda_id'] + url_opener = get_cookie_opener( username, token ) + #load and set genomespace format ids to galaxy exts + genomespace_site_dict = get_genomespace_site_urls()[ genomespace_site ] + set_genomespace_format_identifiers( url_opener, genomespace_site_dict['dmServer'] ) + + file_url_prefix = "fileUrl" + file_type_prefix = "fileFormat" + metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' ) + file_numbers = [] + for name in datasource_params.keys(): + if name.startswith( file_url_prefix ): + name = name[len( file_url_prefix ):] + file_numbers.append( int( name ) ) + file_numbers.sort() + for file_num in file_numbers: + url_key = "%s%i" % ( file_url_prefix, file_num ) + download_url = datasource_params.get( url_key, None ) + if download_url is None: + break + filetype_key = "%s%i" % ( file_type_prefix, file_num ) + filetype_url = datasource_params.get( filetype_key, None ) + galaxy_ext = get_galaxy_ext_from_genomespace_format_url( url_opener, filetype_url ) + formated_download_url = "%s?%s" % ( download_url, urllib.urlencode( [ ( 'dataformat', filetype_url ) ] ) ) + new_file_request = urllib2.Request( formated_download_url ) + new_file_request.get_method = lambda: 'GET' + target_download_url = url_opener.open( new_file_request ) + filename = None + if 'Content-Disposition' in target_download_url.info(): + # If the response has Content-Disposition, try to get filename from it + content_disposition = dict( map( lambda x: x.strip().split('=') if '=' in x else ( x.strip(),'' ), target_download_url.info()['Content-Disposition'].split( ';' ) ) ) + if 'filename' in content_disposition: + filename = content_disposition[ 'filename' ].strip( "\"'" ) + if not filename: 
+ parsed_url = urlparse.urlparse( download_url ) + query_params = urlparse.parse_qs( parsed_url[4] ) + filename = urllib.unquote_plus( parsed_url[2].split( '/' )[-1] ) + if output_filename is None: + output_filename = os.path.join( datasource_params['__new_file_path__'], 'primary_%i_output%i_visible_%s' % ( hda_id, file_num, galaxy_ext ) ) + else: + if dataset_id is not None: + metadata_parameter_file.write( "%s\n" % simplejson.dumps( dict( type = 'dataset', + dataset_id = dataset_id, + ext = galaxy_ext, + name = "GenomeSpace import on %s" % ( filename ) ) ) ) + output_file = open( output_filename, 'wb' ) + chunk_write( target_download_url, output_file ) + output_file.close() + output_filename = None #only have one filename available + metadata_parameter_file.close() + return True + +if __name__ == '__main__': + #Parse Command Line + parser = optparse.OptionParser() + parser.add_option( '-p', '--json_parameter_file', dest='json_parameter_file', action='store', type="string", default=None, help='json_parameter_file' ) + parser.add_option( '-s', '--genomespace_site', dest='genomespace_site', action='store', type="string", default=None, help='genomespace_site' ) + (options, args) = parser.parse_args() + + download_from_genomespace_file_browser( options.json_parameter_file, options.genomespace_site ) diff -r a2cd73dd4f7284b1d0ffa56235e478d013cc116f -r 7ab3012fe281f9219e2323e6c8c9694fc2b2628a tools/genomespace/genomespace_file_browser_dev.xml --- /dev/null +++ b/tools/genomespace/genomespace_file_browser_dev.xml @@ -0,0 +1,15 @@ +<?xml version="1.0"?> +<tool name="GenomeSpace import" id="genomespace_file_browser_dev" tool_type="data_source" add_galaxy_url="False" force_history_refresh="True" version="0.0.1"> + <description>from file browser (development)</description> + <command interpreter="python">genomespace_file_browser.py --json_parameter_file "${output}" --genomespace_site "dev"</command> + <inputs 
action="https://dmdev.genomespace.org:8444/datamanager/defaultdirectory" check_values="False" method="post"> + <display>go to GenomeSpace Data Manager </display> + <param name="appCallbackUrl" type="baseurl" value="/tool_runner?tool_id=genomespace_file_browser_dev&runtool_btn=Execute" /> + <param name="appName" type="hidden" value="Galaxy" /> + </inputs> + <uihints minwidth="800"/> + <outputs> + <data name="output" format="auto" /> + </outputs> + <options sanitize="False" refresh="True"/> +</tool> diff -r a2cd73dd4f7284b1d0ffa56235e478d013cc116f -r 7ab3012fe281f9219e2323e6c8c9694fc2b2628a tools/genomespace/genomespace_file_browser_prod.xml --- /dev/null +++ b/tools/genomespace/genomespace_file_browser_prod.xml @@ -0,0 +1,15 @@ +<?xml version="1.0"?> +<tool name="GenomeSpace import" id="genomespace_file_browser_prod" tool_type="data_source" add_galaxy_url="False" force_history_refresh="True" version="0.0.1"> + <description>from file browser</description> + <command interpreter="python">genomespace_file_browser.py --json_parameter_file "${output}" --genomespace_site "prod"</command> + <inputs action="https://dm.genomespace.org/datamanager/defaultdirectory" check_values="False" method="post"> + <display>go to GenomeSpace Data Manager </display> + <param name="appCallbackUrl" type="baseurl" value="/tool_runner?tool_id=genomespace_file_browser_prod&runtool_btn=Execute" /> + <param name="appName" type="hidden" value="Galaxy" /> + </inputs> + <uihints minwidth="800"/> + <outputs> + <data name="output" format="auto" /> + </outputs> + <options sanitize="False" refresh="True"/> +</tool> diff -r a2cd73dd4f7284b1d0ffa56235e478d013cc116f -r 7ab3012fe281f9219e2323e6c8c9694fc2b2628a tools/genomespace/genomespace_file_browser_test.xml --- /dev/null +++ b/tools/genomespace/genomespace_file_browser_test.xml @@ -0,0 +1,15 @@ +<?xml version="1.0"?> +<tool name="GenomeSpace import" id="genomespace_file_browser_test" tool_type="data_source" add_galaxy_url="False" 
force_history_refresh="True" version="0.0.1"> + <description>from file browser (test)</description> + <command interpreter="python">genomespace_file_browser.py --json_parameter_file "${output}" --genomespace_site "test"</command> + <inputs action="https://dmtest.genomespace.org:8444/datamanager/defaultdirectory" check_values="False" method="post"> + <display>go to GenomeSpace Data Manager </display> + <param name="appCallbackUrl" type="baseurl" value="/tool_runner?tool_id=genomespace_file_browser_test&runtool_btn=Execute" /> + <param name="appName" type="hidden" value="Galaxy" /> + </inputs> + <uihints minwidth="800"/> + <outputs> + <data name="output" format="auto" /> + </outputs> + <options sanitize="False" refresh="True"/> +</tool> diff -r a2cd73dd4f7284b1d0ffa56235e478d013cc116f -r 7ab3012fe281f9219e2323e6c8c9694fc2b2628a tools/genomespace/genomespace_importer.py --- /dev/null +++ b/tools/genomespace/genomespace_importer.py @@ -0,0 +1,156 @@ +#Dan Blankenberg + +import optparse, os, urllib2, urllib, cookielib, urlparse + +from galaxy import eggs +import pkg_resources + +pkg_resources.require( "simplejson" ) +import simplejson + +GENOMESPACE_API_VERSION_STRING = "v1.0" +GENOMESPACE_SERVER_URL_PROPERTIES = "http://www.genomespace.org/sites/genomespacefiles/config/serverurl.propertie..." 
+ +CHUNK_SIZE = 2**20 #1mb + +DEFAULT_GALAXY_EXT = "data" + +#genomespace format identifier is the URL +GENOMESPACE_FORMAT_IDENTIFIER_TO_GENOMESPACE_EXT = {} #TODO: fix this so it is not a global variable +#TODO: we should use a better way to set up this mapping +GENOMESPACE_EXT_TO_GALAXY_EXT = {'rifles': 'rifles', + 'lifes': 'lifes', + 'cn': 'cn', + 'GTF': 'gtf', + 'res': 'res', + 'xcn': 'xcn', + 'lowercasetxt': 'lowercasetxt', + 'bed': 'bed', + 'CBS': 'cbs', + 'genomicatab': 'genomicatab', + 'gxp': 'gxp', + 'reversedtxt': 'reversedtxt', + 'nowhitespace': 'nowhitespace', + 'unknown': 'unknown', + 'txt': 'txt', + 'uppercasetxt': 'uppercasetxt', + 'GISTIC': 'gistic', + 'GFF': 'gff', + 'gmt': 'gmt', + 'gct': 'gct'} + +VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + +def chunk_write( source_stream, target_stream, source_method = "read", target_method="write" ): + source_method = getattr( source_stream, source_method ) + target_method = getattr( target_stream, target_method ) + while True: + chunk = source_method( CHUNK_SIZE ) + if chunk: + target_method( chunk ) + else: + break + +def get_cookie_opener( gs_username, gs_token ): + """ Create a GenomeSpace cookie opener """ + cj = cookielib.CookieJar() + for cookie_name, cookie_value in [ ( 'gs-token', gs_token ), ( 'gs-username', gs_username ) ]: + #create a super-cookie, valid for all domains + cookie = cookielib.Cookie(version=0, name=cookie_name, value=cookie_value, port=None, port_specified=False, domain='', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False ) + cj.set_cookie( cookie ) + cookie_opener = urllib2.build_opener( urllib2.HTTPCookieProcessor( cj ) ) + return cookie_opener + +def get_galaxy_ext_from_genomespace_format_url( url_opener, file_format_url ): + ext = GENOMESPACE_FORMAT_IDENTIFIER_TO_GENOMESPACE_EXT.get( 
file_format_url, None ) + if ext is not None: + ext = GENOMESPACE_EXT_TO_GALAXY_EXT.get( ext, None ) + if ext is None: + #could check content type, etc here + ext = DEFAULT_GALAXY_EXT + return ext + +def get_genomespace_site_urls(): + genomespace_sites = {} + for line in urllib2.urlopen( GENOMESPACE_SERVER_URL_PROPERTIES ).read().split( '\n' ): + line = line.rstrip() + if not line or line.startswith( "#" ): + continue + server, line = line.split( '.', 1 ) + if server not in genomespace_sites: + genomespace_sites[server] = {} + line = line.split( "=", 1 ) + genomespace_sites[server][line[0]] = line[1] + return genomespace_sites + +def set_genomespace_format_identifiers( url_opener, dm_site ): + gs_request = urllib2.Request( "%s/%s/dataformat/list" % ( dm_site, GENOMESPACE_API_VERSION_STRING ) ) + gs_request.get_method = lambda: 'GET' + opened_gs_request = url_opener.open( gs_request ) + genomespace_formats = simplejson.loads( opened_gs_request.read() ) + for format in genomespace_formats: + GENOMESPACE_FORMAT_IDENTIFIER_TO_GENOMESPACE_EXT[ format['url'] ] = format['name'] + +def download_from_genomespace_importer( username, token, json_parameter_file, genomespace_site ): + json_params = simplejson.loads( open( json_parameter_file, 'r' ).read() ) + datasource_params = json_params.get( 'param_dict' ) + #username = datasource_params.get( "gs-username", None ) + #token = datasource_params.get( "gs-token", None ) + assert None not in [ username, token ], "Missing GenomeSpace username or token." 
+ output_filename = datasource_params.get( "output_file1", None ) + dataset_id = json_params['output_data'][0]['dataset_id'] + hda_id = json_params['output_data'][0]['hda_id'] + url_opener = get_cookie_opener( username, token ) + #load and set genomespace format ids to galaxy exts + genomespace_site_dict = get_genomespace_site_urls()[ genomespace_site ] + set_genomespace_format_identifiers( url_opener, genomespace_site_dict['dmServer'] ) + file_url_name = "URL" + metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' ) + url_param = datasource_params.get( file_url_name, None ) + for download_url in url_param.split( ',' ): + parsed_url = urlparse.urlparse( download_url ) + query_params = urlparse.parse_qs( parsed_url[4] ) + file_type = DEFAULT_GALAXY_EXT + if 'dataformat' in query_params: + file_type = query_params[ 'dataformat' ][0] + file_type = get_galaxy_ext_from_genomespace_format_url( url_opener, file_type ) + elif '.' in parsed_url[2]: + file_type = parsed_url[2].rsplit( '.', 1 )[-1] + file_type = GENOMESPACE_EXT_TO_GALAXY_EXT.get( file_type, file_type ) + new_file_request = urllib2.Request( download_url ) + new_file_request.get_method = lambda: 'GET' + target_download_url = url_opener.open( new_file_request ) + filename = None + if 'Content-Disposition' in target_download_url.info(): + content_disposition = dict( map( lambda x: x.strip().split('=') if '=' in x else ( x.strip(),'' ), target_download_url.info()['Content-Disposition'].split( ';' ) ) ) + if 'filename' in content_disposition: + filename = content_disposition[ 'filename' ].strip( "\"'" ) + if not filename: + parsed_url = urlparse.urlparse( download_url ) + query_params = urlparse.parse_qs( parsed_url[4] ) + filename = urllib.unquote_plus( parsed_url[2].split( '/' )[-1] ) + if output_filename is None: + output_filename = os.path.join( datasource_params['__new_file_path__'], 'primary_%i_output%s_visible_%s' % ( hda_id, ''.join( c in VALID_CHARS and c or 
'-' for c in filename ), file_type ) ) + else: + if dataset_id is not None: + metadata_parameter_file.write( "%s\n" % simplejson.dumps( dict( type = 'dataset', + dataset_id = dataset_id, + ext = file_type, + name = "GenomeSpace importer on %s" % ( filename ) ) ) ) + output_file = open( output_filename, 'wb' ) + chunk_write( target_download_url, output_file ) + output_file.close() + output_filename = None #only have one filename available + metadata_parameter_file.close() + return True + +if __name__ == '__main__': + #Parse Command Line + parser = optparse.OptionParser() + parser.add_option( '-p', '--json_parameter_file', dest='json_parameter_file', action='store', type="string", default=None, help='json_parameter_file' ) + parser.add_option( '-s', '--genomespace_site', dest='genomespace_site', action='store', type="string", default=None, help='genomespace_site' ) + parser.add_option( '-t', '--token', dest='token', action='store', type="string", default=None, help='token' ) + parser.add_option( '-u', '--username', dest='username', action='store', type="string", default=None, help='username' ) + (options, args) = parser.parse_args() + + download_from_genomespace_importer( options.username, options.token, options.json_parameter_file, options.genomespace_site ) diff -r a2cd73dd4f7284b1d0ffa56235e478d013cc116f -r 7ab3012fe281f9219e2323e6c8c9694fc2b2628a tools/genomespace/genomespace_importer.xml --- /dev/null +++ b/tools/genomespace/genomespace_importer.xml @@ -0,0 +1,26 @@ +<?xml version="1.0"?> +<tool name="GenomeSpace Importer" id="genomespace_importer" tool_type="data_source" force_history_refresh="True" hidden="True" display_interface="False" require_login="True" version="0.0.1"> + <description> - receive data from GenomeSpace</description> + <command interpreter="python">genomespace_importer.py + --genomespace_site "prod" + #assert $__user_id__ != 'Anonymous', Exception( 'You must be logged in to use this tool.' 
) + #set $user = $__app__.model.User.get( $__user_id__ ) + #set $username = $user.preferences.get( 'genomespace_username', None ) + #set $token = $user.preferences.get( 'genomespace_token', None ) + #assert None not in ( $username, $token ), Exception( 'You must associate a GenomeSpace OpenID with your account and log in with it.' ) + --username "${username}" + --token "${token}" + --json_parameter_file "${output_file1}" + </command> + <inputs check_values="False"> + <!-- <param name="file_name" type="text" value="" /> --> + <param name="URL" type="hidden" value="" /> + </inputs> + <outputs> + <data format="auto" name="output_file1" /> + </outputs> + <help> + some help text here... + </help> + <options refresh="True"/> +</tool> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket