[hg] galaxy 1664: Add ability for data_source tools to append pa...
details: http://www.bx.psu.edu/hg/galaxy/rev/5f4511a5b4d6 changeset: 1664:5f4511a5b4d6 user: Greg Von Kuster <greg@bx.psu.edu> date: Sat Dec 13 23:35:29 2008 -0500 description: Add ability for data_source tools to append parameters passed in the initial response to the value of URL prior to Galaxy's post to the URL. This is a cleaner method for Biomart and also gets GBrowse to work. 8 file(s) affected in this change: lib/galaxy/tools/__init__.py lib/galaxy/util/__init__.py tools/data_source/biomart.xml tools/data_source/biomart_test.xml tools/data_source/data_source.py tools/data_source/gbrowse_datasource.py tools/data_source/gbrowse_elegans.xml tools/data_source/gbrowse_filter_code.py diffs (293 lines): diff -r 0af3897ce585 -r 5f4511a5b4d6 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Fri Dec 12 14:33:51 2008 -0500 +++ b/lib/galaxy/tools/__init__.py Sat Dec 13 23:35:29 2008 -0500 @@ -231,8 +231,6 @@ # data_source tool if self.tool_type == "data_source": self.URL_method = root.get( "URL_method", "get" ) # get is the default - # TODO: Biomart hack - eliminate when they encode URL - they'll let us know when... - self.add_to_URL = root.get( "add_to_URL", None ) self.param_trans_dict = {} req_param_trans = root.find( "request_param_translation" ) if req_param_trans is not None: @@ -255,6 +253,22 @@ galaxy_format = format.get( "galaxy_format" ) format_trans_dict[ remote_format ] = galaxy_format trans_list.append( format_trans_dict ) + elif req_param.get( "galaxy_name" ) == "URL": + # Some remote data sources ( e.g., Gbrowse ) send parameters back to + # Galaxy in the initial response that must be added to URL prior to + # Galaxy sending the secondary request to the URL. 
The tag set looks + # asomething like: + # <add_to_url> + # <param_from_source name="d" missing="" /> + # </add_to_url> + add_to_url = req_param.find( "add_to_url" ) + if add_to_url is not None: + add_to_url_dict = {} + for param_from_source in add_to_url.findall( "param_from_source" ): + name = param_from_source.get( "name" ) + value = param_from_source.get( "missing" ) # only used if the source doesn't send the param name + add_to_url_dict[ name ] = value + trans_list.append( add_to_url_dict ) self.param_trans_dict[ remote_name ] = trans_list # Command line (template). Optional for tools that do not invoke a local program command = root.find("command") @@ -1162,11 +1176,16 @@ description = param_dict.get( 'position', '' ) if not description: description = 'unknown position' + gb_landmark_region = param_dict.get( 'q' ) data_type = param_dict.get( 'data_type' ) items = out_data.items() for name, data in items: if organism and table and description: + # This is UCSC data.name = '%s on %s: %s (%s)' % ( data.name, organism, table, description ) + elif gb_landmark_region: + # This is GBrowse + data.name = '%s on %s' % ( data.name, gb_landmark_region ) data.info = info data.dbkey = dbkey try: diff -r 0af3897ce585 -r 5f4511a5b4d6 lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py Fri Dec 12 14:33:51 2008 -0500 +++ b/lib/galaxy/util/__init__.py Sat Dec 13 23:35:29 2008 -0500 @@ -159,10 +159,31 @@ try: # The Galaxy "data_type entry is special in that it can include the ability # to translate the format to a Galaxy supported format. 
In the dict, this entry - # looks something like: {'hgta_outputType': ['data_type', 'bed', {'selectedFields': 'tabular'}] } + # looks something like: + # {'hgta_outputType': ['data_type', 'bed', {'selectedFields': 'tabular'}] } format_trans_dict = tool.param_trans_dict[ key ][2] if value in format_trans_dict: new_value = format_trans_dict[ value ] + except: + pass + elif new_key == 'URL': + # As above, the URL can include a set of params from the remote data source + # that must be appended to the URL prior to the post. In this case, the + # dict entry would look something like: + # ['URL', '', {'q': '', 's': '', 'd': '', 'dbkey': '', 't': ''}] + try: + add_to_url_dict = tool.param_trans_dict[ key ][2] + if new_value.count( '?' ) == 0: + sep = '?' + else: + sep = '&' + for param_name, missing_value in add_to_url_dict.items(): + param_value = params.get( param_name, None ) + if not param_value and missing_value: + param_value = missing_value + if param_value: + new_value += '%s%s=%s' % ( sep, param_name, param_value ) + sep = '&' except: pass if not value and not new_value: @@ -174,9 +195,6 @@ if tool and tool.tool_type == 'data_source': # Add the tool's URL_method to params self.__dict__[ 'URL_method' ] = tool.URL_method - # TODO: Biomart hack - eliminate when they encode URL - they'll let us know when... 
- if tool.add_to_URL is not None: - self.__dict__[ 'add_to_URL' ] = tool.add_to_URL for key, value in tool.param_trans_dict.items(): # Make sure that all translated values used in Galaxy are added to the params galaxy_name = tool.param_trans_dict[ key ][0] diff -r 0af3897ce585 -r 5f4511a5b4d6 tools/data_source/biomart.xml --- a/tools/data_source/biomart.xml Fri Dec 12 14:33:51 2008 -0500 +++ b/tools/data_source/biomart.xml Sat Dec 13 23:35:29 2008 -0500 @@ -7,7 +7,7 @@ TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile everything including and beyond the first '&' is truncated from URL. They said they'll let us know when this is fixed at their end. --> -<tool name="BioMart" id="biomart" tool_type="data_source" URL_method="get" add_to_URL="biomart_hack"> +<tool name="BioMart" id="biomart" tool_type="data_source" URL_method="get"> <description>Central server</description> <command interpreter="python">data_source.py $output</command> <inputs action="http://www.biomart.org/biomart/martview" check_values="false" method="get" target="_top"> @@ -15,7 +15,12 @@ <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" /> </inputs> <request_param_translation> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> + <request_param galaxy_name="URL" remote_name="URL" missing=""> + <add_to_url> + <param_from_source name="_export" missing="1" /> + <param_from_source name="GALAXY_URL" missing="0" /> + </add_to_url> + </request_param> <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" 
/> <request_param galaxy_name="organism" remote_name="organism" missing="" /> <request_param galaxy_name="table" remote_name="table" missing="" /> diff -r 0af3897ce585 -r 5f4511a5b4d6 tools/data_source/biomart_test.xml --- a/tools/data_source/biomart_test.xml Fri Dec 12 14:33:51 2008 -0500 +++ b/tools/data_source/biomart_test.xml Sat Dec 13 23:35:29 2008 -0500 @@ -7,7 +7,7 @@ TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile everything including and beyond the first '&' is truncated from URL. They said they'll let us know when this is fixed at their end. --> -<tool name="BioMart" id="biomart_test" tool_type="data_source" URL_method="get" add_to_URL="biomart_hack"> +<tool name="BioMart" id="biomart_test" tool_type="data_source" URL_method="get"> <description>Test server</description> <command interpreter="python">data_source.py $output</command> <inputs action="http://test.biomart.org/biomart/martview" check_values="false" method="get" target="_top"> @@ -15,7 +15,12 @@ <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" /> </inputs> <request_param_translation> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> + <request_param galaxy_name="URL" remote_name="URL" missing=""> + <add_to_url> + <param_from_source name="_export" missing="1" /> + <param_from_source name="GALAXY_URL" missing="0" /> + </add_to_url> + </request_param> <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" 
/> <request_param galaxy_name="organism" remote_name="organism" missing="" /> <request_param galaxy_name="table" remote_name="table" missing="" /> diff -r 0af3897ce585 -r 5f4511a5b4d6 tools/data_source/data_source.py --- a/tools/data_source/data_source.py Fri Dec 12 14:33:51 2008 -0500 +++ b/tools/data_source/data_source.py Sat Dec 13 23:35:29 2008 -0500 @@ -33,12 +33,6 @@ if not URL: open( filename, 'w' ).write( "" ) stop_err( 'The remote data source application has not sent back a URL parameter in the request.' ) - # TODO: Hack to get biomart to work - this can be eliminated when the Biomart team encodes URL prior to sending, meanwhile - # everything including and beyond the first '&' is truncated from URL. They said they'll let us know when this is fixed - # at their end. - add_to_URL = params.get( 'add_to_URL', None ) - if add_to_URL: - URL += '&_export=1&GALAXY_URL=0' URL_method = params.get( 'URL_method', None ) out = open( filename, 'w' ) CHUNK_SIZE = 2**20 # 1Mb diff -r 0af3897ce585 -r 5f4511a5b4d6 tools/data_source/gbrowse_datasource.py --- a/tools/data_source/gbrowse_datasource.py Fri Dec 12 14:33:51 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,53 +0,0 @@ -#!/usr/bin/env python -#Retreives data from GMOD and stores in a file. GBrowse parameters are provided in the input/output file. -import urllib, sys, os, gzip, tempfile, shutil -from galaxy import eggs -from galaxy.datatypes import data - -assert sys.version_info[:2] >= ( 2, 4 ) - -def stop_err( msg ): - sys.stderr.write( msg ) - sys.exit() - -def __main__(): - filename = sys.argv[1] - params = {} - - for line in open( filename, 'r' ): - try: - line = line.strip() - fields = line.split( '\t' ) - params[ fields[0] ] = fields[1] - except: - continue - - URL = params.get( 'URL', None ) - if not URL: - open( filename, 'w' ).write( "" ) - stop_err( 'Datasource has not sent back a URL parameter.' ) - - for i, param in enumerate( params.keys() ): - if i == 0: - sep = '?' 
- else: - sep = '&' - if param != '__collected_datasets__': - URL += "%s%s=%s" % ( sep, param, params.get( param ) ) - - CHUNK_SIZE = 2**20 # 1Mb - try: - page = urllib.urlopen( URL ) - except Exception, exc: - raise Exception( 'Problems connecting to %s (%s)' % ( URL, exc ) ) - sys.exit( 1 ) - - fp = open( filename, 'wb' ) - while 1: - chunk = page.read( CHUNK_SIZE ) - if not chunk: - break - fp.write( chunk ) - fp.close() - -if __name__ == "__main__": __main__() diff -r 0af3897ce585 -r 5f4511a5b4d6 tools/data_source/gbrowse_elegans.xml --- a/tools/data_source/gbrowse_elegans.xml Fri Dec 12 14:33:51 2008 -0500 +++ b/tools/data_source/gbrowse_elegans.xml Sat Dec 13 23:35:29 2008 -0500 @@ -1,13 +1,24 @@ <?xml version="1.0"?> -<tool name="C. Elegans" id="gbrowse_elegans"> +<tool name="C. Elegans" id="gbrowse_elegans" tool_type="data_source" URL_method="get"> <description>server</description> - <command interpreter="python">gbrowse_datasource.py $output</command> - <inputs action="http://www.wormbase.org/db/seq/gbrowse/c_elegans/" check_values="false" method="get" target="_top"> + <command interpreter="python">data_source.py $output</command> + <inputs action="http://dev.wormbase.org/db/seq/gbrowse/c_elegans/" check_values="false" method="get" target="_top"> <display>go to C. 
Elegans server $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=gbrowse_elegans" /> </inputs> + <request_param_translation> + <request_param galaxy_name="URL" remote_name="URL" missing=""> + <add_to_url> + <param_from_source name="d" missing="" /> + <param_from_source name="dbkey" missing="" /> + <param_from_source name="q" missing="" /> + <param_from_source name="s" missing="" /> + <param_from_source name="t" missing="" /> + </add_to_url> + </request_param> + <request_param galaxy_name="data_type" remote_name="data_type" missing="gff3" /> + </request_param_translation> <uihints minwidth="800"/> - <code file="gbrowse_filter_code.py"/> <outputs> <data name="output" format="txt" /> </outputs> diff -r 0af3897ce585 -r 5f4511a5b4d6 tools/data_source/gbrowse_filter_code.py --- a/tools/data_source/gbrowse_filter_code.py Fri Dec 12 14:33:51 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ -# Code for direct connection to GMOD -from galaxy.datatypes import sniff -import urllib - -import logging -log = logging.getLogger( __name__ ) - -def exec_before_job( app, inp_data, out_data, param_dict, tool=None ): - """Sets the attributes of the data""" - gb_settings = urllib.unquote( param_dict.get( 't', None ) ) # t=CG+TS+ESTB+SAGE+EXPR+EXPR_PATTERN+SNPs+PolyA+BLASTX+LINK+ETILE - gb_landmark_region = urllib.unquote( param_dict.get( 'q' ) ) # q=IV:6070000..6100000& - gb_land_mark, gb_region = gb_landmark_region.split( ':' ) - items = out_data.items() - for name, data in items: - data.name = "%s on %s" % ( data.name, gb_landmark_region ) - data.dbkey = param_dict.get( 'dbkey', '?' 
) - # Store GMOD / GBrowse parameters temporarily in output file - out = open( data.file_name, 'w' ) - for key, value in param_dict.items(): - out.write( "%s\t%s\n" % ( key, value ) ) - out.close() - out_data[ name ] = data - -def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None ): - """Verifies the data after the run""" - name, data = out_data.items()[0] - data.set_size() - if data.state == data.states.OK: - data.info = data.name - if data.extension == 'txt': - data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order ) - data = app.datatypes_registry.change_datatype( data, data_type ) - data.set_peek() - data.flush()
participants (1)
-
Greg Von Kuster