[hg] galaxy 1550: Use only 1 underlying executable ( data_source...

details: http://www.bx.psu.edu/hg/galaxy/rev/64c0734ff262 changeset: 1550:64c0734ff262 user: Greg Von Kuster <greg@bx.psu.edu> date: Tue Oct 07 15:21:46 2008 -0400 description: Use only 1 underlying executable ( data_source.py ) for data source tools. A new tag set is added to the data source tool configs to handle translation of request param names sent by remote apps ( something like <param_trans galaxy_name="dbkey" remote_name="GENOME" missing="?" /> ). 21 file(s) affected in this change: lib/galaxy/jobs/__init__.py lib/galaxy/tools/__init__.py lib/galaxy/util/__init__.py lib/galaxy/web/controllers/tool_runner.py tool_conf.xml.sample tools/data_source/biomart.py tools/data_source/biomart.xml tools/data_source/biomart_filter.py tools/data_source/biomart_test.xml tools/data_source/data_source.py tools/data_source/epigraph.py tools/data_source/epigraph_code.py tools/data_source/epigraph_import.xml tools/data_source/flymine.xml tools/data_source/flymine_filter_code.py tools/data_source/intermine.py tools/data_source/ucsc_tablebrowser.py tools/data_source/ucsc_tablebrowser.xml tools/data_source/ucsc_tablebrowser_archaea.xml tools/data_source/ucsc_tablebrowser_code.py tools/data_source/ucsc_tablebrowser_test.xml diffs (1016 lines): diff -r 960820cccaaa -r 64c0734ff262 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py Tue Oct 07 11:58:32 2008 -0400 +++ b/lib/galaxy/jobs/__init__.py Tue Oct 07 15:21:46 2008 -0400 @@ -270,6 +270,10 @@ incoming['userEmail'] = userEmail # Build params, done before hook so hook can use param_dict = self.tool.build_param_dict( incoming, inp_data, out_data ) + # Certain tools require tasks to be completed prior to job execution + # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ). 
+ if self.tool.tool_type is not None: + out_data = self.tool.exec_before_job( self.queue.app, inp_data, out_data, param_dict ) # Run the before queue ("exec_before_job") hook self.tool.call_hook( 'exec_before_job', self.queue.app, inp_data=inp_data, out_data=out_data, tool=self.tool, param_dict=incoming) @@ -437,6 +441,10 @@ # Create generated output children and primary datasets and add to param_dict collected_datasets = {'children':self.tool.collect_child_datasets(out_data),'primary':self.tool.collect_primary_datasets(out_data)} param_dict.update({'__collected_datasets__':collected_datasets}) + # Certain tools require tasks to be completed after job execution + # ( this used to be performed in the "exec_after_process" hook, but hooks are deprecated ). + if self.tool.tool_type is not None: + self.tool.exec_after_process( self.queue.app, inp_data, out_data, param_dict ) # Call 'exec_after_process' hook self.tool.call_hook( 'exec_after_process', self.queue.app, inp_data=inp_data, out_data=out_data, param_dict=param_dict, diff -r 960820cccaaa -r 64c0734ff262 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Tue Oct 07 11:58:32 2008 -0400 +++ b/lib/galaxy/tools/__init__.py Tue Oct 07 15:21:46 2008 -0400 @@ -225,8 +225,22 @@ if not self.version: # For backward compatibility, some tools may not have versions yet. self.version = "1.0.0" - # Command line (template). 
Optional for tools that do not invoke a - # local program + # Type of tool + self.tool_type = root.get( "tool_type", None ) + if self.tool_type is not None: + # data_source tool + if self.tool_type == "data_source": + self.param_trans_dict = {} + req_param_trans = root.find( "request_param_translation" ) + if req_param_trans is not None: + for req_param in req_param_trans.findall( "request_param" ): + # req_param tags must look like <request_param galaxy_name="dbkey" remote_name="GENOME" missing="" /> + trans_list = [] + remote_name = req_param.get( "remote_name" ) + trans_list.append( req_param.get( "galaxy_name" ) ) + trans_list.append( req_param.get( "missing" ) ) + self.param_trans_dict[ remote_name ] = trans_list + # Command line (template). Optional for tools that do not invoke a local program command = root.find("command") if command is not None and command.text is not None: self.command = command.text.lstrip() # get rid of leading whitespace @@ -1115,7 +1129,56 @@ except Exception, e: e.args = ( "Error in '%s' hook '%s', original message: %s" % ( self.name, hook_name, e.args[0] ) ) raise - + + def exec_before_job( self, app, inp_data, out_data, param_dict={} ): + if self.tool_type == 'data_source': + # List for converting UCSC to Galaxy exts, if not in following dictionary, use provided datatype + data_type_to_ext = { 'wigdata':'wig', 'tab':'interval', 'hyperlinks':'html', 'sequence':'fasta' } + dbkey = param_dict.get( 'dbkey ' ) + organism = param_dict.get( 'organism' ) + table = param_dict.get( 'table' ) + description = param_dict.get( 'description' ) + if description == 'range': + description = param_dict.get( 'position', '' ) + if not description: + description = 'unknown position' + data_type = param_dict.get( 'data_type ') + items = out_data.items() + for name, data in items: + if organism and table and description: + data.name = '%s on %s: %s (%s)' % ( data.name, organism, table, description ) + data.dbkey = dbkey + ext = data_type + try: + ext = 
data_type_to_ext[ data_type ] + except: + pass + if ext not in app.datatypes_registry.datatypes_by_extension: + ext = 'interval' + data = app.datatypes_registry.change_datatype( data, ext ) + # store external data source's request parameters temporarily in output file + out = open( data.file_name, 'w' ) + for key, value in param_dict.items(): + print >> out, '%s\t%s' % ( key, value ) + out.close() + out_data[ name ] = data + return out_data + + def exec_after_process( self, app, inp_data, out_data, param_dict ): + # TODO: for data_source tools at least, this code can probably be handled more optimally by adding a new + # tag set in the tool config. + if self.tool_type == 'data_source': + name, data = out_data.items()[0] + if data.state == data.states.OK: + data.info = data.name + if not isinstance( data.datatype, datatypes.interval.Bed ) and isinstance( data.datatype, datatypes.interval.Interval ): + data.set_meta() + if data.missing_meta(): + data = app.datatypes_registry.change_datatype( data, 'tabular' ) + data.set_peek() + data.set_size() + data.flush() + def collect_associated_files( self, output ): for name, outdata in output.items(): temp_file_path = os.path.join( self.app.config.new_file_path, "dataset_%s_files" % ( outdata.id ) ) diff -r 960820cccaaa -r 64c0734ff262 lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py Tue Oct 07 11:58:32 2008 -0400 +++ b/lib/galaxy/util/__init__.py Tue Oct 07 15:21:46 2008 -0400 @@ -141,13 +141,30 @@ # different parameters can be sanitized in different ways. NEVER_SANITIZE = ['file_data', 'url_paste', 'URL'] - def __init__(self, params, safe=True, sanitize=True): + def __init__( self, params, safe=True, sanitize=True, tool_type=None, param_trans_dict={} ): if safe: for key, value in params.items(): + # Check to see if we should translate certain parameter names. 
For example, + # in data_source tools, the external data source application may send back + # parameter names like GENOME which is translated to dbkey in Galaxy. + # param_trans_dict looks like { "GENOME" : [ "dbkey" "?" ] } + new_key = key + new_value = value + if tool_type == 'data_source': + if key in param_trans_dict: + new_key = param_trans_dict[ key ][0] + if not value: + new_value = param_trans_dict[ key ][1] if key not in self.NEVER_SANITIZE and sanitize: - self.__dict__[key] = sanitize_param(value) + self.__dict__[ new_key ] = sanitize_param( new_value ) else: - self.__dict__[key] = value + self.__dict__[ new_key ] = new_value + for key, value in param_trans_dict.items(): + # Make sure that all translated values used in Galaxy are added to the params + galaxy_name = param_trans_dict[ key ][0] + if galaxy_name not in self.__dict__: + # This will set the galaxy_name to the "missing" value + self.__dict__[ galaxy_name ] = param_trans_dict[ key ][1] else: self.__dict__.update(params) diff -r 960820cccaaa -r 64c0734ff262 lib/galaxy/web/controllers/tool_runner.py --- a/lib/galaxy/web/controllers/tool_runner.py Tue Oct 07 11:58:32 2008 -0400 +++ b/lib/galaxy/web/controllers/tool_runner.py Tue Oct 07 15:21:46 2008 -0400 @@ -39,7 +39,11 @@ log.error( "index called with tool id '%s' but no such tool exists", tool_id ) trans.log_event( "Tool id '%s' does not exist" % tool_id ) return "Tool '%s' does not exist, kwd=%s " % (tool_id, kwd) - params = util.Params(kwd, sanitize = tool.options.sanitize) + try: + param_trans_dict = tool.param_trans_dict + except: + param_trans_dict = {} + params = util.Params( kwd, sanitize=tool.options.sanitize, tool_type=tool.tool_type, param_trans_dict=param_trans_dict ) history = trans.get_history() trans.ensure_valid_galaxy_session() template, vars = tool.handle_input( trans, params.__dict__ ) diff -r 960820cccaaa -r 64c0734ff262 tool_conf.xml.sample --- a/tool_conf.xml.sample Tue Oct 07 11:58:32 2008 -0400 +++ b/tool_conf.xml.sample 
Tue Oct 07 15:21:46 2008 -0400 @@ -2,6 +2,7 @@ <toolbox> <section name="Get Data" id="getext"> <tool file="data_source/upload.xml"/> + <tool file="data_source/access_libraries.xml" /> <tool file="data_source/ucsc_tablebrowser.xml" /> <tool file="data_source/ucsc_tablebrowser_test.xml" /> <tool file="data_source/ucsc_tablebrowser_archaea.xml" /> @@ -123,7 +124,6 @@ <tool file="visualization/GMAJ.xml" /> <tool file="visualization/LAJ.xml" /> <tool file="visualization/build_ucsc_custom_track.xml" /> - <tool file="visualization/build_gbrowse_custom_track.xml" /> </section> <section name="Regional Variation" id="regVar"> <tool file="regVariation/windowSplitter.xml" /> @@ -156,8 +156,8 @@ <tool file="taxonomy/poisson2test.xml" /> </section> <section name="Solexa tools" id="solexa_tools"> - <tool file="solexa/fastq_statistics.xml" /> - <tool file="solexa/lastz_wrapper.xml" /> + <tool file="sr_mapping/fastq_statistics.xml" /> + <tool file="sr_mapping/lastz_wrapper.xml" /> </section> <!-- TODO: uncomment the following EMBOSS section whenever diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/biomart.py --- a/tools/data_source/biomart.py Tue Oct 07 11:58:32 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,55 +0,0 @@ -#!/usr/bin/env python -#Retreives data from BIOMART and stores in a file. Biomart parameters are provided in the input/output file. -#guruprasad Ananda - -import urllib, sys, os, gzip, tempfile, shutil -from galaxy import eggs - -assert sys.version_info[:2] >= ( 2, 4 ) - -def stop_err( msg ): - sys.stderr.write( msg ) - sys.exit() - -def __main__(): - filename = sys.argv[1] - params = {} - for line in open(filename, 'r'): - try: - line = line.strip() - fields = line.split('\t') - params[fields[0]] = fields[1] - except: - continue - - URL = params.get( 'URL', None ) - if not URL: - open( filename, 'w' ).write( "" ) - stop_err( 'Datasource has not sent back a URL parameter.' 
) - URL = URL + '&_export=1&GALAXY_URL=0' - CHUNK_SIZE = 2**20 # 1Mb - MAX_SIZE = CHUNK_SIZE * 100 - try: - page = urllib.urlopen(URL) - except Exception, exc: - stop_err('Problems connecting to %s (%s)' % (URL, exc) ) - - fp = open(filename, 'w') - size = 0 - max_size_exceeded = False - while 1: - chunk = page.read(CHUNK_SIZE) - if not chunk: - break - size += len(chunk) - if size > MAX_SIZE: - max_size_exceeded = True - break - fp.write(chunk) - fp.close() - - if max_size_exceeded: - print 'Maximum data size of 100 MB exceeded, incomplete data retrieval.' - -if __name__ == "__main__": - __main__() diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/biomart.xml --- a/tools/data_source/biomart.xml Tue Oct 07 11:58:32 2008 -0400 +++ b/tools/data_source/biomart.xml Tue Oct 07 15:21:46 2008 -0400 @@ -1,24 +1,24 @@ <?xml version="1.0"?> -<tool name="BioMart" id="biomart"> +<tool name="BioMart" id="biomart" tool_type="data_source"> <description>Central server</description> - <command interpreter="python"> - biomart.py - $output - </command> + <command interpreter="python">data_source.py $output</command> <inputs action="http://www.biomart.org/biomart/martview" check_values="false" method="get" target="_top"> <display>go to BioMart Central $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" /> </inputs> - + <request_param_translation> + <request_param galaxy_name="URL" remote_name="URL" missing="" /> + <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" 
/> + <request_param galaxy_name="organism" remote_name="organism" missing="" /> + <request_param galaxy_name="table" remote_name="table" missing="" /> + <request_param galaxy_name="description" remote_name="description" missing="" /> + <request_param galaxy_name="name" remote_name="name" missing="Biomart query" /> + <request_param galaxy_name="info" remote_name="info" missing="" /> + <request_param galaxy_name="data_type" remote_name="type" missing="txt" /> + </request_param_translation> <uihints minwidth="800"/> - - <code file="biomart_filter.py"/> - <outputs> <data name="output" format="txt" /> </outputs> - <options sanitize="False" refresh="True"/> - </tool> - diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/biomart_filter.py --- a/tools/data_source/biomart_filter.py Tue Oct 07 11:58:32 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,87 +0,0 @@ -# Greg Von Kuster -import urllib -from galaxy import eggs -from galaxy.datatypes import sniff -from galaxy import datatypes, config -import tempfile, shutil - -def exec_before_job( app, inp_data, out_data, param_dict, tool=None): - """Sets the name of the data""" - data_name = param_dict.get( 'name', 'Biomart query' ) - data_type = param_dict.get( 'type', 'txt' ) - name, data = out_data.items()[0] - if data_type == 'txt': - data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order ) - data = app.datatypes_registry.change_datatype(data, data_type) - data.name = data_name - #store BIOMART parameters temporarily in output file - out = open(data.file_name,'w') - for key, value in param_dict.items(): - print >> out, "%s\t%s" % (key,value) - out.close() - out_data[name] = data - - -def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None): - name, data = out_data.items()[0] - if not isinstance(data.datatype, datatypes.interval.Bed) and isinstance(data.datatype, datatypes.interval.Interval): - #Set meta data, format file to be valid 
interval type - data.set_meta(first_line_is_header=True) - #check for missing meta data, if all there, comment first line and process file - if not data.missing_meta(): - line_ctr = -1 - temp = tempfile.NamedTemporaryFile('w') - temp_filename = temp.name - temp.close() - temp = open(temp_filename,'w') - chromCol = int(data.metadata.chromCol) - 1 - startCol = int(data.metadata.startCol) - 1 - strandCol = int(data.metadata.strandCol) - 1 - - for line in open(data.file_name, 'r'): - line_ctr += 1 - #First line is a non-commented header line, lets comment it out here - if line_ctr == 0: - temp.write("#%s" % line) - continue - fields = line.strip().split('\t') - #If chrom col is an int, make it chrInt - try: - int(fields[chromCol]) - fields[chromCol] = "chr%s" % fields[chromCol] - except: - try: - if fields[chromCol].upper()== "X" or fields[chromCol].upper()== "Y": - fields[chromCol] = "chr%s" % fields[chromCol].upper() - except: - pass - #change to BED coordinate system - try: - fields[startCol] = str(int(fields[startCol]) - 1) - except: - pass - #set strand to +/-, instead of +1/-1 - try: - if strandCol > 0: - if int(fields[strandCol]) > 0: - fields[strandCol] = "+" - else: - fields[strandCol] = "-" - except: - pass - temp.write("%s\n" % '\t'.join(fields)) - temp.close() - shutil.move(temp_filename,data.file_name) - else: - data_type = sniff.guess_ext(data.file_name) - data = app.datatypes_registry.change_datatype(data, data_type) - if data.missing_meta(): - data.set_meta() - else: - data_type = sniff.guess_ext(data.file_name) - data = app.datatypes_registry.change_datatype(data, data_type) - if data.missing_meta(): - data.set_meta() - data.set_peek() - data.set_size() - data.flush() diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/biomart_test.xml --- a/tools/data_source/biomart_test.xml Tue Oct 07 11:58:32 2008 -0400 +++ b/tools/data_source/biomart_test.xml Tue Oct 07 15:21:46 2008 -0400 @@ -1,27 +1,24 @@ <?xml version="1.0"?> -<tool name="BioMart" 
id="biomart"> - +<tool name="BioMart" id="biomart" tool_type="data_source"> <description>Test server</description> - - <command interpreter="python"> - biomart.py - $output - </command> - + <command interpreter="python">data_source.py $output</command> <inputs action="http://test.biomart.org/biomart/martview" check_values="false" method="get" target="_top"> <display>go to BioMart Central $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" /> </inputs> - - <uihints minwidth="800"/> - - <code file="biomart_filter.py"/> - + <request_param_translation> + <request_param galaxy_name="URL" remote_name="URL" missing="" /> + <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" /> + <request_param galaxy_name="organism" remote_name="organism" missing="" /> + <request_param galaxy_name="table" remote_name="table" missing="" /> + <request_param galaxy_name="description" remote_name="description" missing="" /> + <request_param galaxy_name="name" remote_name="name" missing="Biomart test query" /> + <request_param galaxy_name="info" remote_name="info" missing="" /> + <request_param galaxy_name="data_type" remote_name="type" missing="txt" /> + </request_param_translation> + <uihints minwidth="800"/> <outputs> <data name="output" format="txt" /> </outputs> - <options sanitize="False" refresh="True"/> - </tool> - diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/data_source.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/data_source/data_source.py Tue Oct 07 15:21:46 2008 -0400 @@ -0,0 +1,66 @@ +#!/usr/bin/env python +#Retreives data from UCSC and stores in a file. UCSC parameters are provided in the input/output file. 
+import urllib, sys, os, gzip, tempfile, shutil +from galaxy import eggs +from galaxy.datatypes import data + +assert sys.version_info[:2] >= ( 2, 4 ) + +def stop_err( msg ): + sys.stderr.write( msg ) + sys.exit() + +def check_gzip( filename ): + temp = open( filename, "U" ) + magic_check = temp.read( 2 ) + temp.close() + if magic_check != data.gzip_magic: + return False + return True + +def __main__(): + filename = sys.argv[1] + params = {} + for line in open( filename, 'r' ): + try: + line = line.strip() + fields = line.split( '\t' ) + params[ fields[0] ] = fields[1] + except: + continue + URL = params.get( 'URL', None ) + if not URL: + open( filename, 'w' ).write( "" ) + stop_err( 'The remote data source application has not sent back a URL parameter in the request.' ) + out = open( filename, 'w' ) + CHUNK_SIZE = 2**20 # 1Mb + try: + page = urllib.urlopen( URL, urllib.urlencode( params ) ) + except: + stop_err( 'It appears that the remote data source application is currently off line. Please try again later.' ) + while 1: + chunk = page.read( CHUNK_SIZE ) + if not chunk: + break + out.write( chunk ) + out.close() + if check_gzip( filename ): + fd, uncompressed = tempfile.mkstemp() + gzipped_file = gzip.GzipFile( filename ) + while 1: + try: + chunk = gzipped_file.read( CHUNK_SIZE ) + except IOError: + os.close( fd ) + os.remove( uncompressed ) + gzipped_file.close() + stop_err( 'Problem uncompressing gzipped data, please try retrieving the data uncompressed.' ) + if not chunk: + break + os.write( fd, chunk ) + os.close( fd ) + gzipped_file.close() + # Replace the gzipped file with the uncompressed file + shutil.move( uncompressed, filename ) + +if __name__ == "__main__": __main__() diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/epigraph.py --- a/tools/data_source/epigraph.py Tue Oct 07 11:58:32 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,66 +0,0 @@ -#!/usr/bin/env python -#Retreives data from EpiGRAPH and stores in a file. 
EpiGRAPH request parameters are provided in the input/output file. -import urllib, sys, os, gzip, tempfile, shutil -from galaxy import eggs -from galaxy.datatypes import data - -assert sys.version_info[:2] >= ( 2, 4 ) - -def stop_err( msg ): - sys.stderr.write( msg ) - sys.exit() - -def check_gzip( filename ): - temp = open( filename, "U" ) - magic_check = temp.read( 2 ) - temp.close() - if magic_check != data.gzip_magic: - return False - return True - -def __main__(): - filename = sys.argv[1] - params = {} - for line in open( filename, 'r' ): - try: - line = line.strip() - fields = line.split( '\t' ) - params[ fields[0] ] = fields[1] - except: - continue - URL = params.get( 'URL', None ) - if not URL: - open( filename, 'w' ).write( "" ) - stop_err( 'EpiGRAPH has not sent back a URL parameter.' ) - out = open( filename, 'w' ) - CHUNK_SIZE = 2**20 # 1Mb - try: - page = urllib.urlopen( URL, urllib.urlencode( params ) ) - except: - stop_err( 'It appears that the EpiGRAPH server is currently off-line. Please try again later.' ) - while 1: - chunk = page.read( CHUNK_SIZE ) - if not chunk: - break - out.write( chunk ) - out.close() - if check_gzip( filename ): - fd, uncompressed = tempfile.mkstemp() - gzipped_file = gzip.GzipFile( filename ) - while 1: - try: - chunk = gzipped_file.read( CHUNK_SIZE ) - except IOError: - os.close( fd ) - os.remove( uncompressed ) - gzipped_file.close() - stop_err( 'Problem uncompressing gzipped data, please try retrieving the data uncompressed.' 
) - if not chunk: - break - os.write( fd, chunk ) - os.close( fd ) - gzipped_file.close() - # Replace the gzipped file with the uncompressed file - shutil.move( uncompressed, filename ) - -if __name__ == "__main__": __main__() diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/epigraph_code.py --- a/tools/data_source/epigraph_code.py Tue Oct 07 11:58:32 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -#Code for direct connection to EpiGRAPH -from galaxy.datatypes import sniff -import urllib - -def exec_before_job( app, inp_data, out_data, param_dict, tool=None ): - """ - EpiGRAPH sends data to Galaxy by passing the following parameters in the request: - 1. URL - the url to which Galaxy should post a request to retrieve the data - 2. GENOME - the name of the UCSC genome assembly (e.g. hg18), dbkey in Galaxy - 3. NAME - data.name in Galaxy - 4. INFO - data.info in Galaxy - """ - items = out_data.items() - for name, data in items: - NAME = urllib.unquote( param_dict.get( 'NAME', None ) ) - if NAME is not None: - data.name = NAME - INFO = urllib.unquote( param_dict.get( 'INFO', None ) ) - if INFO is not None: - data.info = INFO - GENOME = urllib.unquote( param_dict.get( 'GENOME', None ) ) - if GENOME is not None: - data.dbkey = GENOME - else: - data.dbkey = '?' 
- # Store EpiGRAPH request parameters temporarily in output file - out = open( data.file_name, 'w' ) - for key, value in param_dict.items(): - print >> out, "%s\t%s" % ( key, value ) - out.close() - out_data[ name ] = data - -def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None ): - """Verifies the datatype after the run""" - name, data = out_data.items()[0] - if data.extension == 'txt': - data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order ) - data = app.datatypes_registry.change_datatype( data, data_type ) - data.set_peek() - data.set_size() - data.flush() diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/epigraph_import.xml --- a/tools/data_source/epigraph_import.xml Tue Oct 07 11:58:32 2008 -0400 +++ b/tools/data_source/epigraph_import.xml Tue Oct 07 15:21:46 2008 -0400 @@ -1,15 +1,24 @@ <?xml version="1.0"?> -<tool name="EpiGRAPH" id="epigraph_import"> - <description> server</description> - <command interpreter="python">epigraph.py $output</command> - <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get"> - <display>go to EpiGRAPH server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <uihints minwidth="800"/> - <code file="epigraph_code.py"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> +<tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source"> + <description> server</description> + <command interpreter="python">data_source.py $output</command> + <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get"> + <display>go to EpiGRAPH server $GALAXY_URL</display> + <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> + </inputs> + <request_param_translation> + 
<request_param galaxy_name="URL" remote_name="URL" missing="" /> + <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" /> + <request_param galaxy_name="organism" remote_name="organism" missing="" /> + <request_param galaxy_name="table" remote_name="table" missing="" /> + <request_param galaxy_name="description" remote_name="description" missing="" /> + <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" /> + <request_param galaxy_name="info" remote_name="INFO" missing="" /> + <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> + </request_param_translation> + <uihints minwidth="800"/> + <outputs> + <data name="output" format="txt" /> + </outputs> + <options sanitize="False" refresh="True"/> </tool> diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/flymine.xml --- a/tools/data_source/flymine.xml Tue Oct 07 11:58:32 2008 -0400 +++ b/tools/data_source/flymine.xml Tue Oct 07 15:21:46 2008 -0400 @@ -1,13 +1,22 @@ <?xml version="1.0"?> -<tool name="Flymine" id="flymine"> +<tool name="Flymine" id="flymine" tool_type="data_source"> <description>server</description> - <command interpreter="python">intermine.py $output</command> + <command interpreter="python">data_source.py $output</command> <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get" target="_top"> <display>go to Flymine server $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> </inputs> + <request_param_translation> + <request_param galaxy_name="URL" remote_name="URL" missing="" /> + <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> + <request_param galaxy_name="organism" remote_name="organism" missing="" /> + <request_param galaxy_name="table" remote_name="table" missing="" /> + <request_param galaxy_name="description" remote_name="description" missing="" /> + <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" /> + <request_param galaxy_name="info" remote_name="info" missing="" /> + <request_param galaxy_name="data_type" remote_name="data_type" missing="interval" /> + </request_param_translation> <uihints minwidth="800"/> - <code file="flymine_filter_code.py"/> <outputs> <data name="output" format="txt" /> </outputs> diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/flymine_filter_code.py --- a/tools/data_source/flymine_filter_code.py Tue Oct 07 11:58:32 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -# Code for direct connection to flymine -from galaxy.datatypes import sniff -import urllib - -import logging -log = logging.getLogger( __name__ ) - -def exec_before_job( app, inp_data, out_data, param_dict, tool=None ): - """Sets the attributes of the data""" - items = out_data.items() - for name, data in items: - data.dbkey = param_dict.get( 'dbkey', '?' 
) - # Store flymine parameters temporarily in output file - out = open( data.file_name, 'w' ) - for key, value in param_dict.items(): - out.write( "%s\t%s\n" % ( key, value ) ) - out.close() - out_data[ name ] = data - -def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None ): - """Verifies the data after the run""" - name, data = out_data.items()[0] - if data.state == data.states.OK: - data.info = data.name - if data.extension == 'txt': - data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order ) - data = app.datatypes_registry.change_datatype( data, data_type ) - data.set_peek() - data.set_size() - data.flush() - diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/intermine.py --- a/tools/data_source/intermine.py Tue Oct 07 11:58:32 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -#!/usr/bin/env python -#Retreives data from intermine and stores in a file. Intermine parameters are provided in the input/output file. -import urllib, sys, os, gzip, tempfile, shutil -from galaxy import eggs -from galaxy.datatypes import data - -assert sys.version_info[:2] >= ( 2, 4 ) - -def stop_err( msg ): - sys.stderr.write( msg ) - sys.exit() - -def __main__(): - filename = sys.argv[1] - params = {} - - for line in open( filename, 'r' ): - try: - line = line.strip() - fields = line.split( '\t' ) - params[ fields[0] ] = fields[1] - except: - continue - - URL = params.get( 'URL', None ) - if not URL: - open( filename, 'w' ).write( "" ) - stop_err( 'Datasource has not sent back a URL parameter.' 
) - - CHUNK_SIZE = 2**20 # 1Mb - try: - page = urllib.urlopen( URL ) - except Exception, exc: - raise Exception( 'Problems connecting to %s (%s)' % ( URL, exc ) ) - sys.exit( 1 ) - - fp = open( filename, 'wb' ) - while 1: - chunk = page.read( CHUNK_SIZE ) - if not chunk: - break - fp.write( chunk ) - fp.close() - -if __name__ == "__main__": __main__() diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/ucsc_tablebrowser.py --- a/tools/data_source/ucsc_tablebrowser.py Tue Oct 07 11:58:32 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,70 +0,0 @@ -#!/usr/bin/env python -#Retreives data from UCSC and stores in a file. UCSC parameters are provided in the input/output file. -import urllib, sys, os, gzip, tempfile, shutil -from galaxy import eggs -from galaxy.datatypes import data - -assert sys.version_info[:2] >= ( 2, 4 ) - -def stop_err( msg ): - sys.stderr.write( msg ) - sys.exit() - -def check_gzip( filename ): - temp = open( filename, "U" ) - magic_check = temp.read( 2 ) - temp.close() - if magic_check != data.gzip_magic: - return False - return True - -def __main__(): - filename = sys.argv[1] - params = {} - - for line in open(filename, 'r'): - try: - line = line.strip() - fields = line.split('\t') - params[fields[0]] = fields[1] - except: - continue - - URL = params.get( 'URL', None ) - if not URL: - open( filename, 'w' ).write( "" ) - #raise Exception('Datasource has not sent back a URL parameter') - stop_err( 'Datasource has not sent back a URL parameter.' ) - out = open( filename, 'w' ) - CHUNK_SIZE = 2**20 # 1Mb - try: - page = urllib.urlopen( URL, urllib.urlencode( params ) ) - except: - stop_err( 'It appears that the UCSC Table Browser is currently offline. Please try again later.' 
) - - while 1: - chunk = page.read( CHUNK_SIZE ) - if not chunk: - break - out.write( chunk ) - out.close() - if check_gzip( filename ): - fd, uncompressed = tempfile.mkstemp() - gzipped_file = gzip.GzipFile( filename ) - while 1: - try: - chunk = gzipped_file.read( CHUNK_SIZE ) - except IOError: - os.close( fd ) - os.remove( uncompressed ) - gzipped_file.close() - stop_err( 'Problem decompressing gzipped data, please try retrieving the data uncompressed.' ) - if not chunk: - break - os.write( fd, chunk ) - os.close( fd ) - gzipped_file.close() - # Replace the gzipped file with the decompressed file - shutil.move( uncompressed, filename ) - -if __name__ == "__main__": __main__() diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/ucsc_tablebrowser.xml --- a/tools/data_source/ucsc_tablebrowser.xml Tue Oct 07 11:58:32 2008 -0400 +++ b/tools/data_source/ucsc_tablebrowser.xml Tue Oct 07 15:21:46 2008 -0400 @@ -1,10 +1,7 @@ <?xml version="1.0"?> -<tool name="UCSC Main" id="ucsc_table_direct1"> - +<tool name="UCSC Main" id="ucsc_table_direct1" tool_type="data_source"> <description>table browser</description> - - <command interpreter="python">ucsc_tablebrowser.py $output</command> - + <command interpreter="python">data_source.py $output</command> <inputs action="http://genome.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get"> <display>go to UCSC Table Browser $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> @@ -13,15 +10,17 @@ <param name="hgta_compressType" type="hidden" value="none" /> <param name="hgta_outputType" type="hidden" value="bed" /> </inputs> - + <request_param_translation> + <request_param galaxy_name="URL" remote_name="URL" missing="" /> + <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> + <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> + <request_param galaxy_name="table" remote_name="hgta_track" missing="unknown table" /> + <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> + <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="interval" /> + </request_param_translation> <uihints minwidth="800"/> - - <code file="ucsc_tablebrowser_code.py"/> - <outputs> <data name="output" format="bed" /> </outputs> <options sanitize="False" refresh="True"/> - </tool> - diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/ucsc_tablebrowser_archaea.xml --- a/tools/data_source/ucsc_tablebrowser_archaea.xml Tue Oct 07 11:58:32 2008 -0400 +++ b/tools/data_source/ucsc_tablebrowser_archaea.xml Tue Oct 07 15:21:46 2008 -0400 @@ -1,10 +1,7 @@ <?xml version="1.0"?> -<tool name="UCSC Archaea" id="ucsc_table_direct_archaea1"> - +<tool name="UCSC Archaea" id="ucsc_table_direct_archaea1" tool_type="data_source"> <description>table browser</description> - - <command interpreter="python">ucsc_tablebrowser.py $output</command> - + <command interpreter="python">data_source.py $output</command> <inputs action="http://archaea.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get"> <display>go to UCSC Table Browser $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> @@ -13,15 +10,17 @@ <param name="hgta_compressType" type="hidden" value="none" /> <param name="hgta_outputType" type="hidden" value="bed" /> </inputs> - + <request_param_translation> + <request_param galaxy_name="URL" remote_name="URL" missing="" /> + <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> + <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> + <request_param galaxy_name="table" remote_name="hgta_track" missing="" /> + <request_param galaxy_name="description" remote_name="hgta_regionType" missing="" /> + <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="interval" /> + </request_param_translation> <uihints minwidth="800"/> - - <code file="ucsc_tablebrowser_code.py"/> - <outputs> <data name="output" format="bed" /> </outputs> <options sanitize="False" refresh="True"/> - </tool> - diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/ucsc_tablebrowser_code.py --- a/tools/data_source/ucsc_tablebrowser_code.py Tue Oct 07 11:58:32 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ -#Code for direct connection to UCSC -from galaxy import datatypes - -def exec_before_job( app, inp_data, out_data, param_dict, tool=None): - """Sets the name of the data""" - outputType = param_dict.get( 'hgta_outputType', "interval" ).lower() #assume all data is interval, we will fix later if not the case - #list for converting ucsc to galaxy exts, if not in following dictionary, use provided datatype - outputType_to_ext = {'wigdata':'wig','tab':'interval','hyperlinks':'html','sequence':'fasta'} - items = out_data.items() - description = param_dict.get('hgta_regionType',"") - organism = param_dict.get('org',"unkown species") - table = param_dict.get('hgta_track',"") - if description == 'range': - try: - description = param_dict.get('position',"") - except: - description = "unkown position" - for name, data in items: - data.name = "%s on %s: %s (%s)" % (data.name, organism, table, description) - data.dbkey = param_dict.get('db', '?') - ext = outputType - try: - ext = outputType_to_ext[outputType] - except: - pass - if ext not in app.datatypes_registry.datatypes_by_extension: - ext = 'interval' - data = app.datatypes_registry.change_datatype(data, ext) - - #store ucsc parameters temporarily in 
output file - out = open(data.file_name,'w') - for key, value in param_dict.items(): - print >> out, "%s\t%s" % (key,value) - out.close() - - out_data[name] = data - -def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None): - """Verifies the datatype after the run""" - - name, data = out_data.items()[0] - if data.state == data.states.OK: - data.info = data.name - - if not isinstance(data.datatype, datatypes.interval.Bed) and isinstance(data.datatype, datatypes.interval.Interval): - data.set_meta() - if data.missing_meta(): - data = app.datatypes_registry.change_datatype(data, 'tabular') - data.set_peek() - data.set_size() - data.flush() diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/ucsc_tablebrowser_test.xml --- a/tools/data_source/ucsc_tablebrowser_test.xml Tue Oct 07 11:58:32 2008 -0400 +++ b/tools/data_source/ucsc_tablebrowser_test.xml Tue Oct 07 15:21:46 2008 -0400 @@ -1,10 +1,7 @@ <?xml version="1.0"?> -<tool name="UCSC Test" id="ucsc_table_direct_test1"> - +<tool name="UCSC Test" id="ucsc_table_direct_test1" tool_type="data_source"> <description>table browser</description> - - <command interpreter="python">ucsc_tablebrowser.py $output</command> - + <command interpreter="python">data_source.py $output</command> <inputs action="http://genome-test.cse.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get"> <display>go to UCSC Table Browser $GALAXY_URL</display> <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> @@ -13,15 +10,17 @@ <param name="hgta_compressType" type="hidden" value="none" /> <param name="hgta_outputType" type="hidden" value="bed" /> </inputs> - + <request_param_translation> + <request_param galaxy_name="URL" remote_name="URL" missing="" /> + <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> + <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> + <request_param galaxy_name="table" remote_name="hgta_track" missing="" /> + <request_param galaxy_name="description" remote_name="hgta_regionType" missing="" /> + <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="interval" /> + </request_param_translation> <uihints minwidth="800"/> - - <code file="ucsc_tablebrowser_code.py"/> - <outputs> <data name="output" format="bed" /> </outputs> <options sanitize="False" refresh="True"/> - </tool> -
participants (1)
-
Nate Coraor