details: http://www.bx.psu.edu/hg/galaxy/rev/ae341e281c89 changeset: 1548:ae341e281c89 user: Greg Von Kuster <greg@bx.psu.edu> date: Mon Oct 06 13:34:51 2008 -0400 description: Treat EpiGRAPH as a data source much like ucsc table browser. 6 file(s) affected in this change: lib/galaxy/tools/__init__.py tool_conf.xml.sample tools/data_destination/epigraph.xml tools/data_source/epigraph.py tools/data_source/epigraph_code.py tools/data_source/epigraph_import.xml diffs (174 lines): diff -r 684b78f79f8b -r ae341e281c89 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Mon Oct 06 11:48:35 2008 -0400 +++ b/lib/galaxy/tools/__init__.py Mon Oct 06 13:34:51 2008 -0400 @@ -1070,11 +1070,10 @@ def parse_redirect_url( self, inp_data, param_dict ): """Parse the REDIRECT_URL tool param""" - # Tools that send data to an external application via a redirect must include the following 3 - # tool params: + # Tools that send data to an external application via a redirect must include the following 3 tool params: # REDIRECT_URL - the url to which the data is being sent # DATA_URL - the url to which the receiving application will send an http post to retrieve the Galaxy data - # GALAXY_URL - the to which the external application may post data as a response + # GALAXY_URL - the url to which the external application may post data as a response redirect_url = param_dict.get( 'REDIRECT_URL' ) redirect_url_params = self.build_redirect_url_params( param_dict ) # Add the parameters to the redirect url. 
We're splitting the param string on '**^**' diff -r 684b78f79f8b -r ae341e281c89 tool_conf.xml.sample --- a/tool_conf.xml.sample Mon Oct 06 11:48:35 2008 -0400 +++ b/tool_conf.xml.sample Mon Oct 06 13:34:51 2008 -0400 @@ -10,6 +10,7 @@ <tool file="data_source/biomart_test.xml" /> <tool file="data_source/gbrowse_elegans.xml" /> <tool file="data_source/flymine.xml" /> + <tool file="data_source/epigraph_import.xml" /> <tool file="data_source/encode_db.xml" /> <tool file="data_source/hbvar.xml" /> <tool file="validation/fix_errors.xml" /> diff -r 684b78f79f8b -r ae341e281c89 tools/data_destination/epigraph.xml --- a/tools/data_destination/epigraph.xml Mon Oct 06 11:48:35 2008 -0400 +++ b/tools/data_destination/epigraph.xml Mon Oct 06 13:34:51 2008 -0400 @@ -1,6 +1,6 @@ <?xml version="1.0"?> -<tool name="Perform EpiGRAPH" id="epigraph"> - <description> Genome analysis and prediction</description> +<tool name="Perform genome" id="epigraph_export"> + <description> analysis and prediction with EpiGRAPH</description> <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params> <inputs> <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH"> diff -r 684b78f79f8b -r ae341e281c89 tools/data_source/epigraph.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/data_source/epigraph.py Mon Oct 06 13:34:51 2008 -0400 @@ -0,0 +1,66 @@ +#!/usr/bin/env python +#Retrieves data from EpiGRAPH and stores in a file. EpiGRAPH request parameters are provided in the input/output file. 
+import urllib, sys, os, gzip, tempfile, shutil +from galaxy import eggs +from galaxy.datatypes import data + +assert sys.version_info[:2] >= ( 2, 4 ) + +def stop_err( msg ): + sys.stderr.write( msg ) + sys.exit() + +def check_gzip( filename ): + temp = open( filename, "U" ) + magic_check = temp.read( 2 ) + temp.close() + if magic_check != data.gzip_magic: + return False + return True + +def __main__(): + filename = sys.argv[1] + params = {} + for line in open( filename, 'r' ): + try: + line = line.strip() + fields = line.split( '\t' ) + params[ fields[0] ] = fields[1] + except: + continue + URL = params.get( 'URL', None ) + if not URL: + open( filename, 'w' ).write( "" ) + stop_err( 'EpiGRAPH has not sent back a URL parameter.' ) + out = open( filename, 'w' ) + CHUNK_SIZE = 2**20 # 1Mb + try: + page = urllib.urlopen( URL, urllib.urlencode( params ) ) + except: + stop_err( 'It appears that the EpiGRAPH server is currently off-line. Please try again later.' ) + while 1: + chunk = page.read( CHUNK_SIZE ) + if not chunk: + break + out.write( chunk ) + out.close() + if check_gzip( filename ): + fd, uncompressed = tempfile.mkstemp() + gzipped_file = gzip.GzipFile( filename ) + while 1: + try: + chunk = gzipped_file.read( CHUNK_SIZE ) + except IOError: + os.close( fd ) + os.remove( uncompressed ) + gzipped_file.close() + stop_err( 'Problem uncompressing gzipped data, please try retrieving the data uncompressed.' 
) + if not chunk: + break + os.write( fd, chunk ) + os.close( fd ) + gzipped_file.close() + # Replace the gzipped file with the uncompressed file + shutil.move( uncompressed, filename ) + +if __name__ == "__main__": __main__() diff -r 684b78f79f8b -r ae341e281c89 tools/data_source/epigraph_code.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/data_source/epigraph_code.py Mon Oct 06 13:34:51 2008 -0400 @@ -0,0 +1,41 @@ +#Code for direct connection to EpiGRAPH +from galaxy.datatypes import sniff +import urllib + +def exec_before_job( app, inp_data, out_data, param_dict, tool=None ): + """ + EpiGRAPH sends data to Galaxy by passing the following parameters in the request: + 1. URL - the url to which Galaxy should post a request to retrieve the data + 2. GENOME - the name of the UCSC genome assembly (e.g. hg18), dbkey in Galaxy + 3. NAME - data.name in Galaxy + 4. INFO - data.info in Galaxy + """ + items = out_data.items() + for name, data in items: + NAME = urllib.unquote( param_dict.get( 'NAME', None ) ) + if NAME is not None: + data.name = NAME + INFO = urllib.unquote( param_dict.get( 'INFO', None ) ) + if INFO is not None: + data.info = INFO + GENOME = urllib.unquote( param_dict.get( 'GENOME', None ) ) + if GENOME is not None: + data.dbkey = GENOME + else: + data.dbkey = '?' 
+ # Store EpiGRAPH request parameters temporarily in output file + out = open( data.file_name, 'w' ) + for key, value in param_dict.items(): + print >> out, "%s\t%s" % ( key, value ) + out.close() + out_data[ name ] = data + +def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None ): + """Verifies the datatype after the run""" + name, data = out_data.items()[0] + if data.extension == 'txt': + data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order ) + data = app.datatypes_registry.change_datatype( data, data_type ) + data.set_peek() + data.set_size() + data.flush() diff -r 684b78f79f8b -r ae341e281c89 tools/data_source/epigraph_import.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/data_source/epigraph_import.xml Mon Oct 06 13:34:51 2008 -0400 @@ -0,0 +1,15 @@ +<?xml version="1.0"?> +<tool name="EpiGRAPH" id="epigraph_import"> + <description> server</description> + <command interpreter="python">epigraph.py $output</command> + <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get"> + <display>go to EpiGRAPH server $GALAXY_URL</display> + <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> + </inputs> + <uihints minwidth="800"/> + <code file="epigraph_code.py"/> + <outputs> + <data name="output" format="txt" /> + </outputs> + <options sanitize="False" refresh="True"/> +</tool>