[hg] galaxy 1524: Integrate with intermine ( data source ) and e...
details: http://www.bx.psu.edu/hg/galaxy/rev/aae4754d6828 changeset: 1524:aae4754d6828 user: Greg Von Kuster <greg@bx.psu.edu> date: Mon Sep 22 10:36:34 2008 -0400 description: Integrate with intermine ( data source ) and epigraph ( data destination ). Receiving data from epigraph coming soon. Data is sent to epigraph using a combination of DATA_URL and REDIRECT_URL tool params. This tool creates jobs, but does not queue them for execution. 12 file(s) affected in this change: lib/galaxy/tools/__init__.py lib/galaxy/tools/actions/__init__.py lib/galaxy/tools/parameters/basic.py lib/galaxy/web/controllers/async.py lib/galaxy/web/controllers/tool_runner.py templates/root/redirect.mako templates/tool_form.tmpl tool_conf.xml.sample tools/data_destination/epigraph.xml tools/data_source/flymine.xml tools/data_source/flymine_filter_code.py tools/data_source/intermine.py diffs (429 lines): diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Sun Sep 21 17:36:28 2008 -0400 +++ b/lib/galaxy/tools/__init__.py Mon Sep 22 10:36:34 2008 -0400 @@ -239,6 +239,16 @@ self.command = interpreter + " " + self.command else: self.command = '' + # Parameters used to build URL for redirection to external app + redirect_url_params = root.find( "redirect_url_params" ) + if redirect_url_params is not None and redirect_url_params.text is not None: + # get rid of leading / trailing white space + redirect_url_params = redirect_url_params.text.strip() + # Replace remaining white space with something we can safely split on later + # when we are building the params + self.redirect_url_params = redirect_url_params.replace( ' ', '**^**' ) + else: + self.redirect_url_params = '' # Short description of the tool self.description = util.xml_text(root, "description") # Job runner @@ -677,7 +687,7 @@ return "tool_form.tmpl", dict( errors=errors, tool_state=state, incoming=incoming, error_message=error_message ) # If we've completed the last page we can execute 
the tool elif state.page == self.last_page: - out_data = self.execute( trans, params ) + out_data = self.execute( trans, incoming=params ) return 'tool_executed.tmpl', dict( out_data=out_data ) # Otherwise move on to the next page else: @@ -689,8 +699,8 @@ # Just a refresh, render the form with updated state and errors. return 'tool_form.tmpl', dict( errors=errors, tool_state=state ) - def update_state( self, trans, inputs, state, incoming, - prefix="", context=None, update_only=False, old_errors={}, changed_dependencies={} ): + def update_state( self, trans, inputs, state, incoming, prefix="", context=None, + update_only=False, old_errors={}, changed_dependencies={} ): """ Update the tool state in `state` using the user input in `incoming`. This is designed to be called recursively: `inputs` contains the @@ -877,14 +887,14 @@ raise Exception( "Unexpected parameter type" ) return args - def execute( self, trans, incoming={}, set_output_hid = True ): + def execute( self, trans, incoming={}, set_output_hid=True ): """ Execute the tool using parameter values in `incoming`. This just dispatches to the `ToolAction` instance specified by `self.tool_action`. In general this will create a `Job` that when run will build the tool's outputs, e.g. `DefaultToolAction`. """ - return self.tool_action.execute( self, trans, incoming, set_output_hid = set_output_hid ) + return self.tool_action.execute( self, trans, incoming=incoming, set_output_hid=set_output_hid ) def params_to_strings( self, params, app ): return params_to_strings( self.inputs, params, app ) @@ -1045,7 +1055,54 @@ #e.args = ( 'Error substituting into command line. 
Params: %r, Command: %s' % ( param_dict, self.command ) ) raise return command_line - + + def build_redirect_url_params( self, param_dict ): + """Substitute parameter values into self.redirect_url_params""" + if not self.redirect_url_params: + return + redirect_url_params = None + # Substituting parameter values into the url params + redirect_url_params = fill_template( self.redirect_url_params, context=param_dict ) + # Remove newlines + redirect_url_params = redirect_url_params.replace( "\n", " " ).replace( "\r", " " ) + return redirect_url_params + + def parse_redirect_url( self, inp_data, param_dict ): + """Parse the REDIRECT_URL tool param""" + # Tools that send data to an external application via a redirect must include the following 3 + # tool params: + # REDIRECT_URL - the url to which the data is being sent + # DATA_URL - the url to which the receiving application will send an http post to retrieve the Galaxy data + # GALAXY_URL - the to which the external application may post data as a response + redirect_url = param_dict.get( 'REDIRECT_URL' ) + redirect_url_params = self.build_redirect_url_params( param_dict ) + # Add the parameters to the redirect url. We're splitting the param string on '**^**' + # because the self.parse() method replaced white space with that separator. + params = redirect_url_params.split( '**^**' ) + rup_dict = {} + for param in params: + p_list = param.split( '=' ) + p_name = p_list[0] + p_val = p_list[1] + rup_dict[ p_name ] = p_val + DATA_URL = param_dict.get( 'DATA_URL', None ) + assert DATA_URL is not None, "DATA_URL parameter missing in tool config." 
+ # Get the dataset - there should only be 1 + for name in inp_data.keys(): + data = inp_data[ name ] + DATA_URL += "/%s/display" % str( data.id ) + redirect_url += "?DATA_URL=%s" % DATA_URL + # Add the redirect_url_params to redirect_url + for p_name in rup_dict: + redirect_url += "&%s=%s" % ( p_name, rup_dict[ p_name ] ) + # Add the current user email to redirect_url + if data.history.user: + USERNAME = str( data.history.user.email ) + else: + USERNAME = 'Anonymous' + redirect_url += "&USERNAME=%s" % USERNAME + return redirect_url + def call_hook( self, hook_name, *args, **kwargs ): """ Call the custom code hook function identified by 'hook_name' if any, diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py Sun Sep 21 17:36:28 2008 -0400 +++ b/lib/galaxy/tools/actions/__init__.py Mon Sep 22 10:36:34 2008 -0400 @@ -2,6 +2,8 @@ from galaxy.tools.parameters import * from galaxy.util.template import fill_template from galaxy.util.none_like import NoneDataset +from galaxy.web import url_for +from galaxy.jobs import JOB_OK import logging log = logging.getLogger( __name__ ) @@ -63,7 +65,7 @@ tool.visit_inputs( param_values, visitor ) return input_datasets - def execute(self, tool, trans, incoming={}, set_output_hid = True ): + def execute(self, tool, trans, incoming={}, set_output_hid=True ): out_data = {} # Collect any input datasets from the incoming parameters inp_data = self.collect_input_datasets( tool, incoming, trans ) @@ -90,15 +92,12 @@ on_text = '%s, %s, and others' % tuple(input_names[0:2]) else: on_text = "" - # Add the dbkey to the incoming parameters incoming[ "dbkey" ] = input_dbkey - # Keep track of parent / child relationships, we'll create all the # datasets first, then create the associations parent_to_child_pairs = [] child_dataset_names = set() - for name, output in tool.outputs.items(): if output.parent: parent_to_child_pairs.append( ( output.parent, name ) ) @@ -149,23 +148,19 @@ 
out_data[ name ] = data # Store all changes to database trans.app.model.flush() - # Add all the top-level (non-child) datasets to the history for name in out_data.keys(): if name not in child_dataset_names and name not in incoming: #don't add children; or already existing datasets, i.e. async created data = out_data[ name ] trans.history.add_dataset( data, set_hid = set_output_hid ) data.flush() - # Add all the children to their parents for parent_name, child_name in parent_to_child_pairs: parent_dataset = out_data[ parent_name ] child_dataset = out_data[ child_name ] parent_dataset.children.append( child_dataset ) - # Store data after custom code runs trans.app.model.flush() - # Create the job object job = trans.app.model.Job() job.session_id = trans.get_galaxy_session( create=True ).id @@ -189,8 +184,19 @@ for name, dataset in out_data.iteritems(): job.add_output_dataset( name, dataset ) trans.app.model.flush() - - # Queue the job for execution - trans.app.job_queue.put( job.id, tool ) - trans.log_event( "Added job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id ) - return out_data + # Some tools are not really executable, but jobs are still created for them ( for record keeping ). + # Examples include tools that redirect to other applications ( epigraph ). These special tools must + # include something that can be retrieved from the params ( e.g., REDIRECT_URL ) to keep the job + # from being queued. 
+ if 'REDIRECT_URL' in incoming: + redirect_url = tool.parse_redirect_url( inp_data, incoming ) + # Job should not be queued, so set state to ok + job.state = JOB_OK + job.info = "Redirected to: %s" % redirect_url + job.flush() + trans.response.send_redirect( url_for( controller='tool_runner', action='redirect', redirect_url=redirect_url ) ) + else: + # Queue the job for execution + trans.app.job_queue.put( job.id, tool ) + trans.log_event( "Added job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id ) + return out_data diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/tools/parameters/basic.py --- a/lib/galaxy/tools/parameters/basic.py Sun Sep 21 17:36:28 2008 -0400 +++ b/lib/galaxy/tools/parameters/basic.py Mon Sep 22 10:36:34 2008 -0400 @@ -332,6 +332,8 @@ return form_builder.HiddenField( self.name, self.value ) def get_initial_value( self, trans, context ): return self.value + def get_label( self ): + return None ## This is clearly a HACK, parameters should only be used for things the user ## can change, there needs to be a different way to specify this. 
I'm leaving @@ -354,6 +356,9 @@ return form_builder.HiddenField( self.name, self.get_value( trans ) ) def get_initial_value( self, trans, context ): return self.value + def get_label( self ): + # BaseURLToolParameters are ultimately "hidden" parameters + return None class SelectToolParameter( ToolParameter ): """ diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/web/controllers/async.py --- a/lib/galaxy/web/controllers/async.py Sun Sep 21 17:36:28 2008 -0400 +++ b/lib/galaxy/web/controllers/async.py Mon Sep 22 10:36:34 2008 -0400 @@ -68,8 +68,8 @@ galaxy_url = trans.request.base + '/async/%s/%s/%s' % ( tool_id, data.id, key ) galaxy_url = params.get("GALAXY_URL",galaxy_url) params = dict( url=URL, GALAXY_URL=galaxy_url ) - params[tool.outputs.keys()[0]] = data.id #assume there is exactly one output file possible - #tool.execute( app=self.app, history=history, incoming=params ) + # Assume there is exactly one output file possible + params[tool.outputs.keys()[0]] = data.id tool.execute( trans, incoming=params ) else: log.debug('async error -> %s' % STATUS) diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/web/controllers/tool_runner.py --- a/lib/galaxy/web/controllers/tool_runner.py Sun Sep 21 17:36:28 2008 -0400 +++ b/lib/galaxy/web/controllers/tool_runner.py Mon Sep 22 10:36:34 2008 -0400 @@ -51,3 +51,10 @@ add_frame.wiki_url = trans.app.config.wiki_url add_frame.from_noframe = True return trans.fill_template( template, history=history, toolbox=toolbox, tool=tool, util=util, add_frame=add_frame, **vars ) + + @web.expose + def redirect( self, trans, redirect_url=None, **kwd ): + if not redirect_url: + return trans.show_error_message( "Required URL for redirection missing" ) + trans.log_event( "Redirecting to: %s" % redirect_url ) + return trans.fill_template( 'root/redirect.mako', redirect_url=redirect_url ) diff -r dabed25dfbaf -r aae4754d6828 templates/root/redirect.mako --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/templates/root/redirect.mako Mon Sep 22 
10:36:34 2008 -0400 @@ -0,0 +1,5 @@ +<%inherit file="/base.mako"/> + +<script type="text/javascript"> + top.location.href = '${redirect_url}'; +</script> \ No newline at end of file diff -r dabed25dfbaf -r aae4754d6828 templates/tool_form.tmpl --- a/templates/tool_form.tmpl Sun Sep 21 17:36:28 2008 -0400 +++ b/templates/tool_form.tmpl Mon Sep 22 10:36:34 2008 -0400 @@ -73,10 +73,12 @@ #set cls = "form-row" #end if <div class="$cls"> - <label> - ${param.get_label()}: - </label> - + #set label = $param.get_label() + #if $label: + <label> + $label: + </label> + #end if #set field = $param.get_html_field( $caller, $parent_state[ $param.name ], $context ) #set $field.refresh_on_change = $param.refresh_on_change <div style="float: left; width: 250px; margin-right: 10px;">$field.get_html( $prefix )</div> diff -r dabed25dfbaf -r aae4754d6828 tool_conf.xml.sample --- a/tool_conf.xml.sample Sun Sep 21 17:36:28 2008 -0400 +++ b/tool_conf.xml.sample Mon Sep 22 10:36:34 2008 -0400 @@ -9,6 +9,7 @@ <tool file="data_source/biomart.xml" /> <tool file="data_source/biomart_test.xml" /> <tool file="data_source/gbrowse_elegans.xml" /> + <tool file="data_source/flymine.xml" /> <tool file="data_source/encode_db.xml" /> <tool file="data_source/hbvar.xml" /> <tool file="validation/fix_errors.xml" /> @@ -20,6 +21,9 @@ <tool file="data_source/encode_import_transcription_regulation.xml"/> <tool file="data_source/encode_import_all_latest_datasets.xml" /> <tool file="data_source/encode_import_gencode.xml" /> + </section> + <section name="Send Data" id="send"> + <tool file="data_destination/epigraph.xml" /> </section> <section name="ENCODE Tools" id="EncodeTools"> <tool file="encode/gencode_partition.xml" /> diff -r dabed25dfbaf -r aae4754d6828 tools/data_destination/epigraph.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/data_destination/epigraph.xml Mon Sep 22 10:36:34 2008 -0400 @@ -0,0 +1,21 @@ +<?xml version="1.0"?> +<tool name="Perform EpiGRAPH" id="epigraph"> + <description> 
Genome analysis and prediction</description> + <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params> + <inputs> + <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH"> + <validator type="unspecified_build" /> + </param> + <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/DataImport.jsp" /> + <param name="DATA_URL" type="baseurl" value="/datasets" /> + <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> + </inputs> + <outputs/> + <help> +**What it does** + +This tool sends the selected dataset to EpiGRAPH for in-depth analysis and prediction. + + </help> +</tool> + diff -r dabed25dfbaf -r aae4754d6828 tools/data_source/flymine.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/data_source/flymine.xml Mon Sep 22 10:36:34 2008 -0400 @@ -0,0 +1,16 @@ +<?xml version="1.0"?> +<tool name="Flymine" id="flymine"> + <description>server</description> + <command interpreter="python">intermine.py $output</command> + <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get" target="_top"> + <display>go to Flymine server $GALAXY_URL</display> + <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> + </inputs> + <uihints minwidth="800"/> + <code file="flymine_filter_code.py"/> + <outputs> + <data name="output" format="txt" /> + </outputs> + <options sanitize="False" refresh="True"/> +</tool> + diff -r dabed25dfbaf -r aae4754d6828 tools/data_source/flymine_filter_code.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/data_source/flymine_filter_code.py Mon Sep 22 10:36:34 2008 -0400 @@ -0,0 +1,31 @@ +# Code for direct connection to flymine +from galaxy.datatypes import sniff +import urllib + +import logging +log = logging.getLogger( __name__ ) + +def exec_before_job( app, inp_data, out_data, param_dict, tool=None ): + """Sets the 
attributes of the data""" + items = out_data.items() + for name, data in items: + data.dbkey = param_dict.get( 'dbkey', '?' ) + # Store flymine parameters temporarily in output file + out = open( data.file_name, 'w' ) + for key, value in param_dict.items(): + out.write( "%s\t%s\n" % ( key, value ) ) + out.close() + out_data[ name ] = data + +def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None ): + """Verifies the data after the run""" + name, data = out_data.items()[0] + if data.state == data.states.OK: + data.info = data.name + if data.extension == 'txt': + data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order ) + data = app.datatypes_registry.change_datatype( data, data_type ) + data.set_peek() + data.set_size() + data.flush() + diff -r dabed25dfbaf -r aae4754d6828 tools/data_source/intermine.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/data_source/intermine.py Mon Sep 22 10:36:34 2008 -0400 @@ -0,0 +1,45 @@ +#!/usr/bin/env python +#Retreives data from intermine and stores in a file. Intermine parameters are provided in the input/output file. +import urllib, sys, os, gzip, tempfile, shutil +from galaxy import eggs +from galaxy.datatypes import data + +assert sys.version_info[:2] >= ( 2, 4 ) + +def stop_err( msg ): + sys.stderr.write( msg ) + sys.exit() + +def __main__(): + filename = sys.argv[1] + params = {} + + for line in open( filename, 'r' ): + try: + line = line.strip() + fields = line.split( '\t' ) + params[ fields[0] ] = fields[1] + except: + continue + + URL = params.get( 'URL', None ) + if not URL: + open( filename, 'w' ).write( "" ) + stop_err( 'Datasource has not sent back a URL parameter.' 
) + + CHUNK_SIZE = 2**20 # 1Mb + try: + page = urllib.urlopen( URL ) + except Exception, exc: + raise Exception( 'Problems connecting to %s (%s)' % ( URL, exc ) ) + sys.exit( 1 ) + + fp = open( filename, 'wb' ) + while 1: + chunk = page.read( CHUNK_SIZE ) + if not chunk: + break + fp.write( chunk ) + fp.close() + +if __name__ == "__main__": __main__()
participants (1)
-
greg@scofield.bx.psu.edu