details:
http://www.bx.psu.edu/hg/galaxy/rev/aae4754d6828
changeset: 1524:aae4754d6828
user: Greg Von Kuster <greg(a)bx.psu.edu>
date: Mon Sep 22 10:36:34 2008 -0400
description:
Integrate with intermine ( data source ) and epigraph ( data destination ). Receiving
data from epigraph is coming soon. Data is sent to epigraph using a combination of the DATA_URL
and REDIRECT_URL tool params. Tools that redirect in this way create jobs ( for record keeping ), but do not queue them for
execution.
12 file(s) affected in this change:
lib/galaxy/tools/__init__.py
lib/galaxy/tools/actions/__init__.py
lib/galaxy/tools/parameters/basic.py
lib/galaxy/web/controllers/async.py
lib/galaxy/web/controllers/tool_runner.py
templates/root/redirect.mako
templates/tool_form.tmpl
tool_conf.xml.sample
tools/data_destination/epigraph.xml
tools/data_source/flymine.xml
tools/data_source/flymine_filter_code.py
tools/data_source/intermine.py
diffs (429 lines):
diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py Sun Sep 21 17:36:28 2008 -0400
+++ b/lib/galaxy/tools/__init__.py Mon Sep 22 10:36:34 2008 -0400
@@ -239,6 +239,16 @@
self.command = interpreter + " " + self.command
else:
self.command = ''
+ # Parameters used to build URL for redirection to external app
+ redirect_url_params = root.find( "redirect_url_params" )
+ if redirect_url_params is not None and redirect_url_params.text is not None:
+ # get rid of leading / trailing white space
+ redirect_url_params = redirect_url_params.text.strip()
+ # Replace remaining white space with something we can safely split on later
+ # when we are building the params
+ self.redirect_url_params = redirect_url_params.replace( ' ',
'**^**' )
+ else:
+ self.redirect_url_params = ''
# Short description of the tool
self.description = util.xml_text(root, "description")
# Job runner
@@ -677,7 +687,7 @@
return "tool_form.tmpl", dict( errors=errors, tool_state=state,
incoming=incoming, error_message=error_message )
# If we've completed the last page we can execute the tool
elif state.page == self.last_page:
- out_data = self.execute( trans, params )
+ out_data = self.execute( trans, incoming=params )
return 'tool_executed.tmpl', dict( out_data=out_data )
# Otherwise move on to the next page
else:
@@ -689,8 +699,8 @@
# Just a refresh, render the form with updated state and errors.
return 'tool_form.tmpl', dict( errors=errors, tool_state=state )
- def update_state( self, trans, inputs, state, incoming,
- prefix="", context=None, update_only=False,
old_errors={}, changed_dependencies={} ):
+ def update_state( self, trans, inputs, state, incoming, prefix="",
context=None,
+ update_only=False, old_errors={}, changed_dependencies={} ):
"""
Update the tool state in `state` using the user input in `incoming`.
This is designed to be called recursively: `inputs` contains the
@@ -877,14 +887,14 @@
raise Exception( "Unexpected parameter type" )
return args
- def execute( self, trans, incoming={}, set_output_hid = True ):
+ def execute( self, trans, incoming={}, set_output_hid=True ):
"""
Execute the tool using parameter values in `incoming`. This just
dispatches to the `ToolAction` instance specified by
`self.tool_action`. In general this will create a `Job` that
when run will build the tool's outputs, e.g. `DefaultToolAction`.
"""
- return self.tool_action.execute( self, trans, incoming, set_output_hid =
set_output_hid )
+ return self.tool_action.execute( self, trans, incoming=incoming,
set_output_hid=set_output_hid )
def params_to_strings( self, params, app ):
return params_to_strings( self.inputs, params, app )
@@ -1045,7 +1055,54 @@
#e.args = ( 'Error substituting into command line. Params: %r, Command:
%s' % ( param_dict, self.command ) )
raise
return command_line
-
+
+ def build_redirect_url_params( self, param_dict ):
+ """Substitute parameter values into
self.redirect_url_params"""
+ if not self.redirect_url_params:
+ return
+ redirect_url_params = None
+ # Substituting parameter values into the url params
+ redirect_url_params = fill_template( self.redirect_url_params, context=param_dict
)
+ # Remove newlines
+ redirect_url_params = redirect_url_params.replace( "\n", " "
).replace( "\r", " " )
+ return redirect_url_params
+
+ def parse_redirect_url( self, inp_data, param_dict ):
+ """Parse the REDIRECT_URL tool param"""
+ # Tools that send data to an external application via a redirect must include the
following 3
+ # tool params:
+ # REDIRECT_URL - the url to which the data is being sent
+ # DATA_URL - the url to which the receiving application will send an http post to
retrieve the Galaxy data
+ # GALAXY_URL - the url to which the external application may post data as a response
+ redirect_url = param_dict.get( 'REDIRECT_URL' )
+ redirect_url_params = self.build_redirect_url_params( param_dict )
+ # Add the parameters to the redirect url. We're splitting the param string
on '**^**'
+ # because the self.parse() method replaced white space with that separator.
+ params = redirect_url_params.split( '**^**' )
+ rup_dict = {}
+ for param in params:
+ p_list = param.split( '=' )
+ p_name = p_list[0]
+ p_val = p_list[1]
+ rup_dict[ p_name ] = p_val
+ DATA_URL = param_dict.get( 'DATA_URL', None )
+ assert DATA_URL is not None, "DATA_URL parameter missing in tool
config."
+ # Get the dataset - there should only be 1
+ for name in inp_data.keys():
+ data = inp_data[ name ]
+ DATA_URL += "/%s/display" % str( data.id )
+ redirect_url += "?DATA_URL=%s" % DATA_URL
+ # Add the redirect_url_params to redirect_url
+ for p_name in rup_dict:
+ redirect_url += "&%s=%s" % ( p_name, rup_dict[ p_name ] )
+ # Add the current user email to redirect_url
+ if data.history.user:
+ USERNAME = str( data.history.user.email )
+ else:
+ USERNAME = 'Anonymous'
+ redirect_url += "&USERNAME=%s" % USERNAME
+ return redirect_url
+
def call_hook( self, hook_name, *args, **kwargs ):
"""
Call the custom code hook function identified by 'hook_name' if any,
diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/tools/actions/__init__.py
--- a/lib/galaxy/tools/actions/__init__.py Sun Sep 21 17:36:28 2008 -0400
+++ b/lib/galaxy/tools/actions/__init__.py Mon Sep 22 10:36:34 2008 -0400
@@ -2,6 +2,8 @@
from galaxy.tools.parameters import *
from galaxy.util.template import fill_template
from galaxy.util.none_like import NoneDataset
+from galaxy.web import url_for
+from galaxy.jobs import JOB_OK
import logging
log = logging.getLogger( __name__ )
@@ -63,7 +65,7 @@
tool.visit_inputs( param_values, visitor )
return input_datasets
- def execute(self, tool, trans, incoming={}, set_output_hid = True ):
+ def execute(self, tool, trans, incoming={}, set_output_hid=True ):
out_data = {}
# Collect any input datasets from the incoming parameters
inp_data = self.collect_input_datasets( tool, incoming, trans )
@@ -90,15 +92,12 @@
on_text = '%s, %s, and others' % tuple(input_names[0:2])
else:
on_text = ""
-
# Add the dbkey to the incoming parameters
incoming[ "dbkey" ] = input_dbkey
-
# Keep track of parent / child relationships, we'll create all the
# datasets first, then create the associations
parent_to_child_pairs = []
child_dataset_names = set()
-
for name, output in tool.outputs.items():
if output.parent:
parent_to_child_pairs.append( ( output.parent, name ) )
@@ -149,23 +148,19 @@
out_data[ name ] = data
# Store all changes to database
trans.app.model.flush()
-
# Add all the top-level (non-child) datasets to the history
for name in out_data.keys():
if name not in child_dataset_names and name not in incoming: #don't add
children; or already existing datasets, i.e. async created
data = out_data[ name ]
trans.history.add_dataset( data, set_hid = set_output_hid )
data.flush()
-
# Add all the children to their parents
for parent_name, child_name in parent_to_child_pairs:
parent_dataset = out_data[ parent_name ]
child_dataset = out_data[ child_name ]
parent_dataset.children.append( child_dataset )
-
# Store data after custom code runs
trans.app.model.flush()
-
# Create the job object
job = trans.app.model.Job()
job.session_id = trans.get_galaxy_session( create=True ).id
@@ -189,8 +184,19 @@
for name, dataset in out_data.iteritems():
job.add_output_dataset( name, dataset )
trans.app.model.flush()
-
- # Queue the job for execution
- trans.app.job_queue.put( job.id, tool )
- trans.log_event( "Added job to the job queue, id: %s" % str(job.id),
tool_id=job.tool_id )
- return out_data
+ # Some tools are not really executable, but jobs are still created for them ( for
record keeping ).
+ # Examples include tools that redirect to other applications ( epigraph ). These
special tools must
+ # include something that can be retrieved from the params ( e.g., REDIRECT_URL )
to keep the job
+ # from being queued.
+ if 'REDIRECT_URL' in incoming:
+ redirect_url = tool.parse_redirect_url( inp_data, incoming )
+ # Job should not be queued, so set state to ok
+ job.state = JOB_OK
+ job.info = "Redirected to: %s" % redirect_url
+ job.flush()
+ trans.response.send_redirect( url_for( controller='tool_runner',
action='redirect', redirect_url=redirect_url ) )
+ else:
+ # Queue the job for execution
+ trans.app.job_queue.put( job.id, tool )
+ trans.log_event( "Added job to the job queue, id: %s" %
str(job.id), tool_id=job.tool_id )
+ return out_data
diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/tools/parameters/basic.py
--- a/lib/galaxy/tools/parameters/basic.py Sun Sep 21 17:36:28 2008 -0400
+++ b/lib/galaxy/tools/parameters/basic.py Mon Sep 22 10:36:34 2008 -0400
@@ -332,6 +332,8 @@
return form_builder.HiddenField( self.name, self.value )
def get_initial_value( self, trans, context ):
return self.value
+ def get_label( self ):
+ return None
## This is clearly a HACK, parameters should only be used for things the user
## can change, there needs to be a different way to specify this. I'm leaving
@@ -354,6 +356,9 @@
return form_builder.HiddenField( self.name, self.get_value( trans ) )
def get_initial_value( self, trans, context ):
return self.value
+ def get_label( self ):
+ # BaseURLToolParameters are ultimately "hidden" parameters
+ return None
class SelectToolParameter( ToolParameter ):
"""
diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/web/controllers/async.py
--- a/lib/galaxy/web/controllers/async.py Sun Sep 21 17:36:28 2008 -0400
+++ b/lib/galaxy/web/controllers/async.py Mon Sep 22 10:36:34 2008 -0400
@@ -68,8 +68,8 @@
galaxy_url = trans.request.base + '/async/%s/%s/%s' % ( tool_id,
data.id, key )
galaxy_url = params.get("GALAXY_URL",galaxy_url)
params = dict( url=URL, GALAXY_URL=galaxy_url )
- params[tool.outputs.keys()[0]] = data.id #assume there is exactly one
output file possible
- #tool.execute( app=self.app, history=history, incoming=params )
+ # Assume there is exactly one output file possible
+ params[tool.outputs.keys()[0]] = data.id
tool.execute( trans, incoming=params )
else:
log.debug('async error -> %s' % STATUS)
diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/web/controllers/tool_runner.py
--- a/lib/galaxy/web/controllers/tool_runner.py Sun Sep 21 17:36:28 2008 -0400
+++ b/lib/galaxy/web/controllers/tool_runner.py Mon Sep 22 10:36:34 2008 -0400
@@ -51,3 +51,10 @@
add_frame.wiki_url = trans.app.config.wiki_url
add_frame.from_noframe = True
return trans.fill_template( template, history=history, toolbox=toolbox,
tool=tool, util=util, add_frame=add_frame, **vars )
+
+ @web.expose
+ def redirect( self, trans, redirect_url=None, **kwd ):
+ if not redirect_url:
+ return trans.show_error_message( "Required URL for redirection
missing" )
+ trans.log_event( "Redirecting to: %s" % redirect_url )
+ return trans.fill_template( 'root/redirect.mako',
redirect_url=redirect_url )
diff -r dabed25dfbaf -r aae4754d6828 templates/root/redirect.mako
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/templates/root/redirect.mako Mon Sep 22 10:36:34 2008 -0400
@@ -0,0 +1,5 @@
+<%inherit file="/base.mako"/>
+
+<script type="text/javascript">
+ top.location.href = '${redirect_url}';
+</script>
\ No newline at end of file
diff -r dabed25dfbaf -r aae4754d6828 templates/tool_form.tmpl
--- a/templates/tool_form.tmpl Sun Sep 21 17:36:28 2008 -0400
+++ b/templates/tool_form.tmpl Mon Sep 22 10:36:34 2008 -0400
@@ -73,10 +73,12 @@
#set cls = "form-row"
#end if
<div class="$cls">
- <label>
- ${param.get_label()}:
- </label>
-
+ #set label = $param.get_label()
+ #if $label:
+ <label>
+ $label:
+ </label>
+ #end if
#set field = $param.get_html_field( $caller, $parent_state[ $param.name ],
$context )
#set $field.refresh_on_change = $param.refresh_on_change
<div style="float: left; width: 250px; margin-right:
10px;">$field.get_html( $prefix )</div>
diff -r dabed25dfbaf -r aae4754d6828 tool_conf.xml.sample
--- a/tool_conf.xml.sample Sun Sep 21 17:36:28 2008 -0400
+++ b/tool_conf.xml.sample Mon Sep 22 10:36:34 2008 -0400
@@ -9,6 +9,7 @@
<tool file="data_source/biomart.xml" />
<tool file="data_source/biomart_test.xml" />
<tool file="data_source/gbrowse_elegans.xml" />
+ <tool file="data_source/flymine.xml" />
<tool file="data_source/encode_db.xml" />
<tool file="data_source/hbvar.xml" />
<tool file="validation/fix_errors.xml" />
@@ -20,6 +21,9 @@
<tool
file="data_source/encode_import_transcription_regulation.xml"/>
<tool file="data_source/encode_import_all_latest_datasets.xml" />
<tool file="data_source/encode_import_gencode.xml" />
+ </section>
+ <section name="Send Data" id="send">
+ <tool file="data_destination/epigraph.xml" />
</section>
<section name="ENCODE Tools" id="EncodeTools">
<tool file="encode/gencode_partition.xml" />
diff -r dabed25dfbaf -r aae4754d6828 tools/data_destination/epigraph.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_destination/epigraph.xml Mon Sep 22 10:36:34 2008 -0400
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<tool name="Perform EpiGRAPH" id="epigraph">
+ <description> Genome analysis and prediction</description>
+ <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name}
INFO=${input1.info}</redirect_url_params>
+ <inputs>
+ <param format="bed" name="input1" type="data"
label="Send this dataset to EpiGRAPH">
+ <validator type="unspecified_build" />
+ </param>
+ <param name="REDIRECT_URL" type="hidden"
value="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/DataImport.jsp"
/>
+ <param name="DATA_URL" type="baseurl"
value="/datasets" />
+ <param name="GALAXY_URL" type="baseurl"
value="/tool_runner?tool_id=epigraph_import" />
+ </inputs>
+ <outputs/>
+ <help>
+**What it does**
+
+This tool sends the selected dataset to EpiGRAPH for in-depth analysis and prediction.
+
+ </help>
+</tool>
+
diff -r dabed25dfbaf -r aae4754d6828 tools/data_source/flymine.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/flymine.xml Mon Sep 22 10:36:34 2008 -0400
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<tool name="Flymine" id="flymine">
+ <description>server</description>
+ <command interpreter="python">intermine.py $output</command>
+ <inputs
action="http://preview.flymine.org/preview/begin.do"
check_values="false" method="get" target="_top">
+ <display>go to Flymine server $GALAXY_URL</display>
+ <param name="GALAXY_URL" type="baseurl"
value="/tool_runner?tool_id=flymine" />
+ </inputs>
+ <uihints minwidth="800"/>
+ <code file="flymine_filter_code.py"/>
+ <outputs>
+ <data name="output" format="txt" />
+ </outputs>
+ <options sanitize="False" refresh="True"/>
+</tool>
+
diff -r dabed25dfbaf -r aae4754d6828 tools/data_source/flymine_filter_code.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/flymine_filter_code.py Mon Sep 22 10:36:34 2008 -0400
@@ -0,0 +1,31 @@
+# Code for direct connection to flymine
+from galaxy.datatypes import sniff
+import urllib
+
+import logging
+log = logging.getLogger( __name__ )
+
+def exec_before_job( app, inp_data, out_data, param_dict, tool=None ):
+ """Sets the attributes of the data"""
+ items = out_data.items()
+ for name, data in items:
+ data.dbkey = param_dict.get( 'dbkey', '?' )
+ # Store flymine parameters temporarily in output file
+ out = open( data.file_name, 'w' )
+ for key, value in param_dict.items():
+ out.write( "%s\t%s\n" % ( key, value ) )
+ out.close()
+ out_data[ name ] = data
+
+def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None,
stderr=None ):
+ """Verifies the data after the run"""
+ name, data = out_data.items()[0]
+ if data.state == data.states.OK:
+ data.info = data.name
+ if data.extension == 'txt':
+ data_type = sniff.guess_ext( data.file_name,
sniff_order=app.datatypes_registry.sniff_order )
+ data = app.datatypes_registry.change_datatype( data, data_type )
+ data.set_peek()
+ data.set_size()
+ data.flush()
+
diff -r dabed25dfbaf -r aae4754d6828 tools/data_source/intermine.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/intermine.py Mon Sep 22 10:36:34 2008 -0400
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+#Retrieves data from intermine and stores it in a file. Intermine parameters are provided in
the input/output file.
+import urllib, sys, os, gzip, tempfile, shutil
+from galaxy import eggs
+from galaxy.datatypes import data
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err( msg ):
+ sys.stderr.write( msg )
+ sys.exit()
+
+def __main__():
+ filename = sys.argv[1]
+ params = {}
+
+ for line in open( filename, 'r' ):
+ try:
+ line = line.strip()
+ fields = line.split( '\t' )
+ params[ fields[0] ] = fields[1]
+ except:
+ continue
+
+ URL = params.get( 'URL', None )
+ if not URL:
+ open( filename, 'w' ).write( "" )
+ stop_err( 'Datasource has not sent back a URL parameter.' )
+
+ CHUNK_SIZE = 2**20 # 1Mb
+ try:
+ page = urllib.urlopen( URL )
+ except Exception, exc:
+ raise Exception( 'Problems connecting to %s (%s)' % ( URL, exc ) )
+ sys.exit( 1 )
+
+ fp = open( filename, 'wb' )
+ while 1:
+ chunk = page.read( CHUNK_SIZE )
+ if not chunk:
+ break
+ fp.write( chunk )
+ fp.close()
+
+if __name__ == "__main__": __main__()