details: http://www.bx.psu.edu/hg/galaxy/rev/5a92b5877cf7 changeset: 2505:5a92b5877cf7 user: Dan Blankenberg dan@bx.psu.edu date: Mon Jul 27 10:46:25 2009 -0400 description: Allow for multiple optional outputs to be specified in a tool's xml file by specifying 'filter' tags. The text contents of the filter tag are evaluated. If the result is False, the output will not be created. A NoneDataset object is provided in the param_dict for ease of constructing command lines.
This allows a variable number of output files to be created before the tool is run, in cases where the number of outputs can be determined statically from the tool parameters.
Example of an output with filter:
<data format="txt" name="optional_output"> <filter>some_parameter_name == 'some_parameter_value'</filter> </data>
The output dataset, optional_output, will only be created when the tool parameter 'some_parameter_name' is 'some_parameter_value'.
2 file(s) affected in this change:
lib/galaxy/tools/__init__.py lib/galaxy/tools/actions/__init__.py
diffs (192 lines):
diff -r 857d3a8ebd3d -r 5a92b5877cf7 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Sun Jul 26 12:08:44 2009 -0400 +++ b/lib/galaxy/tools/__init__.py Mon Jul 27 10:46:25 2009 -0400 @@ -235,12 +235,13 @@ (format, metadata_source, parent) """ def __init__( self, name, format=None, metadata_source=None, - parent=None, label=None ): + parent=None, label=None, filters = None ): self.name = name self.format = format self.metadata_source = metadata_source self.parent = parent self.label = label + self.filters = filters or []
# Tuple emulation
@@ -413,6 +414,7 @@ output.metadata_source = data_elem.get("metadata_source", "") output.parent = data_elem.get("parent", None) output.label = util.xml_text( data_elem, "label" ) + output.filters = data_elem.findall( 'filter' ) self.outputs[ output.name ] = output # Any extra generated config files for the tool self.config_files = [] @@ -1294,6 +1296,10 @@ param_dict[name].files_path = os.path.abspath(os.path.join( job_working_directory, "dataset_%s_files" % (hda.dataset.id) )) for child in hda.children: param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child ) + for out_name, output in self.outputs.iteritems(): + if out_name not in param_dict and output.filters: + #assume the reason we lack this output is because a filter failed to pass; for tool writing convienence, provide a NoneDataset + param_dict[ out_name ] = NoneDataset( datatypes_registry = self.app.datatypes_registry, ext = output.format ) # We add access to app here, this allows access to app.config, etc param_dict['__app__'] = RawObjectWrapper( self.app ) # More convienent access to app.config.new_file_path; we don't need to wrap a string diff -r 857d3a8ebd3d -r 5a92b5877cf7 lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py Sun Jul 26 12:08:44 2009 -0400 +++ b/lib/galaxy/tools/actions/__init__.py Mon Jul 27 10:46:25 2009 -0400 @@ -156,76 +156,83 @@ parent_to_child_pairs = [] child_dataset_names = set() for name, output in tool.outputs.items(): - if output.parent: - parent_to_child_pairs.append( ( output.parent, name ) ) - child_dataset_names.add( name ) - ## What is the following hack for? Need to document under what - ## conditions can the following occur? (james@bx.psu.edu) - # HACK: the output data has already been created - # this happens i.e. 
as a result of the async controller - if name in incoming: - dataid = incoming[name] - data = trans.app.model.HistoryDatasetAssociation.get( dataid ) - assert data != None - out_data[name] = data - else: - # the type should match the input - ext = output.format - if ext == "input": - ext = input_ext - #process change_format tags - if output.change_format: - for change_elem in output.change_format: - for when_elem in change_elem.findall( 'when' ): - check = incoming.get( when_elem.get( 'input' ), None ) - if check is not None: - if check == when_elem.get( 'value', None ): - ext = when_elem.get( 'format', ext ) - else: - check = when_elem.get( 'input_dataset', None ) + for filter in output.filters: + try: + if not eval( filter.text, globals(), incoming ): + break #do not create this dataset + except Exception, e: + log.debug( 'Dataset output filter failed: %s' % e ) + else: #all filters passed + if output.parent: + parent_to_child_pairs.append( ( output.parent, name ) ) + child_dataset_names.add( name ) + ## What is the following hack for? Need to document under what + ## conditions can the following occur? (james@bx.psu.edu) + # HACK: the output data has already been created + # this happens i.e. 
as a result of the async controller + if name in incoming: + dataid = incoming[name] + data = trans.app.model.HistoryDatasetAssociation.get( dataid ) + assert data != None + out_data[name] = data + else: + # the type should match the input + ext = output.format + if ext == "input": + ext = input_ext + #process change_format tags + if output.change_format: + for change_elem in output.change_format: + for when_elem in change_elem.findall( 'when' ): + check = incoming.get( when_elem.get( 'input' ), None ) if check is not None: - check = inp_data.get( check, None ) + if check == when_elem.get( 'value', None ): + ext = when_elem.get( 'format', ext ) + else: + check = when_elem.get( 'input_dataset', None ) if check is not None: - if str( getattr( check, when_elem.get( 'attribute' ) ) ) == when_elem.get( 'value', None ): - ext = when_elem.get( 'format', ext ) - data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True ) - # Commit the dataset immediately so it gets database assigned unique id - data.flush() - trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions ) - # Create an empty file immediately - open( data.file_name, "w" ).close() - # This may not be neccesary with the new parent/child associations - data.designation = name - # Copy metadata from one of the inputs if requested. - if output.metadata_source: - data.init_meta( copy_from=inp_data[output.metadata_source] ) - else: - data.init_meta() - # Take dbkey from LAST input - data.dbkey = str(input_dbkey) - # Set state - # FIXME: shouldn't this be NEW until the job runner changes it? 
- data.state = data.states.QUEUED - data.blurb = "queued" - # Set output label - if output.label: - params = make_dict_copy( incoming ) - # wrapping the params allows the tool config to contain things like - # <outputs> - # <data format="input" name="output" label="Blat on ${<input_param>.name}" /> - # </outputs> - wrap_values( tool.inputs, params ) - params['tool'] = tool - params['on_string'] = on_text - data.name = fill_template( output.label, context=params ) - else: - data.name = tool.name - if on_text: - data.name += ( " on " + on_text ) - # Store output - out_data[ name ] = data - # Store all changes to database - trans.app.model.flush() + check = inp_data.get( check, None ) + if check is not None: + if str( getattr( check, when_elem.get( 'attribute' ) ) ) == when_elem.get( 'value', None ): + ext = when_elem.get( 'format', ext ) + data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True ) + # Commit the dataset immediately so it gets database assigned unique id + data.flush() + trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions ) + # Create an empty file immediately + open( data.file_name, "w" ).close() + # This may not be neccesary with the new parent/child associations + data.designation = name + # Copy metadata from one of the inputs if requested. + if output.metadata_source: + data.init_meta( copy_from=inp_data[output.metadata_source] ) + else: + data.init_meta() + # Take dbkey from LAST input + data.dbkey = str(input_dbkey) + # Set state + # FIXME: shouldn't this be NEW until the job runner changes it? 
+ data.state = data.states.QUEUED + data.blurb = "queued" + # Set output label + if output.label: + params = make_dict_copy( incoming ) + # wrapping the params allows the tool config to contain things like + # <outputs> + # <data format="input" name="output" label="Blat on ${<input_param>.name}" /> + # </outputs> + wrap_values( tool.inputs, params ) + params['tool'] = tool + params['on_string'] = on_text + data.name = fill_template( output.label, context=params ) + else: + data.name = tool.name + if on_text: + data.name += ( " on " + on_text ) + # Store output + out_data[ name ] = data + # Store all changes to database + trans.app.model.flush() # Add all the top-level (non-child) datasets to the history for name in out_data.keys(): if name not in child_dataset_names and name not in incoming: #don't add children; or already existing datasets, i.e. async created