details: http://www.bx.psu.edu/hg/galaxy/rev/5a92b5877cf7 changeset: 2505:5a92b5877cf7 user: Dan Blankenberg dan@bx.psu.edu date: Mon Jul 27 10:46:25 2009 -0400 description: Allow for multiple optional outputs to be specified in a tool's xml file by specifying 'filter' tags. The text contents of the filter tag are evaluated. If the result is False, the output will not be created. A NoneDataset object is provided in the param_dict for ease of constructing command lines.
This allows a variable number of output files to be created before the tool is run, in cases where the number of outputs can be determined statically from the tool parameters.
Example of an output with filter:
<data format="txt" name="optional_output"> <filter>some_parameter_name == 'some_parameter_value'</filter> </data>
The output dataset, optional_output, will only be created when the tool parameter 'some_parameter_name' is 'some_parameter_value'.
2 file(s) affected in this change:
lib/galaxy/tools/__init__.py lib/galaxy/tools/actions/__init__.py
diffs (192 lines):
diff -r 857d3a8ebd3d -r 5a92b5877cf7 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Sun Jul 26 12:08:44 2009 -0400 +++ b/lib/galaxy/tools/__init__.py Mon Jul 27 10:46:25 2009 -0400 @@ -235,12 +235,13 @@ (format, metadata_source, parent) """ def __init__( self, name, format=None, metadata_source=None, - parent=None, label=None ): + parent=None, label=None, filters = None ): self.name = name self.format = format self.metadata_source = metadata_source self.parent = parent self.label = label + self.filters = filters or []
# Tuple emulation
@@ -413,6 +414,7 @@ output.metadata_source = data_elem.get("metadata_source", "") output.parent = data_elem.get("parent", None) output.label = util.xml_text( data_elem, "label" ) + output.filters = data_elem.findall( 'filter' ) self.outputs[ output.name ] = output # Any extra generated config files for the tool self.config_files = [] @@ -1294,6 +1296,10 @@ param_dict[name].files_path = os.path.abspath(os.path.join( job_working_directory, "dataset_%s_files" % (hda.dataset.id) )) for child in hda.children: param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child ) + for out_name, output in self.outputs.iteritems(): + if out_name not in param_dict and output.filters: + #assume the reason we lack this output is because a filter failed to pass; for tool writing convienence, provide a NoneDataset + param_dict[ out_name ] = NoneDataset( datatypes_registry = self.app.datatypes_registry, ext = output.format ) # We add access to app here, this allows access to app.config, etc param_dict['__app__'] = RawObjectWrapper( self.app ) # More convienent access to app.config.new_file_path; we don't need to wrap a string diff -r 857d3a8ebd3d -r 5a92b5877cf7 lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py Sun Jul 26 12:08:44 2009 -0400 +++ b/lib/galaxy/tools/actions/__init__.py Mon Jul 27 10:46:25 2009 -0400 @@ -156,76 +156,83 @@ parent_to_child_pairs = [] child_dataset_names = set() for name, output in tool.outputs.items(): - if output.parent: - parent_to_child_pairs.append( ( output.parent, name ) ) - child_dataset_names.add( name ) - ## What is the following hack for? Need to document under what - ## conditions can the following occur? (james@bx.psu.edu) - # HACK: the output data has already been created - # this happens i.e. 
as a result of the async controller - if name in incoming: - dataid = incoming[name] - data = trans.app.model.HistoryDatasetAssociation.get( dataid ) - assert data != None - out_data[name] = data - else: - # the type should match the input - ext = output.format - if ext == "input": - ext = input_ext - #process change_format tags - if output.change_format: - for change_elem in output.change_format: - for when_elem in change_elem.findall( 'when' ): - check = incoming.get( when_elem.get( 'input' ), None ) - if check is not None: - if check == when_elem.get( 'value', None ): - ext = when_elem.get( 'format', ext ) - else: - check = when_elem.get( 'input_dataset', None ) + for filter in output.filters: + try: + if not eval( filter.text, globals(), incoming ): + break #do not create this dataset + except Exception, e: + log.debug( 'Dataset output filter failed: %s' % e ) + else: #all filters passed + if output.parent: + parent_to_child_pairs.append( ( output.parent, name ) ) + child_dataset_names.add( name ) + ## What is the following hack for? Need to document under what + ## conditions can the following occur? (james@bx.psu.edu) + # HACK: the output data has already been created + # this happens i.e. 
as a result of the async controller + if name in incoming: + dataid = incoming[name] + data = trans.app.model.HistoryDatasetAssociation.get( dataid ) + assert data != None + out_data[name] = data + else: + # the type should match the input + ext = output.format + if ext == "input": + ext = input_ext + #process change_format tags + if output.change_format: + for change_elem in output.change_format: + for when_elem in change_elem.findall( 'when' ): + check = incoming.get( when_elem.get( 'input' ), None ) if check is not None: - check = inp_data.get( check, None ) + if check == when_elem.get( 'value', None ): + ext = when_elem.get( 'format', ext ) + else: + check = when_elem.get( 'input_dataset', None ) if check is not None: - if str( getattr( check, when_elem.get( 'attribute' ) ) ) == when_elem.get( 'value', None ): - ext = when_elem.get( 'format', ext ) - data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True ) - # Commit the dataset immediately so it gets database assigned unique id - data.flush() - trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions ) - # Create an empty file immediately - open( data.file_name, "w" ).close() - # This may not be neccesary with the new parent/child associations - data.designation = name - # Copy metadata from one of the inputs if requested. - if output.metadata_source: - data.init_meta( copy_from=inp_data[output.metadata_source] ) - else: - data.init_meta() - # Take dbkey from LAST input - data.dbkey = str(input_dbkey) - # Set state - # FIXME: shouldn't this be NEW until the job runner changes it? 
- data.state = data.states.QUEUED - data.blurb = "queued" - # Set output label - if output.label: - params = make_dict_copy( incoming ) - # wrapping the params allows the tool config to contain things like - # <outputs> - # <data format="input" name="output" label="Blat on ${<input_param>.name}" /> - # </outputs> - wrap_values( tool.inputs, params ) - params['tool'] = tool - params['on_string'] = on_text - data.name = fill_template( output.label, context=params ) - else: - data.name = tool.name - if on_text: - data.name += ( " on " + on_text ) - # Store output - out_data[ name ] = data - # Store all changes to database - trans.app.model.flush() + check = inp_data.get( check, None ) + if check is not None: + if str( getattr( check, when_elem.get( 'attribute' ) ) ) == when_elem.get( 'value', None ): + ext = when_elem.get( 'format', ext ) + data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True ) + # Commit the dataset immediately so it gets database assigned unique id + data.flush() + trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions ) + # Create an empty file immediately + open( data.file_name, "w" ).close() + # This may not be neccesary with the new parent/child associations + data.designation = name + # Copy metadata from one of the inputs if requested. + if output.metadata_source: + data.init_meta( copy_from=inp_data[output.metadata_source] ) + else: + data.init_meta() + # Take dbkey from LAST input + data.dbkey = str(input_dbkey) + # Set state + # FIXME: shouldn't this be NEW until the job runner changes it? 
+ data.state = data.states.QUEUED + data.blurb = "queued" + # Set output label + if output.label: + params = make_dict_copy( incoming ) + # wrapping the params allows the tool config to contain things like + # <outputs> + # <data format="input" name="output" label="Blat on ${<input_param>.name}" /> + # </outputs> + wrap_values( tool.inputs, params ) + params['tool'] = tool + params['on_string'] = on_text + data.name = fill_template( output.label, context=params ) + else: + data.name = tool.name + if on_text: + data.name += ( " on " + on_text ) + # Store output + out_data[ name ] = data + # Store all changes to database + trans.app.model.flush() # Add all the top-level (non-child) datasets to the history for name in out_data.keys(): if name not in child_dataset_names and name not in incoming: #don't add children; or already existing datasets, i.e. async created