commit/galaxy-central: jmchilton: Expose improved sample tracking to tools for implicit map/reduce ops.
1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/b9ec027d4b1e/

Changeset:   b9ec027d4b1e
User:        jmchilton
Date:        2015-02-02 21:29:42+00:00
Summary:     Expose improved sample tracking to tools for implicit map/reduce ops.

Tools may now use $input.element_identifier during tool evaluation for 'data' input parameters, with the following semantics:

- If the input was specified as a single dataset by the user, this simply falls back to providing $input.name.
- If the input was mapped over a collection (to produce many jobs), or if the input is a 'multiple="true"' input that was supplied a collection, $input.element_identifier will be the element identifier of the corresponding collection item. This is generally much more useful than the dataset name, since it is preserved throughout workflows.

'data_collection' parameters can already access this kind of information, but it is something of a best practice to use simple 'data' parameters, since they remain compatible with more traditional, un-collected datasets (a brief usage sketch follows the diff below).

This commit really needs more comments, but Philip Mabon has been patiently waiting for this functionality for a long time.

Affected #:  9 files

diff -r ff3c5721f74857cf689966abfdeb0dc9390bf605 -r b9ec027d4b1e1b67f26632a55076dbf53e3225bd lib/galaxy/model/__init__.py
--- a/lib/galaxy/model/__init__.py
+++ b/lib/galaxy/model/__init__.py
@@ -2685,6 +2685,16 @@
         return instances

     @property
+    def dataset_elements( self ):
+        elements = []
+        for element in self.elements:
+            if element.is_collection:
+                elements.extend( element.child_collection.dataset_elements )
+            else:
+                elements.append( element )
+        return elements
+
+    @property
     def state( self ):
         # TODO: DatasetCollection state handling...
         return 'ok'

diff -r ff3c5721f74857cf689966abfdeb0dc9390bf605 -r b9ec027d4b1e1b67f26632a55076dbf53e3225bd lib/galaxy/tools/actions/__init__.py
--- a/lib/galaxy/tools/actions/__init__.py
+++ b/lib/galaxy/tools/actions/__init__.py
@@ -191,6 +191,10 @@
             if data.dbkey not in [None, '?']:
                 input_dbkey = data.dbkey

+            identifier = getattr( data, "element_identifier", None )
+            if identifier is not None:
+                incoming[ "%s|__identifier__" % name ] = identifier
+
         # Collect chromInfo dataset and add as parameters to incoming
         ( chrom_info, db_dataset ) = trans.app.genome_builds.get_chrom_info( input_dbkey, trans=trans, custom_build_hack_get_len_from_fasta_conversion=tool.id != 'CONVERTER_fasta_to_len' )
         if db_dataset:

diff -r ff3c5721f74857cf689966abfdeb0dc9390bf605 -r b9ec027d4b1e1b67f26632a55076dbf53e3225bd lib/galaxy/tools/evaluation.py
--- a/lib/galaxy/tools/evaluation.py
+++ b/lib/galaxy/tools/evaluation.py
@@ -154,7 +154,7 @@
             if isinstance( input, DataToolParameter ) and input.multiple:
                 dataset_instances = input_values[ input.name ]
                 if isinstance( dataset_instances, model.HistoryDatasetCollectionAssociation ):
-                    dataset_instances = dataset_instances.collection.dataset_instances[:]
+                    dataset_instances = dataset_instances.collection.dataset_elements[:]
                 input_values[ input.name ] = \
                     DatasetListWrapper( dataset_instances,
                                         dataset_paths=input_dataset_paths,
@@ -199,6 +199,9 @@
                     tool=self,
                     name=input.name
                 )
+                identifier_key = "%s|__identifier__" % input.name
+                if identifier_key in param_dict:
+                    wrapper_kwds["identifier"] = param_dict[identifier_key]
                 if dataset:
                     #A None dataset does not have a filename
                     real_path = dataset.file_name

diff -r ff3c5721f74857cf689966abfdeb0dc9390bf605 -r b9ec027d4b1e1b67f26632a55076dbf53e3225bd lib/galaxy/tools/parameters/meta.py
--- a/lib/galaxy/tools/parameters/meta.py
+++ b/lib/galaxy/tools/parameters/meta.py
@@ -14,6 +14,13 @@
     execution).
    """
+    to_remove = []
+    for key in incoming.keys():
+        if key.endswith("|__identifier__"):
+            to_remove.append(key)
+    for key in to_remove:
+        incoming.pop(key)
+
    def classify_unmodified_parameter( input_key ):
        value = incoming[ input_key ]
        if isinstance( value, dict ) and 'values' in value:
@@ -118,7 +125,11 @@
        subcollection_elements = subcollections.split_dataset_collection_instance( hdc, subcollection_type )
        return subcollection_elements
    else:
-        hdas = hdc.collection.dataset_instances
+        hdas = []
+        for element in hdc.collection.dataset_elements:
+            hda = element.dataset_instance
+            hda.element_identifier = element.element_identifier
+            hdas.append( hda )
        return hdas

diff -r ff3c5721f74857cf689966abfdeb0dc9390bf605 -r b9ec027d4b1e1b67f26632a55076dbf53e3225bd lib/galaxy/tools/wrappers.py
--- a/lib/galaxy/tools/wrappers.py
+++ b/lib/galaxy/tools/wrappers.py
@@ -187,7 +187,7 @@
    def items( self ):
        return iter( [ ( k, self.get( k ) ) for k, v in self.metadata.items() ] )

-    def __init__( self, dataset, datatypes_registry=None, tool=None, name=None, dataset_path=None ):
+    def __init__( self, dataset, datatypes_registry=None, tool=None, name=None, dataset_path=None, identifier=None ):
        if not dataset:
            try:
                # TODO: allow this to work when working with grouping
@@ -205,6 +205,14 @@
        self.datatypes_registry = datatypes_registry
        self.false_path = getattr( dataset_path, "false_path", None )
        self.false_extra_files_path = getattr( dataset_path, "false_extra_files_path", None )
+        self._element_identifier = identifier
+
+    @property
+    def element_identifier( self ):
+        identifier = self._element_identifier
+        if identifier is None:
+            identifier = self.name
+        return identifier

    @property
    def is_collection( self ):
@@ -270,6 +278,10 @@
            datasets = [datasets]

        def to_wrapper( dataset ):
+            if hasattr(dataset, "element_identifier"):
+                element = dataset
+                dataset = element.dataset_instance
+                kwargs["identifier"] = element.element_identifier
            return self._dataset_wrapper( dataset, dataset_paths, **kwargs )

        list.__init__( self, map( to_wrapper, datasets ) )

diff -r ff3c5721f74857cf689966abfdeb0dc9390bf605 -r b9ec027d4b1e1b67f26632a55076dbf53e3225bd test/api/test_tools.py
--- a/test/api/test_tools.py
+++ b/test/api/test_tools.py
@@ -553,6 +553,93 @@
        self.assertEquals( output1_content.strip(), "123" )
        self.assertEquals( output2_content.strip(), "456" )

+    @skip_without_tool( "identifier_single" )
+    def test_identifier_in_map( self ):
+        history_id = self.dataset_populator.new_history()
+        hdca_id = self.__build_pair( history_id, [ "123", "456" ] )
+        inputs = {
+            "input1": { 'batch': True, 'values': [ { 'src': 'hdca', 'id': hdca_id } ] },
+        }
+        create_response = self._run( "identifier_single", history_id, inputs )
+        self._assert_status_code_is( create_response, 200 )
+        create = create_response.json()
+        outputs = create[ 'outputs' ]
+        jobs = create[ 'jobs' ]
+        implicit_collections = create[ 'implicit_collections' ]
+        self.assertEquals( len( jobs ), 2 )
+        self.assertEquals( len( outputs ), 2 )
+        self.assertEquals( len( implicit_collections ), 1 )
+        output1 = outputs[ 0 ]
+        output2 = outputs[ 1 ]
+        output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 )
+        output2_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output2 )
+        self.assertEquals( output1_content.strip(), "forward" )
+        self.assertEquals( output2_content.strip(), "reverse" )
+
+    @skip_without_tool( "identifier_single" )
+    def test_identifier_outside_map( self ):
+        history_id = self.dataset_populator.new_history()
+        new_dataset1 = self.dataset_populator.new_dataset( history_id, content='123' )
+        inputs = {
+            "input1": { 'src': 'hda', 'id': new_dataset1["id"] },
+        }
+        create_response = self._run( "identifier_single", history_id, inputs )
+        self._assert_status_code_is( create_response, 200 )
+        create = create_response.json()
+        outputs = create[ 'outputs' ]
+        jobs = create[ 'jobs' ]
+        implicit_collections = create[ 'implicit_collections' ]
+        self.assertEquals( len( jobs ), 1 )
+        self.assertEquals( len( outputs ), 1 )
+        self.assertEquals( len( implicit_collections ), 0 )
+        output1 = outputs[ 0 ]
+        output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 )
+        self.assertEquals( output1_content.strip(), "Pasted Entry" )
+
+    @skip_without_tool( "identifier_multiple" )
+    def test_identifier_in_multiple_reduce( self ):
+        history_id = self.dataset_populator.new_history()
+        hdca_id = self.__build_pair( history_id, [ "123", "456" ] )
+        inputs = {
+            "input1": { 'src': 'hdca', 'id': hdca_id },
+        }
+        create_response = self._run( "identifier_multiple", history_id, inputs )
+        self._assert_status_code_is( create_response, 200 )
+        create = create_response.json()
+        outputs = create[ 'outputs' ]
+        jobs = create[ 'jobs' ]
+        implicit_collections = create[ 'implicit_collections' ]
+        self.assertEquals( len( jobs ), 1 )
+        self.assertEquals( len( outputs ), 1 )
+        self.assertEquals( len( implicit_collections ), 0 )
+        output1 = outputs[ 0 ]
+        output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 )
+        self.assertEquals( output1_content.strip(), "forward\nreverse" )
+
+    @skip_without_tool( "identifier_multiple" )
+    def test_identifier_with_multiple_normal_datasets( self ):
+        history_id = self.dataset_populator.new_history()
+        new_dataset1 = self.dataset_populator.new_dataset( history_id, content='123' )
+        new_dataset2 = self.dataset_populator.new_dataset( history_id, content='456' )
+        inputs = {
+            "input1": [
+                { 'src': 'hda', 'id': new_dataset1["id"] },
+                { 'src': 'hda', 'id': new_dataset2["id"] }
+            ]
+        }
+        create_response = self._run( "identifier_multiple", history_id, inputs )
+        self._assert_status_code_is( create_response, 200 )
+        create = create_response.json()
+        outputs = create[ 'outputs' ]
+        jobs = create[ 'jobs' ]
+        implicit_collections = create[ 'implicit_collections' ]
+        self.assertEquals( len( jobs ), 1 )
+        self.assertEquals( len( outputs ), 1 )
+        self.assertEquals( len( implicit_collections ), 0 )
+        output1 = outputs[ 0 ]
+        output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 )
+        self.assertEquals( output1_content.strip(), "Pasted Entry\nPasted Entry" )
+
    @skip_without_tool( "cat1" )
    def test_map_over_nested_collections_legacy( self ):
        history_id = self.dataset_populator.new_history()

diff -r ff3c5721f74857cf689966abfdeb0dc9390bf605 -r b9ec027d4b1e1b67f26632a55076dbf53e3225bd test/functional/tools/identifier_multiple.xml
--- /dev/null
+++ b/test/functional/tools/identifier_multiple.xml
@@ -0,0 +1,15 @@
+<tool id="identifier_multiple" name="identifier_multiple">
+    <command>
+        #for $input in $input1#
+        echo '$input.element_identifier' >> 'output1';
+        #end for#
+    </command>
+    <inputs>
+        <param type="data" name="input1" label="Input 1" multiple="true" />
+    </inputs>
+    <outputs>
+        <data name="output1" type="tabular" from_work_dir="output1" />
+    </outputs>
+    <tests>
+    </tests>
+</tool>

diff -r ff3c5721f74857cf689966abfdeb0dc9390bf605 -r b9ec027d4b1e1b67f26632a55076dbf53e3225bd test/functional/tools/identifier_single.xml
--- /dev/null
+++ b/test/functional/tools/identifier_single.xml
@@ -0,0 +1,13 @@
+<tool id="identifier_single" name="identifier_single">
+    <command>
+        echo '$input1.element_identifier' > 'output1'
+    </command>
+    <inputs>
+        <param type="data" name="input1" label="Input 1" />
+    </inputs>
+    <outputs>
+        <data name="output1" type="tabular" from_work_dir="output1" />
+    </outputs>
+    <tests>
+    </tests>
+</tool>

diff -r ff3c5721f74857cf689966abfdeb0dc9390bf605 -r b9ec027d4b1e1b67f26632a55076dbf53e3225bd test/functional/tools/samples_tool_conf.xml
--- a/test/functional/tools/samples_tool_conf.xml
+++ b/test/functional/tools/samples_tool_conf.xml
@@ -30,6 +30,8 @@
    <tool file="validation_default.xml" />
    <tool file="validation_sanitizer.xml" />
    <tool file="validation_repeat.xml" />
+    <tool file="identifier_single.xml" />
+    <tool file="identifier_multiple.xml" />
    <tool file="collection_paired_test.xml" />
    <tool file="collection_nested_test.xml" />
    <tool file="collection_mixed_param.xml" />

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
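As a concrete illustration of the semantics described in the commit message, here is a minimal sketch of a tool a tool author might now write. It is hypothetical, not part of this changeset: the tool id "identifier_label", the "input1"/"output1" names, and the single-line-input assumption are the editor's, modeled on the identifier_multiple.xml test tool in the diff. It writes one tabular row per input, keyed by that input's element identifier:

    <tool id="identifier_label" name="identifier_label">
        <!-- Hypothetical sketch, not part of this changeset; modeled on
             test/functional/tools/identifier_multiple.xml above. -->
        <command>
            #for $input in $input1#
            ## One row per input: element identifier, a tab, then the
            ## (assumed single-line) dataset content.
            printf '%s\t' '$input.element_identifier' >> 'output1';
            cat '$input' >> 'output1';
            #end for#
        </command>
        <inputs>
            <param type="data" name="input1" label="Inputs" multiple="true" />
        </inputs>
        <outputs>
            <data name="output1" format="tabular" from_work_dir="output1" />
        </outputs>
    </tool>

Run as a reduce over the pair built in the new API tests, output1 would contain rows keyed "forward" and "reverse"; run over plain, un-collected datasets, the identifiers fall back to the dataset names ("Pasted Entry" in the tests above), so the same 'data' parameter behaves sensibly in both cases.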