10 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/d5218767204d/ Changeset: d5218767204d User: jmchilton Date: 2014-09-16 15:57:48+00:00 Summary: Allow sending singleton lists of datasets to single dataset parameters via API tool submission. Request from Sam. Affected #: 2 files diff -r 80d68ea889c01357b520ecefc56f86b0a44549fc -r d5218767204d12af6275b320409e507ae3dfe95b lib/galaxy/tools/parameters/basic.py --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -1903,6 +1903,10 @@ raise ValueError( "The previously selected dataset has been previously deleted" ) if hasattr( v, "dataset" ) and v.dataset.state in [ galaxy.model.Dataset.states.ERROR, galaxy.model.Dataset.states.DISCARDED ]: raise ValueError( "The previously selected dataset has entered an unusable state" ) + if not self.multiple: + if len( values ) > 1: + raise ValueError( "More than one dataset supplied to single input dataset parameter.") + rval = values[ 0 ] return rval def to_string( self, value, app ): diff -r 80d68ea889c01357b520ecefc56f86b0a44549fc -r d5218767204d12af6275b320409e507ae3dfe95b test/api/test_tools.py --- a/test/api/test_tools.py +++ b/test/api/test_tools.py @@ -109,6 +109,20 @@ output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 ) self.assertEqual( output1_content.strip(), "Cat1Test" ) + @skip_without_tool( "cat1" ) + def test_run_cat1_listified_param( self ): + # Run simple non-upload tool with an input data parameter. + history_id = self.dataset_populator.new_history() + new_dataset = self.dataset_populator.new_dataset( history_id, content='Cat1Testlistified' ) + inputs = dict( + input1=[dataset_to_param( new_dataset )], + ) + outputs = self._cat1_outputs( history_id, inputs=inputs ) + self.assertEquals( len( outputs ), 1 ) + output1 = outputs[ 0 ] + output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 ) + self.assertEqual( output1_content.strip(), "Cat1Testlistified" ) + @skip_without_tool( "validation_default" ) def test_validation( self ): history_id = self.dataset_populator.new_history() https://bitbucket.org/galaxy/galaxy-central/commits/d9c75786e6b4/ Changeset: d9c75786e6b4 User: jmchilton Date: 2014-09-16 15:57:48+00:00 Summary: Overhaul multi-run and collection multi-run tool API jobs. Adding consistency allowing each parameter to be wrapped in a object describing the meta-properties of the submitting value - this was requested by Sam to make the new tool form easier to manage, it makes multi-running properties work for non-data parameters, and allows linked/unlinked specification of parameters. Affected #: 2 files diff -r d5218767204d12af6275b320409e507ae3dfe95b -r d9c75786e6b43f18f9c1eaded635d5d6b236e2a7 lib/galaxy/tools/parameters/meta.py --- a/lib/galaxy/tools/parameters/meta.py +++ b/lib/galaxy/tools/parameters/meta.py @@ -14,6 +14,28 @@ execution). """ + def classifiy_unmodified_parameter( input_key ): + value = incoming[ input_key ] + if isinstance( value, dict ) and 'values' in value: + # Explicit meta wrapper for inputs... 
+ is_batch = value.get( 'batch', False ) + is_linked = value.get( 'linked', True ) + if is_batch and is_linked: + classification = permutations.input_classification.MATCHED + elif is_batch: + classification = permutations.input_classification.MULTIPLIED + else: + classification = permutations.input_classification.SINGLE + if __collection_multirun_parameter( value ): + collection_value = value[ 'values' ][ 0 ] + values = __expand_collection_parameter( trans, input_key, collection_value, collections_to_match ) + else: + values = value[ 'values' ] + else: + classification = permutations.input_classification.SINGLE + values = value + return classification, values + def classifier( input_key ): multirun_key = "%s|__multirun__" % input_key if multirun_key in incoming: @@ -25,7 +47,7 @@ multi_value = None return permutations.input_classification.SINGLE, multi_value[ 0 ] else: - return permutations.input_classification.SINGLE, incoming[ input_key ] + return classifiy_unmodified_parameter( input_key ) from galaxy.dataset_collections import matching collections_to_match = matching.CollectionsToMatch() @@ -34,31 +56,10 @@ multirun_key = "%s|__collection_multirun__" % input_key if multirun_key in incoming: incoming_val = incoming[ multirun_key ] - # If subcollectin multirun of data_collection param - value will - # be "hdca_id|subcollection_type" else it will just be hdca_id - if "|" in incoming_val: - encoded_hdc_id, subcollection_type = incoming_val.split( "|", 1 ) - else: - try: - src = incoming_val[ "src" ] - if src != "hdca": - raise exceptions.ToolMetaParameterException( "Invalid dataset collection source type %s" % src ) - encoded_hdc_id = incoming_val[ "id" ] - except TypeError: - encoded_hdc_id = incoming_val - subcollection_type = None - hdc_id = trans.app.security.decode_id( encoded_hdc_id ) - hdc = trans.sa_session.query( model.HistoryDatasetCollectionAssociation ).get( hdc_id ) - collections_to_match.add( input_key, hdc, subcollection_type=subcollection_type ) - if subcollection_type is not None: - from galaxy.dataset_collections import subcollections - subcollection_elements = subcollections.split_dataset_collection_instance( hdc, subcollection_type ) - return permutations.input_classification.MATCHED, subcollection_elements - else: - hdas = hdc.collection.dataset_instances - return permutations.input_classification.MATCHED, hdas + values = __expand_collection_parameter( trans, input_key, incoming_val, collections_to_match ) + return permutations.input_classification.MATCHED, values else: - return permutations.input_classification.SINGLE, incoming[ input_key ] + return classifiy_unmodified_parameter( input_key ) # Stick an unexpanded version of multirun keys so they can be replaced, # by expand_mult_inputs. @@ -76,8 +77,19 @@ multirun_found = False collection_multirun_found = False for key, value in incoming.iteritems(): - multirun_found = try_replace_key( key, "|__multirun__" ) or multirun_found - collection_multirun_found = try_replace_key( key, "|__collection_multirun__" ) or collection_multirun_found + if isinstance( value, dict ) and 'values' in value: + batch = value.get( 'batch', False ) + if batch: + if __collection_multirun_parameter( value ): + collection_multirun_found = True + else: + multirun_found = True + else: + continue + else: + # Old-style batching (remove someday - didn't live in API long?) 
+ multirun_found = try_replace_key( key, "|__multirun__" ) or multirun_found + collection_multirun_found = try_replace_key( key, "|__collection_multirun__" ) or collection_multirun_found if sum( [ 1 if f else 0 for f in [ multirun_found, collection_multirun_found ] ] ) > 1: # In theory doable, but to complicated for a first pass. @@ -93,3 +105,38 @@ else: collection_info = None return expanded_incomings, collection_info + + +def __expand_collection_parameter( trans, input_key, incoming_val, collections_to_match ): + # If subcollectin multirun of data_collection param - value will + # be "hdca_id|subcollection_type" else it will just be hdca_id + if "|" in incoming_val: + encoded_hdc_id, subcollection_type = incoming_val.split( "|", 1 ) + else: + try: + src = incoming_val[ "src" ] + if src != "hdca": + raise exceptions.ToolMetaParameterException( "Invalid dataset collection source type %s" % src ) + encoded_hdc_id = incoming_val[ "id" ] + except TypeError: + encoded_hdc_id = incoming_val + subcollection_type = None + hdc_id = trans.app.security.decode_id( encoded_hdc_id ) + hdc = trans.sa_session.query( model.HistoryDatasetCollectionAssociation ).get( hdc_id ) + collections_to_match.add( input_key, hdc, subcollection_type=subcollection_type ) + if subcollection_type is not None: + from galaxy.dataset_collections import subcollections + subcollection_elements = subcollections.split_dataset_collection_instance( hdc, subcollection_type ) + return subcollection_elements + else: + hdas = hdc.collection.dataset_instances + return hdas + + +def __collection_multirun_parameter( value ): + batch_values = util.listify( value[ 'values' ] ) + if len( batch_values ) == 1: + batch_over = batch_values[ 0 ] + if isinstance( batch_over, dict ) and ('src' in batch_over) and (batch_over[ 'src' ] == 'hdca'): + return True + return False diff -r d5218767204d12af6275b320409e507ae3dfe95b -r d9c75786e6b43f18f9c1eaded635d5d6b236e2a7 test/api/test_tools.py --- a/test/api/test_tools.py +++ b/test/api/test_tools.py @@ -123,6 +123,22 @@ output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 ) self.assertEqual( output1_content.strip(), "Cat1Testlistified" ) + @skip_without_tool( "cat1" ) + def test_run_cat1_single_meta_wrapper( self ): + # Wrap input in a no-op meta parameter wrapper like Sam is planning to + # use for all UI API submissions. 
+ history_id = self.dataset_populator.new_history() + new_dataset = self.dataset_populator.new_dataset( history_id, content='123' ) + inputs = dict( + input1={ 'batch': False, 'values': [ dataset_to_param( new_dataset ) ] }, + ) + outputs = self._cat1_outputs( history_id, inputs=inputs ) + self.assertEquals( len( outputs ), 1 ) + output1 = outputs[ 0 ] + output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 ) + self.assertEqual( output1_content.strip(), "123" ) + + @skip_without_tool( "validation_default" ) def test_validation( self ): history_id = self.dataset_populator.new_history() @@ -149,16 +165,31 @@ self.assertEqual( output1_content.strip(), "Cat1Test\nCat2Test" ) @skip_without_tool( "cat1" ) + def test_multirun_cat1_legacy( self ): + history_id, datasets = self._prepare_cat1_multirun() + inputs = { + "input1|__multirun__": datasets, + } + self._check_cat1_multirun( history_id, inputs ) + + @skip_without_tool( "cat1" ) def test_multirun_cat1( self ): + history_id, datasets = self._prepare_cat1_multirun() + inputs = { + "input1": { + 'batch': True, + 'values': datasets, + }, + } + self._check_cat1_multirun( history_id, inputs ) + + def _prepare_cat1_multirun( self ): history_id = self.dataset_populator.new_history() new_dataset1 = self.dataset_populator.new_dataset( history_id, content='123' ) new_dataset2 = self.dataset_populator.new_dataset( history_id, content='456' ) - inputs = { - "input1|__multirun__": [ - dataset_to_param( new_dataset1 ), - dataset_to_param( new_dataset2 ), - ], - } + return history_id, [ dataset_to_param( new_dataset1 ), dataset_to_param( new_dataset2 ) ] + + def _check_cat1_multirun( self, history_id, inputs ): outputs = self._cat1_outputs( history_id, inputs=inputs ) self.assertEquals( len( outputs ), 2 ) output1 = outputs[ 0 ] @@ -168,6 +199,20 @@ self.assertEquals( output1_content.strip(), "123" ) self.assertEquals( output2_content.strip(), "456" ) + @skip_without_tool( "random_lines1" ) + def test_multirun_non_data_parameter( self ): + history_id = self.dataset_populator.new_history() + new_dataset1 = self.dataset_populator.new_dataset( history_id, content='123\n456\n789' ) + inputs = { + 'input': dataset_to_param( new_dataset1 ), + 'num_lines': { 'batch': True, 'values': [ 1, 2, 3 ] } + } + outputs = self._run_and_get_outputs( 'random_lines1', history_id, inputs ) + # Assert we have three outputs with 1, 2, and 3 lines respectively. 
+ assert len( outputs ) == 3 + outputs_contents = [ self.dataset_populator.get_history_dataset_content( history_id, dataset=o ).strip() for o in outputs ] + assert sorted( map( lambda c: len( c.split( "\n" ) ), outputs_contents ) ) == [ 1, 2, 3 ] + @skip_without_tool( "cat1" ) def test_multirun_in_repeat( self ): history_id = self.dataset_populator.new_history() @@ -191,35 +236,60 @@ self.assertEquals( output2_content.strip(), "Common\n456" ) @skip_without_tool( "cat1" ) - def test_multirun_on_multiple_inputs( self ): - history_id = self.dataset_populator.new_history() - new_dataset1 = self.dataset_populator.new_dataset( history_id, content='123' ) - new_dataset2 = self.dataset_populator.new_dataset( history_id, content='456' ) - new_dataset3 = self.dataset_populator.new_dataset( history_id, content='789' ) - new_dataset4 = self.dataset_populator.new_dataset( history_id, content='0ab' ) + def test_multirun_on_multiple_inputs_legacy( self ): + history_id, first_two, second_two = self._setup_two_multiruns() inputs = { - "input1|__multirun__": [ - dataset_to_param( new_dataset1 ), - dataset_to_param( new_dataset2 ), - ], - 'queries_0|input2|__multirun__': [ - dataset_to_param( new_dataset3 ), - dataset_to_param( new_dataset4 ), - ], + "input1|__multirun__": first_two, + 'queries_0|input2|__multirun__': second_two, } outputs = self._cat1_outputs( history_id, inputs=inputs ) self.assertEquals( len( outputs ), 2 ) outputs_contents = [ self.dataset_populator.get_history_dataset_content( history_id, dataset=o ).strip() for o in outputs ] assert "123\n789" in outputs_contents assert "456\n0ab" in outputs_contents - # TODO: Once cross production (instead of linking inputs) is an option - # again redo test with these checks... - # self.assertEquals( len( outputs ), 4 ) - # assert "123\n0ab" in outputs_contents - # assert "456\n789" in outputs_contents @skip_without_tool( "cat1" ) - def test_map_over_collection( self ): + def test_multirun_on_multiple_inputs( self ): + history_id, first_two, second_two = self._setup_two_multiruns() + inputs = { + "input1": { 'batch': True, 'values': first_two }, + 'queries_0|input2': { 'batch': True, 'values': second_two }, + } + outputs = self._cat1_outputs( history_id, inputs=inputs ) + self.assertEquals( len( outputs ), 2 ) + outputs_contents = [ self.dataset_populator.get_history_dataset_content( history_id, dataset=o ).strip() for o in outputs ] + assert "123\n789" in outputs_contents + assert "456\n0ab" in outputs_contents + + @skip_without_tool( "cat1" ) + def test_multirun_on_multiple_inputs_unlinked( self ): + history_id, first_two, second_two = self._setup_two_multiruns() + inputs = { + "input1": { 'batch': True, 'linked': False, 'values': first_two }, + 'queries_0|input2': { 'batch': True, 'linked': False, 'values': second_two }, + } + outputs = self._cat1_outputs( history_id, inputs=inputs ) + outputs_contents = [ self.dataset_populator.get_history_dataset_content( history_id, dataset=o ).strip() for o in outputs ] + self.assertEquals( len( outputs ), 4 ) + assert "123\n789" in outputs_contents + assert "456\n0ab" in outputs_contents + assert "123\n0ab" in outputs_contents + assert "456\n789" in outputs_contents + + def _setup_two_multiruns( self ): + history_id = self.dataset_populator.new_history() + new_dataset1 = self.dataset_populator.new_dataset( history_id, content='123' ) + new_dataset2 = self.dataset_populator.new_dataset( history_id, content='456' ) + new_dataset3 = self.dataset_populator.new_dataset( history_id, content='789' ) + new_dataset4 = 
self.dataset_populator.new_dataset( history_id, content='0ab' ) + return ( + history_id, + [ dataset_to_param( new_dataset1 ), dataset_to_param( new_dataset2 ) ], + [ dataset_to_param( new_dataset3 ), dataset_to_param( new_dataset4 ) ] + ) + + @skip_without_tool( "cat1" ) + def test_map_over_collection_legacy( self ): history_id = self.dataset_populator.new_history() hdca_id = self.__build_pair( history_id, [ "123", "456" ] ) inputs = { @@ -228,6 +298,18 @@ # first, next test method tests other. "input1|__collection_multirun__": hdca_id, } + self._run_and_check_simple_collection_mapping( history_id, inputs ) + + @skip_without_tool( "cat1" ) + def test_map_over_collection( self ): + history_id = self.dataset_populator.new_history() + hdca_id = self.__build_pair( history_id, [ "123", "456" ] ) + inputs = { + "input1": { 'batch': True, 'values': [ { 'src': 'hdca', 'id': hdca_id } ] }, + } + self._run_and_check_simple_collection_mapping( history_id, inputs ) + + def _run_and_check_simple_collection_mapping( self, history_id, inputs ): create = self._run_cat1( history_id, inputs=inputs, assert_ok=True ) outputs = create[ 'outputs' ] jobs = create[ 'jobs' ] @@ -243,12 +325,24 @@ self.assertEquals( output2_content.strip(), "456" ) @skip_without_tool( "cat1" ) - def test_map_over_nested_collections( self ): + def test_map_over_nested_collections_legacy( self ): history_id = self.dataset_populator.new_history() hdca_id = self.__build_nested_list( history_id ) inputs = { "input1|__collection_multirun__": dict( src="hdca", id=hdca_id ), } + self._check_simple_cat1_over_nested_collections( history_id, inputs ) + + @skip_without_tool( "cat1" ) + def test_map_over_nested_collections( self ): + history_id = self.dataset_populator.new_history() + hdca_id = self.__build_nested_list( history_id ) + inputs = { + "input1": { 'batch': True, 'values': [ dict( src="hdca", id=hdca_id ) ] }, + } + self._check_simple_cat1_over_nested_collections( history_id, inputs ) + + def _check_simple_cat1_over_nested_collections( self, history_id, inputs ): create = self._run_cat1( history_id, inputs=inputs, assert_ok=True ) outputs = create[ 'outputs' ] jobs = create[ 'jobs' ] @@ -271,7 +365,7 @@ self.assertEquals( outputs[ 0 ][ "id" ], first_object_forward_element[ "object" ][ "id" ] ) @skip_without_tool( "cat1" ) - def test_map_over_two_collections( self ): + def test_map_over_two_collections_legacy( self ): history_id = self.dataset_populator.new_history() hdca1_id = self.__build_pair( history_id, [ "123", "456" ] ) hdca2_id = self.__build_pair( history_id, [ "789", "0ab" ] ) @@ -279,7 +373,24 @@ "input1|__collection_multirun__": hdca1_id, "queries_0|input2|__collection_multirun__": hdca2_id, } - outputs = self._cat1_outputs( history_id, inputs=inputs ) + self._check_map_cat1_over_two_collections( history_id, inputs ) + + @skip_without_tool( "cat1" ) + def test_map_over_two_collections( self ): + history_id = self.dataset_populator.new_history() + hdca1_id = self.__build_pair( history_id, [ "123", "456" ] ) + hdca2_id = self.__build_pair( history_id, [ "789", "0ab" ] ) + inputs = { + "input1": { 'batch': True, 'values': [ {'src': 'hdca', 'id': hdca1_id } ] }, + "queries_0|input2": { 'batch': True, 'values': [ { 'src': 'hdca', 'id': hdca2_id } ] }, + } + self._check_map_cat1_over_two_collections( history_id, inputs ) + + def _check_map_cat1_over_two_collections( self, history_id, inputs ): + response = self._run_cat1( history_id, inputs ) + self._assert_status_code_is( response, 200 ) + response_object = response.json() + 
outputs = response_object[ 'outputs' ] self.assertEquals( len( outputs ), 2 ) output1 = outputs[ 0 ] output2 = outputs[ 1 ] @@ -288,6 +399,29 @@ self.assertEquals( output1_content.strip(), "123\n789" ) self.assertEquals( output2_content.strip(), "456\n0ab" ) + self.assertEquals( len( response_object[ 'jobs' ] ), 2 ) + self.assertEquals( len( response_object[ 'implicit_collections' ] ), 1 ) + + @skip_without_tool( "cat1" ) + def test_map_over_two_collections_unlinked( self ): + history_id = self.dataset_populator.new_history() + hdca1_id = self.__build_pair( history_id, [ "123", "456" ] ) + hdca2_id = self.__build_pair( history_id, [ "789", "0ab" ] ) + inputs = { + "input1": { 'batch': True, 'linked': False, 'values': [ {'src': 'hdca', 'id': hdca1_id } ] }, + "queries_0|input2": { 'batch': True, 'linked': False, 'values': [ { 'src': 'hdca', 'id': hdca2_id } ] }, + } + response = self._run_cat1( history_id, inputs ) + self._assert_status_code_is( response, 200 ) + response_object = response.json() + outputs = response_object[ 'outputs' ] + self.assertEquals( len( outputs ), 4 ) + + self.assertEquals( len( response_object[ 'jobs' ] ), 4 ) + # Implicit collections not created with unlinked inputs yet - this may + # be problematic. + self.assertEquals( len( response_object[ 'implicit_collections' ] ), 0 ) + @skip_without_tool( "cat1" ) def test_cannot_map_over_incompatible_collections( self ): history_id = self.dataset_populator.new_history() https://bitbucket.org/galaxy/galaxy-central/commits/e5e67860cbde/ Changeset: e5e67860cbde User: jmchilton Date: 2014-09-16 15:57:48+00:00 Summary: Allow mixing batch multi-running of collections and individual datasets. I wouldn't really recommend doing this per se - but probably don't want to prevent it either. Affected #: 2 files diff -r d9c75786e6b43f18f9c1eaded635d5d6b236e2a7 -r e5e67860cbde0edb272e1dbd225f4195e8b4785f lib/galaxy/tools/parameters/meta.py --- a/lib/galaxy/tools/parameters/meta.py +++ b/lib/galaxy/tools/parameters/meta.py @@ -36,7 +36,11 @@ values = value return classification, values + from galaxy.dataset_collections import matching + collections_to_match = matching.CollectionsToMatch() + def classifier( input_key ): + collection_multirun_key = "%s|__collection_multirun__" % input_key multirun_key = "%s|__multirun__" % input_key if multirun_key in incoming: multi_value = util.listify( incoming[ multirun_key ] ) @@ -46,16 +50,8 @@ if len( multi_value ) == 0: multi_value = None return permutations.input_classification.SINGLE, multi_value[ 0 ] - else: - return classifiy_unmodified_parameter( input_key ) - - from galaxy.dataset_collections import matching - collections_to_match = matching.CollectionsToMatch() - - def collection_classifier( input_key ): - multirun_key = "%s|__collection_multirun__" % input_key - if multirun_key in incoming: - incoming_val = incoming[ multirun_key ] + elif collection_multirun_key in incoming: + incoming_val = incoming[ collection_multirun_key ] values = __expand_collection_parameter( trans, input_key, incoming_val, collections_to_match ) return permutations.input_classification.MATCHED, values else: @@ -87,24 +83,16 @@ else: continue else: - # Old-style batching (remove someday - didn't live in API long?) - multirun_found = try_replace_key( key, "|__multirun__" ) or multirun_found - collection_multirun_found = try_replace_key( key, "|__collection_multirun__" ) or collection_multirun_found + # Old-style batching (remove someday? 
- pretty hacky and didn't live in API long) + try_replace_key( key, "|__multirun__" ) or multirun_found + try_replace_key( key, "|__collection_multirun__" ) or collection_multirun_found - if sum( [ 1 if f else 0 for f in [ multirun_found, collection_multirun_found ] ] ) > 1: - # In theory doable, but to complicated for a first pass. - message = "Cannot specify parallel execution across both multiple datasets and dataset collections." - raise exceptions.ToolMetaParameterException( message ) - - if multirun_found: - return permutations.expand_multi_inputs( incoming_template, classifier ), None + expanded_incomings = permutations.expand_multi_inputs( incoming_template, classifier ) + if collections_to_match.has_collections(): + collection_info = trans.app.dataset_collections_service.match_collections( collections_to_match ) else: - expanded_incomings = permutations.expand_multi_inputs( incoming_template, collection_classifier ) - if collections_to_match.has_collections(): - collection_info = trans.app.dataset_collections_service.match_collections( collections_to_match ) - else: - collection_info = None - return expanded_incomings, collection_info + collection_info = None + return expanded_incomings, collection_info def __expand_collection_parameter( trans, input_key, incoming_val, collections_to_match ): diff -r d9c75786e6b43f18f9c1eaded635d5d6b236e2a7 -r e5e67860cbde0edb272e1dbd225f4195e8b4785f test/api/test_tools.py --- a/test/api/test_tools.py +++ b/test/api/test_tools.py @@ -423,6 +423,26 @@ self.assertEquals( len( response_object[ 'implicit_collections' ] ), 0 ) @skip_without_tool( "cat1" ) + def test_map_over_collected_and_individual_datasets( self ): + history_id = self.dataset_populator.new_history() + hdca1_id = self.__build_pair( history_id, [ "123", "456" ] ) + new_dataset1 = self.dataset_populator.new_dataset( history_id, content='789' ) + new_dataset2 = self.dataset_populator.new_dataset( history_id, content='0ab' ) + + inputs = { + "input1": { 'batch': True, 'values': [ {'src': 'hdca', 'id': hdca1_id } ] }, + "queries_0|input2": { 'batch': True, 'values': [ dataset_to_param( new_dataset1 ), dataset_to_param( new_dataset2 ) ] }, + } + response = self._run_cat1( history_id, inputs ) + self._assert_status_code_is( response, 200 ) + response_object = response.json() + outputs = response_object[ 'outputs' ] + self.assertEquals( len( outputs ), 2 ) + + self.assertEquals( len( response_object[ 'jobs' ] ), 2 ) + self.assertEquals( len( response_object[ 'implicit_collections' ] ), 1 ) + + @skip_without_tool( "cat1" ) def test_cannot_map_over_incompatible_collections( self ): history_id = self.dataset_populator.new_history() hdca1_id = self.__build_pair( history_id, [ "123", "456" ] ) https://bitbucket.org/galaxy/galaxy-central/commits/a7f6b4b1468c/ Changeset: a7f6b4b1468c User: jmchilton Date: 2014-09-16 15:57:48+00:00 Summary: Simplified dataset collection reductions via API. Old tool form needed to encode every value as a string so I had done "__collection_reduction__|<hdca_id>" to distinguish that value from an "<hda_id>" - since hdca and hdas can have the same encoded ids. The new tool form API is going to use the API which allows for richer object representations - so {"src": "hda", "id": "<hda_id>"} versus {"src": "hdca", "id": "<hdca_id>"} should be enough to distinguish between passing an HDA and an HDCA to a multiple input data parameter. 
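As a rough sketch of the contrast described in this summary (illustration only; the tool, the parameter names "f1"/"f2", and the encoded ids are placeholders modeled on the tests added in this changeset):

# Old style: every value is a string, so collection values need a marker
# prefix to be distinguishable from plain encoded dataset ids.
legacy_inputs = {
    "f1": "__collection_reduce__|<encoded_hdca_id>",
    "f2": "<encoded_hda_id>",
}

# New style: the source type travels alongside the id, so an HDA and an HDCA
# that happen to share an encoded id can no longer be confused.
new_inputs = {
    "f1": {"src": "hdca", "id": "<encoded_hdca_id>"},
    "f2": {"src": "hda", "id": "<encoded_hda_id>"},
}

# Either dictionary would be supplied as the "inputs" of an ordinary tool
# execution request (e.g. via the run_tool test helper used below); the
# test_reduce_collections test added here exercises the new form.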
Affected #: 2 files diff -r e5e67860cbde0edb272e1dbd225f4195e8b4785f -r a7f6b4b1468cc52237981a7e7c1645f4720f8b1e lib/galaxy/tools/parameters/basic.py --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -1887,6 +1887,11 @@ elif isinstance( value, dict ) and 'src' in value and 'id' in value: if value['src'] == 'hda': rval = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( trans.app.security.decode_id(value['id']) ) + elif value['src'] == 'hdca': + decoded_id = trans.app.security.decode_id( value[ 'id' ] ) + rval = trans.sa_session.query( trans.app.model.HistoryDatasetCollectionAssociation ).get( decoded_id ) + else: + raise ValueError("Unknown input source %s passed to job submission API." % value['src']) elif str( value ).startswith( "__collection_reduce__|" ): encoded_id = str( value )[ len( "__collection_reduce__|" ): ] decoded_id = trans.app.security.decode_id( encoded_id ) diff -r e5e67860cbde0edb272e1dbd225f4195e8b4785f -r a7f6b4b1468cc52237981a7e7c1645f4720f8b1e test/api/test_tools.py --- a/test/api/test_tools.py +++ b/test/api/test_tools.py @@ -457,7 +457,7 @@ assert run_response.status_code >= 400 @skip_without_tool( "multi_data_param" ) - def test_reduce_collections( self ): + def test_reduce_collections_legacy( self ): history_id = self.dataset_populator.new_history() hdca1_id = self.__build_pair( history_id, [ "123", "456" ] ) hdca2_id = self.dataset_collection_populator.create_list_in_history( history_id ).json()[ "id" ] @@ -465,6 +465,20 @@ "f1": "__collection_reduce__|%s" % hdca1_id, "f2": "__collection_reduce__|%s" % hdca2_id, } + self._check_simple_reduce_job( history_id, inputs ) + + @skip_without_tool( "multi_data_param" ) + def test_reduce_collections( self ): + history_id = self.dataset_populator.new_history() + hdca1_id = self.__build_pair( history_id, [ "123", "456" ] ) + hdca2_id = self.dataset_collection_populator.create_list_in_history( history_id ).json()[ "id" ] + inputs = { + "f1": { 'src': 'hdca', 'id': hdca1_id }, + "f2": { 'src': 'hdca', 'id': hdca2_id }, + } + self._check_simple_reduce_job( history_id, inputs ) + + def _check_simple_reduce_job( self, history_id, inputs ): create = self._run( "multi_data_param", history_id, inputs, assert_ok=True ) outputs = create[ 'outputs' ] jobs = create[ 'jobs' ] https://bitbucket.org/galaxy/galaxy-central/commits/6185cea44918/ Changeset: 6185cea44918 User: jmchilton Date: 2014-09-16 15:57:48+00:00 Summary: Redo API for subcollection mapping steps in tools. Like the reductions - was previously constrained by sequeezing these values into simple strings - now the tool form will target the API I think this expanded version is a little more straight-forward (though verbose). Adds consistency with rest of the tool form API changes. 
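For comparison, the same subcollection-mapping request can be sketched in both styles (illustration only; <encoded_list_id> stands in for a list:paired collection id, mirroring the test_subcollection_mapping tests in this changeset):

# Old style: hdca id and subcollection type squeezed into a single string.
legacy_inputs = {
    "f1|__collection_multirun__": "<encoded_list_id>|paired",
}

# New style: the batch wrapper carries the same information as structured
# fields, with the subcollection type given as 'map_over_type'.
new_inputs = {
    "f1": {
        "batch": True,
        "values": [{"src": "hdca", "map_over_type": "paired", "id": "<encoded_list_id>"}],
    },
}

# In either form the tool is expected to run once per 'paired' element of the
# list, as the collection_paired_test cases below verify.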
Affected #: 2 files diff -r a7f6b4b1468cc52237981a7e7c1645f4720f8b1e -r 6185cea4491868aea6207879e88118402f4344d3 lib/galaxy/tools/parameters/meta.py --- a/lib/galaxy/tools/parameters/meta.py +++ b/lib/galaxy/tools/parameters/meta.py @@ -106,9 +106,10 @@ if src != "hdca": raise exceptions.ToolMetaParameterException( "Invalid dataset collection source type %s" % src ) encoded_hdc_id = incoming_val[ "id" ] + subcollection_type = incoming_val.get( 'map_over_type', None ) except TypeError: encoded_hdc_id = incoming_val - subcollection_type = None + subcollection_type = None hdc_id = trans.app.security.decode_id( encoded_hdc_id ) hdc = trans.sa_session.query( model.HistoryDatasetCollectionAssociation ).get( hdc_id ) collections_to_match.add( input_key, hdc, subcollection_type=subcollection_type ) diff -r a7f6b4b1468cc52237981a7e7c1645f4720f8b1e -r 6185cea4491868aea6207879e88118402f4344d3 test/api/test_tools.py --- a/test/api/test_tools.py +++ b/test/api/test_tools.py @@ -492,12 +492,27 @@ assert len( output2_content.strip().split("\n") ) == 3, output2_content @skip_without_tool( "collection_paired_test" ) - def test_subcollection_mapping( self ): + def test_subcollection_mapping_legacy( self ): history_id = self.dataset_populator.new_history() hdca_list_id = self.__build_nested_list( history_id ) inputs = { "f1|__collection_multirun__": "%s|paired" % hdca_list_id } + self._check_simple_subcollection_mapping( history_id, inputs ) + + @skip_without_tool( "collection_paired_test" ) + def test_subcollection_mapping( self ): + history_id = self.dataset_populator.new_history() + hdca_list_id = self.__build_nested_list( history_id ) + inputs = { + "f1": { + 'batch': True, + 'values': [ { 'src': 'hdca', 'map_over_type': 'paired', 'id': hdca_list_id }], + } + } + self._check_simple_subcollection_mapping( history_id, inputs ) + + def _check_simple_subcollection_mapping( self, history_id, inputs ): # Following wait not really needed - just getting so many database # locked errors with sqlite. 
self.dataset_populator.wait_for_history( history_id, assert_ok=True ) @@ -511,7 +526,7 @@ assert output2_content.strip() == "789\n0ab", output2_content @skip_without_tool( "collection_mixed_param" ) - def test_combined_mapping_and_subcollection_mapping( self ): + def test_combined_mapping_and_subcollection_mapping_legacy( self ): history_id = self.dataset_populator.new_history() nested_list_id = self.__build_nested_list( history_id ) create_response = self.dataset_collection_populator.create_list_in_history( history_id, contents=["xxx", "yyy"] ) @@ -520,6 +535,27 @@ "f1|__collection_multirun__": "%s|paired" % nested_list_id, "f2|__collection_multirun__": list_id, } + self._check_combined_mapping_and_subcollection_mapping( history_id, inputs ) + + @skip_without_tool( "collection_mixed_param" ) + def test_combined_mapping_and_subcollection_mapping( self ): + history_id = self.dataset_populator.new_history() + nested_list_id = self.__build_nested_list( history_id ) + create_response = self.dataset_collection_populator.create_list_in_history( history_id, contents=["xxx", "yyy"] ) + list_id = create_response.json()[ "id" ] + inputs = { + "f1": { + 'batch': True, + 'values': [ { 'src': 'hdca', 'map_over_type': 'paired', 'id': nested_list_id }], + }, + "f2": { + 'batch': True, + 'values': [ { 'src': 'hdca', 'id': list_id }], + }, + } + self._check_combined_mapping_and_subcollection_mapping( history_id, inputs ) + + def _check_combined_mapping_and_subcollection_mapping( self, history_id, inputs ): self.dataset_populator.wait_for_history( history_id, assert_ok=True ) outputs = self._run_and_get_outputs( "collection_mixed_param", history_id, inputs ) assert len( outputs ), 2 https://bitbucket.org/galaxy/galaxy-central/commits/b8bc9b8b9806/ Changeset: b8bc9b8b9806 User: jmchilton Date: 2014-09-16 15:57:48+00:00 Summary: Add tools test for data_collection input parameter specification. Affected #: 1 file diff -r 6185cea4491868aea6207879e88118402f4344d3 -r b8bc9b8b980689cbd32ad999675a31203574afb9 test/api/test_tools.py --- a/test/api/test_tools.py +++ b/test/api/test_tools.py @@ -148,6 +148,20 @@ response = self._run( "validation_default", history_id, inputs ) self._assert_status_code_is( response, 400 ) + @skip_without_tool( "collection_paired_test" ) + def test_collection_parameter( self ): + history_id = self.dataset_populator.new_history() + hdca_id = self.__build_pair( history_id, [ "123", "456" ] ) + inputs = { + "f1": { "src": "hdca", "id": hdca_id }, + } + output = self._run( "collection_paired_test", history_id, inputs, assert_ok=True ) + assert len( output[ 'jobs' ] ) == 1 + assert len( output[ 'implicit_collections' ] ) == 0 + assert len( output[ 'outputs' ] ) == 1 + contents = self.dataset_populator.get_history_dataset_content( history_id, hid=4 ) + assert contents.strip() == "123\n456", contents + @skip_without_tool( "cat1" ) def test_run_cat1_with_two_inputs( self ): # Run tool with an multiple data parameter and grouping (repeat) https://bitbucket.org/galaxy/galaxy-central/commits/f5ea6456c64e/ Changeset: f5ea6456c64e User: jmchilton Date: 2014-09-16 15:57:48+00:00 Summary: PEP-8 fix. 
Affected #: 1 file diff -r b8bc9b8b980689cbd32ad999675a31203574afb9 -r f5ea6456c64ece76db8528eee5c88eb839cbaa34 test/api/test_tools.py --- a/test/api/test_tools.py +++ b/test/api/test_tools.py @@ -138,7 +138,6 @@ output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 ) self.assertEqual( output1_content.strip(), "123" ) - @skip_without_tool( "validation_default" ) def test_validation( self ): history_id = self.dataset_populator.new_history() https://bitbucket.org/galaxy/galaxy-central/commits/e66166960524/ Changeset: e66166960524 User: jmchilton Date: 2014-09-16 15:57:48+00:00 Summary: Update map/reduce workflow tests for newer API constructs. Old ones still work - but I wanted to verify the new changes didn't cause any unintended consequences with workflows. Affected #: 1 file diff -r f5ea6456c64ece76db8528eee5c88eb839cbaa34 -r e6616696052491bf563b690865f307f2de96d57c test/api/test_workflows.py --- a/test/api/test_workflows.py +++ b/test/api/test_workflows.py @@ -350,13 +350,13 @@ hdca = self.dataset_collection_populator.create_pair_in_history( history_id, contents=["1 2 3\n4 5 6", "7 8 9\n10 11 10"] ).json() hdca_id = hdca[ "id" ] inputs1 = { - "input|__collection_multirun__": hdca_id, + "input": { "batch": True, "values": [ { "src": "hdca", "id": hdca_id } ] }, "num_lines": 2 } implicit_hdca1, job_id1 = self._run_tool_get_collection_and_job_id( history_id, "random_lines1", inputs1 ) inputs2 = { - "f1": "__collection_reduce__|%s" % ( implicit_hdca1[ "id" ] ), - "f2": "__collection_reduce__|%s" % ( implicit_hdca1[ "id" ] ) + "f1": { "src": "hdca", "id": implicit_hdca1[ "id" ] }, + "f2": { "src": "hdca", "id": implicit_hdca1[ "id" ] }, } reduction_run_output = self.dataset_populator.run_tool( tool_id="multi_data_param", @@ -402,12 +402,12 @@ hdca = self.dataset_collection_populator.create_pair_in_history( history_id, contents=["1 2 3\n4 5 6", "7 8 9\n10 11 10"] ).json() hdca_id = hdca[ "id" ] inputs1 = { - "input|__collection_multirun__": hdca_id, + "input": { "batch": True, "values": [ { "src": "hdca", "id": hdca_id } ] }, "num_lines": 2 } implicit_hdca1, job_id1 = self._run_tool_get_collection_and_job_id( history_id, "random_lines1", inputs1 ) inputs2 = { - "input|__collection_multirun__": implicit_hdca1[ "id" ], + "input": { "batch": True, "values": [ { "src": "hdca", "id": implicit_hdca1[ "id" ] } ] }, "num_lines": 1 } _, job_id2 = self._run_tool_get_collection_and_job_id( history_id, "random_lines1", inputs2 ) https://bitbucket.org/galaxy/galaxy-central/commits/708d988d3e7b/ Changeset: 708d988d3e7b User: jmchilton Date: 2014-09-16 18:51:31+00:00 Summary: Fix spelling error in method name (thanks Nicola!). Affected #: 1 file diff -r e6616696052491bf563b690865f307f2de96d57c -r 708d988d3e7bf0ea9c91e0c2b2cbdc53037d3544 lib/galaxy/tools/parameters/meta.py --- a/lib/galaxy/tools/parameters/meta.py +++ b/lib/galaxy/tools/parameters/meta.py @@ -14,7 +14,7 @@ execution). """ - def classifiy_unmodified_parameter( input_key ): + def classify_unmodified_parameter( input_key ): value = incoming[ input_key ] if isinstance( value, dict ) and 'values' in value: # Explicit meta wrapper for inputs... 
@@ -55,7 +55,7 @@ values = __expand_collection_parameter( trans, input_key, incoming_val, collections_to_match ) return permutations.input_classification.MATCHED, values else: - return classifiy_unmodified_parameter( input_key ) + return classify_unmodified_parameter( input_key ) # Stick an unexpanded version of multirun keys so they can be replaced, # by expand_mult_inputs. https://bitbucket.org/galaxy/galaxy-central/commits/df92b55d194a/ Changeset: df92b55d194a User: jmchilton Date: 2014-09-22 14:24:39+00:00 Summary: Merged in jmchilton/galaxy-central-fork-1 (pull request #496) More consistent tool API for map/reduce operations. Affected #: 4 files diff -r 42f677bab2e4880b62758614fcb2c7dc27013788 -r df92b55d194a4d67a1b863c85044eb91c138c74b lib/galaxy/tools/parameters/basic.py --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -1887,6 +1887,11 @@ elif isinstance( value, dict ) and 'src' in value and 'id' in value: if value['src'] == 'hda': rval = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( trans.app.security.decode_id(value['id']) ) + elif value['src'] == 'hdca': + decoded_id = trans.app.security.decode_id( value[ 'id' ] ) + rval = trans.sa_session.query( trans.app.model.HistoryDatasetCollectionAssociation ).get( decoded_id ) + else: + raise ValueError("Unknown input source %s passed to job submission API." % value['src']) elif str( value ).startswith( "__collection_reduce__|" ): encoded_id = str( value )[ len( "__collection_reduce__|" ): ] decoded_id = trans.app.security.decode_id( encoded_id ) @@ -1903,6 +1908,10 @@ raise ValueError( "The previously selected dataset has been previously deleted" ) if hasattr( v, "dataset" ) and v.dataset.state in [ galaxy.model.Dataset.states.ERROR, galaxy.model.Dataset.states.DISCARDED ]: raise ValueError( "The previously selected dataset has entered an unusable state" ) + if not self.multiple: + if len( values ) > 1: + raise ValueError( "More than one dataset supplied to single input dataset parameter.") + rval = values[ 0 ] return rval def to_string( self, value, app ): diff -r 42f677bab2e4880b62758614fcb2c7dc27013788 -r df92b55d194a4d67a1b863c85044eb91c138c74b lib/galaxy/tools/parameters/meta.py --- a/lib/galaxy/tools/parameters/meta.py +++ b/lib/galaxy/tools/parameters/meta.py @@ -14,7 +14,33 @@ execution). """ + def classify_unmodified_parameter( input_key ): + value = incoming[ input_key ] + if isinstance( value, dict ) and 'values' in value: + # Explicit meta wrapper for inputs... 
+ is_batch = value.get( 'batch', False ) + is_linked = value.get( 'linked', True ) + if is_batch and is_linked: + classification = permutations.input_classification.MATCHED + elif is_batch: + classification = permutations.input_classification.MULTIPLIED + else: + classification = permutations.input_classification.SINGLE + if __collection_multirun_parameter( value ): + collection_value = value[ 'values' ][ 0 ] + values = __expand_collection_parameter( trans, input_key, collection_value, collections_to_match ) + else: + values = value[ 'values' ] + else: + classification = permutations.input_classification.SINGLE + values = value + return classification, values + + from galaxy.dataset_collections import matching + collections_to_match = matching.CollectionsToMatch() + def classifier( input_key ): + collection_multirun_key = "%s|__collection_multirun__" % input_key multirun_key = "%s|__multirun__" % input_key if multirun_key in incoming: multi_value = util.listify( incoming[ multirun_key ] ) @@ -24,41 +50,12 @@ if len( multi_value ) == 0: multi_value = None return permutations.input_classification.SINGLE, multi_value[ 0 ] + elif collection_multirun_key in incoming: + incoming_val = incoming[ collection_multirun_key ] + values = __expand_collection_parameter( trans, input_key, incoming_val, collections_to_match ) + return permutations.input_classification.MATCHED, values else: - return permutations.input_classification.SINGLE, incoming[ input_key ] - - from galaxy.dataset_collections import matching - collections_to_match = matching.CollectionsToMatch() - - def collection_classifier( input_key ): - multirun_key = "%s|__collection_multirun__" % input_key - if multirun_key in incoming: - incoming_val = incoming[ multirun_key ] - # If subcollectin multirun of data_collection param - value will - # be "hdca_id|subcollection_type" else it will just be hdca_id - if "|" in incoming_val: - encoded_hdc_id, subcollection_type = incoming_val.split( "|", 1 ) - else: - try: - src = incoming_val[ "src" ] - if src != "hdca": - raise exceptions.ToolMetaParameterException( "Invalid dataset collection source type %s" % src ) - encoded_hdc_id = incoming_val[ "id" ] - except TypeError: - encoded_hdc_id = incoming_val - subcollection_type = None - hdc_id = trans.app.security.decode_id( encoded_hdc_id ) - hdc = trans.sa_session.query( model.HistoryDatasetCollectionAssociation ).get( hdc_id ) - collections_to_match.add( input_key, hdc, subcollection_type=subcollection_type ) - if subcollection_type is not None: - from galaxy.dataset_collections import subcollections - subcollection_elements = subcollections.split_dataset_collection_instance( hdc, subcollection_type ) - return permutations.input_classification.MATCHED, subcollection_elements - else: - hdas = hdc.collection.dataset_instances - return permutations.input_classification.MATCHED, hdas - else: - return permutations.input_classification.SINGLE, incoming[ input_key ] + return classify_unmodified_parameter( input_key ) # Stick an unexpanded version of multirun keys so they can be replaced, # by expand_mult_inputs. 
@@ -76,20 +73,59 @@ multirun_found = False collection_multirun_found = False for key, value in incoming.iteritems(): - multirun_found = try_replace_key( key, "|__multirun__" ) or multirun_found - collection_multirun_found = try_replace_key( key, "|__collection_multirun__" ) or collection_multirun_found + if isinstance( value, dict ) and 'values' in value: + batch = value.get( 'batch', False ) + if batch: + if __collection_multirun_parameter( value ): + collection_multirun_found = True + else: + multirun_found = True + else: + continue + else: + # Old-style batching (remove someday? - pretty hacky and didn't live in API long) + try_replace_key( key, "|__multirun__" ) or multirun_found + try_replace_key( key, "|__collection_multirun__" ) or collection_multirun_found - if sum( [ 1 if f else 0 for f in [ multirun_found, collection_multirun_found ] ] ) > 1: - # In theory doable, but to complicated for a first pass. - message = "Cannot specify parallel execution across both multiple datasets and dataset collections." - raise exceptions.ToolMetaParameterException( message ) + expanded_incomings = permutations.expand_multi_inputs( incoming_template, classifier ) + if collections_to_match.has_collections(): + collection_info = trans.app.dataset_collections_service.match_collections( collections_to_match ) + else: + collection_info = None + return expanded_incomings, collection_info - if multirun_found: - return permutations.expand_multi_inputs( incoming_template, classifier ), None + +def __expand_collection_parameter( trans, input_key, incoming_val, collections_to_match ): + # If subcollectin multirun of data_collection param - value will + # be "hdca_id|subcollection_type" else it will just be hdca_id + if "|" in incoming_val: + encoded_hdc_id, subcollection_type = incoming_val.split( "|", 1 ) else: - expanded_incomings = permutations.expand_multi_inputs( incoming_template, collection_classifier ) - if collections_to_match.has_collections(): - collection_info = trans.app.dataset_collections_service.match_collections( collections_to_match ) - else: - collection_info = None - return expanded_incomings, collection_info + try: + src = incoming_val[ "src" ] + if src != "hdca": + raise exceptions.ToolMetaParameterException( "Invalid dataset collection source type %s" % src ) + encoded_hdc_id = incoming_val[ "id" ] + subcollection_type = incoming_val.get( 'map_over_type', None ) + except TypeError: + encoded_hdc_id = incoming_val + subcollection_type = None + hdc_id = trans.app.security.decode_id( encoded_hdc_id ) + hdc = trans.sa_session.query( model.HistoryDatasetCollectionAssociation ).get( hdc_id ) + collections_to_match.add( input_key, hdc, subcollection_type=subcollection_type ) + if subcollection_type is not None: + from galaxy.dataset_collections import subcollections + subcollection_elements = subcollections.split_dataset_collection_instance( hdc, subcollection_type ) + return subcollection_elements + else: + hdas = hdc.collection.dataset_instances + return hdas + + +def __collection_multirun_parameter( value ): + batch_values = util.listify( value[ 'values' ] ) + if len( batch_values ) == 1: + batch_over = batch_values[ 0 ] + if isinstance( batch_over, dict ) and ('src' in batch_over) and (batch_over[ 'src' ] == 'hdca'): + return True + return False diff -r 42f677bab2e4880b62758614fcb2c7dc27013788 -r df92b55d194a4d67a1b863c85044eb91c138c74b test/api/test_tools.py --- a/test/api/test_tools.py +++ b/test/api/test_tools.py @@ -109,6 +109,35 @@ output1_content = 
self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 ) self.assertEqual( output1_content.strip(), "Cat1Test" ) + @skip_without_tool( "cat1" ) + def test_run_cat1_listified_param( self ): + # Run simple non-upload tool with an input data parameter. + history_id = self.dataset_populator.new_history() + new_dataset = self.dataset_populator.new_dataset( history_id, content='Cat1Testlistified' ) + inputs = dict( + input1=[dataset_to_param( new_dataset )], + ) + outputs = self._cat1_outputs( history_id, inputs=inputs ) + self.assertEquals( len( outputs ), 1 ) + output1 = outputs[ 0 ] + output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 ) + self.assertEqual( output1_content.strip(), "Cat1Testlistified" ) + + @skip_without_tool( "cat1" ) + def test_run_cat1_single_meta_wrapper( self ): + # Wrap input in a no-op meta parameter wrapper like Sam is planning to + # use for all UI API submissions. + history_id = self.dataset_populator.new_history() + new_dataset = self.dataset_populator.new_dataset( history_id, content='123' ) + inputs = dict( + input1={ 'batch': False, 'values': [ dataset_to_param( new_dataset ) ] }, + ) + outputs = self._cat1_outputs( history_id, inputs=inputs ) + self.assertEquals( len( outputs ), 1 ) + output1 = outputs[ 0 ] + output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 ) + self.assertEqual( output1_content.strip(), "123" ) + @skip_without_tool( "validation_default" ) def test_validation( self ): history_id = self.dataset_populator.new_history() @@ -118,6 +147,20 @@ response = self._run( "validation_default", history_id, inputs ) self._assert_status_code_is( response, 400 ) + @skip_without_tool( "collection_paired_test" ) + def test_collection_parameter( self ): + history_id = self.dataset_populator.new_history() + hdca_id = self.__build_pair( history_id, [ "123", "456" ] ) + inputs = { + "f1": { "src": "hdca", "id": hdca_id }, + } + output = self._run( "collection_paired_test", history_id, inputs, assert_ok=True ) + assert len( output[ 'jobs' ] ) == 1 + assert len( output[ 'implicit_collections' ] ) == 0 + assert len( output[ 'outputs' ] ) == 1 + contents = self.dataset_populator.get_history_dataset_content( history_id, hid=4 ) + assert contents.strip() == "123\n456", contents + @skip_without_tool( "cat1" ) def test_run_cat1_with_two_inputs( self ): # Run tool with an multiple data parameter and grouping (repeat) @@ -135,16 +178,31 @@ self.assertEqual( output1_content.strip(), "Cat1Test\nCat2Test" ) @skip_without_tool( "cat1" ) + def test_multirun_cat1_legacy( self ): + history_id, datasets = self._prepare_cat1_multirun() + inputs = { + "input1|__multirun__": datasets, + } + self._check_cat1_multirun( history_id, inputs ) + + @skip_without_tool( "cat1" ) def test_multirun_cat1( self ): + history_id, datasets = self._prepare_cat1_multirun() + inputs = { + "input1": { + 'batch': True, + 'values': datasets, + }, + } + self._check_cat1_multirun( history_id, inputs ) + + def _prepare_cat1_multirun( self ): history_id = self.dataset_populator.new_history() new_dataset1 = self.dataset_populator.new_dataset( history_id, content='123' ) new_dataset2 = self.dataset_populator.new_dataset( history_id, content='456' ) - inputs = { - "input1|__multirun__": [ - dataset_to_param( new_dataset1 ), - dataset_to_param( new_dataset2 ), - ], - } + return history_id, [ dataset_to_param( new_dataset1 ), dataset_to_param( new_dataset2 ) ] + + def _check_cat1_multirun( self, 
history_id, inputs ): outputs = self._cat1_outputs( history_id, inputs=inputs ) self.assertEquals( len( outputs ), 2 ) output1 = outputs[ 0 ] @@ -154,6 +212,20 @@ self.assertEquals( output1_content.strip(), "123" ) self.assertEquals( output2_content.strip(), "456" ) + @skip_without_tool( "random_lines1" ) + def test_multirun_non_data_parameter( self ): + history_id = self.dataset_populator.new_history() + new_dataset1 = self.dataset_populator.new_dataset( history_id, content='123\n456\n789' ) + inputs = { + 'input': dataset_to_param( new_dataset1 ), + 'num_lines': { 'batch': True, 'values': [ 1, 2, 3 ] } + } + outputs = self._run_and_get_outputs( 'random_lines1', history_id, inputs ) + # Assert we have three outputs with 1, 2, and 3 lines respectively. + assert len( outputs ) == 3 + outputs_contents = [ self.dataset_populator.get_history_dataset_content( history_id, dataset=o ).strip() for o in outputs ] + assert sorted( map( lambda c: len( c.split( "\n" ) ), outputs_contents ) ) == [ 1, 2, 3 ] + @skip_without_tool( "cat1" ) def test_multirun_in_repeat( self ): history_id = self.dataset_populator.new_history() @@ -177,35 +249,60 @@ self.assertEquals( output2_content.strip(), "Common\n456" ) @skip_without_tool( "cat1" ) - def test_multirun_on_multiple_inputs( self ): - history_id = self.dataset_populator.new_history() - new_dataset1 = self.dataset_populator.new_dataset( history_id, content='123' ) - new_dataset2 = self.dataset_populator.new_dataset( history_id, content='456' ) - new_dataset3 = self.dataset_populator.new_dataset( history_id, content='789' ) - new_dataset4 = self.dataset_populator.new_dataset( history_id, content='0ab' ) + def test_multirun_on_multiple_inputs_legacy( self ): + history_id, first_two, second_two = self._setup_two_multiruns() inputs = { - "input1|__multirun__": [ - dataset_to_param( new_dataset1 ), - dataset_to_param( new_dataset2 ), - ], - 'queries_0|input2|__multirun__': [ - dataset_to_param( new_dataset3 ), - dataset_to_param( new_dataset4 ), - ], + "input1|__multirun__": first_two, + 'queries_0|input2|__multirun__': second_two, } outputs = self._cat1_outputs( history_id, inputs=inputs ) self.assertEquals( len( outputs ), 2 ) outputs_contents = [ self.dataset_populator.get_history_dataset_content( history_id, dataset=o ).strip() for o in outputs ] assert "123\n789" in outputs_contents assert "456\n0ab" in outputs_contents - # TODO: Once cross production (instead of linking inputs) is an option - # again redo test with these checks... 
- # self.assertEquals( len( outputs ), 4 ) - # assert "123\n0ab" in outputs_contents - # assert "456\n789" in outputs_contents @skip_without_tool( "cat1" ) - def test_map_over_collection( self ): + def test_multirun_on_multiple_inputs( self ): + history_id, first_two, second_two = self._setup_two_multiruns() + inputs = { + "input1": { 'batch': True, 'values': first_two }, + 'queries_0|input2': { 'batch': True, 'values': second_two }, + } + outputs = self._cat1_outputs( history_id, inputs=inputs ) + self.assertEquals( len( outputs ), 2 ) + outputs_contents = [ self.dataset_populator.get_history_dataset_content( history_id, dataset=o ).strip() for o in outputs ] + assert "123\n789" in outputs_contents + assert "456\n0ab" in outputs_contents + + @skip_without_tool( "cat1" ) + def test_multirun_on_multiple_inputs_unlinked( self ): + history_id, first_two, second_two = self._setup_two_multiruns() + inputs = { + "input1": { 'batch': True, 'linked': False, 'values': first_two }, + 'queries_0|input2': { 'batch': True, 'linked': False, 'values': second_two }, + } + outputs = self._cat1_outputs( history_id, inputs=inputs ) + outputs_contents = [ self.dataset_populator.get_history_dataset_content( history_id, dataset=o ).strip() for o in outputs ] + self.assertEquals( len( outputs ), 4 ) + assert "123\n789" in outputs_contents + assert "456\n0ab" in outputs_contents + assert "123\n0ab" in outputs_contents + assert "456\n789" in outputs_contents + + def _setup_two_multiruns( self ): + history_id = self.dataset_populator.new_history() + new_dataset1 = self.dataset_populator.new_dataset( history_id, content='123' ) + new_dataset2 = self.dataset_populator.new_dataset( history_id, content='456' ) + new_dataset3 = self.dataset_populator.new_dataset( history_id, content='789' ) + new_dataset4 = self.dataset_populator.new_dataset( history_id, content='0ab' ) + return ( + history_id, + [ dataset_to_param( new_dataset1 ), dataset_to_param( new_dataset2 ) ], + [ dataset_to_param( new_dataset3 ), dataset_to_param( new_dataset4 ) ] + ) + + @skip_without_tool( "cat1" ) + def test_map_over_collection_legacy( self ): history_id = self.dataset_populator.new_history() hdca_id = self.__build_pair( history_id, [ "123", "456" ] ) inputs = { @@ -214,6 +311,18 @@ # first, next test method tests other. 
"input1|__collection_multirun__": hdca_id, } + self._run_and_check_simple_collection_mapping( history_id, inputs ) + + @skip_without_tool( "cat1" ) + def test_map_over_collection( self ): + history_id = self.dataset_populator.new_history() + hdca_id = self.__build_pair( history_id, [ "123", "456" ] ) + inputs = { + "input1": { 'batch': True, 'values': [ { 'src': 'hdca', 'id': hdca_id } ] }, + } + self._run_and_check_simple_collection_mapping( history_id, inputs ) + + def _run_and_check_simple_collection_mapping( self, history_id, inputs ): create = self._run_cat1( history_id, inputs=inputs, assert_ok=True ) outputs = create[ 'outputs' ] jobs = create[ 'jobs' ] @@ -229,12 +338,24 @@ self.assertEquals( output2_content.strip(), "456" ) @skip_without_tool( "cat1" ) - def test_map_over_nested_collections( self ): + def test_map_over_nested_collections_legacy( self ): history_id = self.dataset_populator.new_history() hdca_id = self.__build_nested_list( history_id ) inputs = { "input1|__collection_multirun__": dict( src="hdca", id=hdca_id ), } + self._check_simple_cat1_over_nested_collections( history_id, inputs ) + + @skip_without_tool( "cat1" ) + def test_map_over_nested_collections( self ): + history_id = self.dataset_populator.new_history() + hdca_id = self.__build_nested_list( history_id ) + inputs = { + "input1": { 'batch': True, 'values': [ dict( src="hdca", id=hdca_id ) ] }, + } + self._check_simple_cat1_over_nested_collections( history_id, inputs ) + + def _check_simple_cat1_over_nested_collections( self, history_id, inputs ): create = self._run_cat1( history_id, inputs=inputs, assert_ok=True ) outputs = create[ 'outputs' ] jobs = create[ 'jobs' ] @@ -257,7 +378,7 @@ self.assertEquals( outputs[ 0 ][ "id" ], first_object_forward_element[ "object" ][ "id" ] ) @skip_without_tool( "cat1" ) - def test_map_over_two_collections( self ): + def test_map_over_two_collections_legacy( self ): history_id = self.dataset_populator.new_history() hdca1_id = self.__build_pair( history_id, [ "123", "456" ] ) hdca2_id = self.__build_pair( history_id, [ "789", "0ab" ] ) @@ -265,7 +386,24 @@ "input1|__collection_multirun__": hdca1_id, "queries_0|input2|__collection_multirun__": hdca2_id, } - outputs = self._cat1_outputs( history_id, inputs=inputs ) + self._check_map_cat1_over_two_collections( history_id, inputs ) + + @skip_without_tool( "cat1" ) + def test_map_over_two_collections( self ): + history_id = self.dataset_populator.new_history() + hdca1_id = self.__build_pair( history_id, [ "123", "456" ] ) + hdca2_id = self.__build_pair( history_id, [ "789", "0ab" ] ) + inputs = { + "input1": { 'batch': True, 'values': [ {'src': 'hdca', 'id': hdca1_id } ] }, + "queries_0|input2": { 'batch': True, 'values': [ { 'src': 'hdca', 'id': hdca2_id } ] }, + } + self._check_map_cat1_over_two_collections( history_id, inputs ) + + def _check_map_cat1_over_two_collections( self, history_id, inputs ): + response = self._run_cat1( history_id, inputs ) + self._assert_status_code_is( response, 200 ) + response_object = response.json() + outputs = response_object[ 'outputs' ] self.assertEquals( len( outputs ), 2 ) output1 = outputs[ 0 ] output2 = outputs[ 1 ] @@ -274,6 +412,49 @@ self.assertEquals( output1_content.strip(), "123\n789" ) self.assertEquals( output2_content.strip(), "456\n0ab" ) + self.assertEquals( len( response_object[ 'jobs' ] ), 2 ) + self.assertEquals( len( response_object[ 'implicit_collections' ] ), 1 ) + + @skip_without_tool( "cat1" ) + def test_map_over_two_collections_unlinked( self ): + history_id = 
+ history_id = self.dataset_populator.new_history()
+ hdca1_id = self.__build_pair( history_id, [ "123", "456" ] )
+ hdca2_id = self.__build_pair( history_id, [ "789", "0ab" ] )
+ inputs = {
+ "input1": { 'batch': True, 'linked': False, 'values': [ {'src': 'hdca', 'id': hdca1_id } ] },
+ "queries_0|input2": { 'batch': True, 'linked': False, 'values': [ { 'src': 'hdca', 'id': hdca2_id } ] },
+ }
+ response = self._run_cat1( history_id, inputs )
+ self._assert_status_code_is( response, 200 )
+ response_object = response.json()
+ outputs = response_object[ 'outputs' ]
+ self.assertEquals( len( outputs ), 4 )
+
+ self.assertEquals( len( response_object[ 'jobs' ] ), 4 )
+ # Implicit collections not created with unlinked inputs yet - this may
+ # be problematic.
+ self.assertEquals( len( response_object[ 'implicit_collections' ] ), 0 )
+
+ @skip_without_tool( "cat1" )
+ def test_map_over_collected_and_individual_datasets( self ):
+ history_id = self.dataset_populator.new_history()
+ hdca1_id = self.__build_pair( history_id, [ "123", "456" ] )
+ new_dataset1 = self.dataset_populator.new_dataset( history_id, content='789' )
+ new_dataset2 = self.dataset_populator.new_dataset( history_id, content='0ab' )
+
+ inputs = {
+ "input1": { 'batch': True, 'values': [ {'src': 'hdca', 'id': hdca1_id } ] },
+ "queries_0|input2": { 'batch': True, 'values': [ dataset_to_param( new_dataset1 ), dataset_to_param( new_dataset2 ) ] },
+ }
+ response = self._run_cat1( history_id, inputs )
+ self._assert_status_code_is( response, 200 )
+ response_object = response.json()
+ outputs = response_object[ 'outputs' ]
+ self.assertEquals( len( outputs ), 2 )
+
+ self.assertEquals( len( response_object[ 'jobs' ] ), 2 )
+ self.assertEquals( len( response_object[ 'implicit_collections' ] ), 1 )
+
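The batched tests above all use the new per-parameter wrapper: a dictionary with 'batch', an optional 'linked' flag (true by default), and a 'values' list. A minimal, hypothetical sketch of the two 'inputs' shapes and the job counts they imply follows; the { 'src': 'hda', 'id': ... } value format and the placeholder ids are assumptions for illustration, not values from this changeset:

    from itertools import product

    # Placeholder dataset references standing in for dataset_to_param( ... ) results.
    first_two = [ { 'src': 'hda', 'id': 'hda-placeholder-1' }, { 'src': 'hda', 'id': 'hda-placeholder-2' } ]
    second_two = [ { 'src': 'hda', 'id': 'hda-placeholder-3' }, { 'src': 'hda', 'id': 'hda-placeholder-4' } ]

    # Linked batches (the default) are matched by index, so cat1 runs once per pair.
    linked_inputs = {
        'input1': { 'batch': True, 'values': first_two },
        'queries_0|input2': { 'batch': True, 'values': second_two },
    }
    print( len( list( zip( first_two, second_two ) ) ) )      # expect 2 jobs

    # Unlinked batches are multiplied, so cat1 runs once per combination of values.
    unlinked_inputs = {
        'input1': { 'batch': True, 'linked': False, 'values': first_two },
        'queries_0|input2': { 'batch': True, 'linked': False, 'values': second_two },
    }
    print( len( list( product( first_two, second_two ) ) ) )  # expect 4 jobs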
@skip_without_tool( "cat1" )
def test_cannot_map_over_incompatible_collections( self ):
history_id = self.dataset_populator.new_history()
@@ -289,7 +470,7 @@
assert run_response.status_code >= 400
@skip_without_tool( "multi_data_param" )
- def test_reduce_collections( self ):
+ def test_reduce_collections_legacy( self ):
history_id = self.dataset_populator.new_history()
hdca1_id = self.__build_pair( history_id, [ "123", "456" ] )
hdca2_id = self.dataset_collection_populator.create_list_in_history( history_id ).json()[ "id" ]
@@ -297,6 +478,20 @@
"f1": "__collection_reduce__|%s" % hdca1_id,
"f2": "__collection_reduce__|%s" % hdca2_id,
}
+ self._check_simple_reduce_job( history_id, inputs )
+
+ @skip_without_tool( "multi_data_param" )
+ def test_reduce_collections( self ):
+ history_id = self.dataset_populator.new_history()
+ hdca1_id = self.__build_pair( history_id, [ "123", "456" ] )
+ hdca2_id = self.dataset_collection_populator.create_list_in_history( history_id ).json()[ "id" ]
+ inputs = {
+ "f1": { 'src': 'hdca', 'id': hdca1_id },
+ "f2": { 'src': 'hdca', 'id': hdca2_id },
+ }
+ self._check_simple_reduce_job( history_id, inputs )
+
+ def _check_simple_reduce_job( self, history_id, inputs ):
create = self._run( "multi_data_param", history_id, inputs, assert_ok=True )
outputs = create[ 'outputs' ]
jobs = create[ 'jobs' ]
@@ -310,12 +505,27 @@
assert len( output2_content.strip().split("\n") ) == 3, output2_content
@skip_without_tool( "collection_paired_test" )
- def test_subcollection_mapping( self ):
+ def test_subcollection_mapping_legacy( self ):
history_id = self.dataset_populator.new_history()
hdca_list_id = self.__build_nested_list( history_id )
inputs = {
"f1|__collection_multirun__": "%s|paired" % hdca_list_id
}
+ self._check_simple_subcollection_mapping( history_id, inputs )
+
+ @skip_without_tool( "collection_paired_test" )
+ def test_subcollection_mapping( self ):
+ history_id = self.dataset_populator.new_history()
+ hdca_list_id = self.__build_nested_list( history_id )
+ inputs = {
+ "f1": {
+ 'batch': True,
+ 'values': [ { 'src': 'hdca', 'map_over_type': 'paired', 'id': hdca_list_id }],
+ }
+ }
+ self._check_simple_subcollection_mapping( history_id, inputs )
+
+ def _check_simple_subcollection_mapping( self, history_id, inputs ):
# Following wait not really needed - just getting so many database
# locked errors with sqlite.
self.dataset_populator.wait_for_history( history_id, assert_ok=True )
@@ -329,7 +539,7 @@
assert output2_content.strip() == "789\n0ab", output2_content
@skip_without_tool( "collection_mixed_param" )
- def test_combined_mapping_and_subcollection_mapping( self ):
+ def test_combined_mapping_and_subcollection_mapping_legacy( self ):
history_id = self.dataset_populator.new_history()
nested_list_id = self.__build_nested_list( history_id )
create_response = self.dataset_collection_populator.create_list_in_history( history_id, contents=["xxx", "yyy"] )
@@ -338,6 +548,27 @@
"f1|__collection_multirun__": "%s|paired" % nested_list_id,
"f2|__collection_multirun__": list_id,
}
+ self._check_combined_mapping_and_subcollection_mapping( history_id, inputs )
+
+ @skip_without_tool( "collection_mixed_param" )
+ def test_combined_mapping_and_subcollection_mapping( self ):
+ history_id = self.dataset_populator.new_history()
+ nested_list_id = self.__build_nested_list( history_id )
+ create_response = self.dataset_collection_populator.create_list_in_history( history_id, contents=["xxx", "yyy"] )
+ list_id = create_response.json()[ "id" ]
+ inputs = {
+ "f1": {
+ 'batch': True,
+ 'values': [ { 'src': 'hdca', 'map_over_type': 'paired', 'id': nested_list_id }],
+ },
+ "f2": {
+ 'batch': True,
+ 'values': [ { 'src': 'hdca', 'id': list_id }],
+ },
+ }
+ self._check_combined_mapping_and_subcollection_mapping( history_id, inputs )
+
+ def _check_combined_mapping_and_subcollection_mapping( self, history_id, inputs ):
self.dataset_populator.wait_for_history( history_id, assert_ok=True )
outputs = self._run_and_get_outputs( "collection_mixed_param", history_id, inputs )
assert len( outputs ), 2
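For reference before the test_workflows.py hunks below, a short sketch lines up the legacy keys with their new-style equivalents as exercised by the *_legacy tests above; the encoded ids are placeholders, not values from this changeset:

    # Map over a flat collection.
    legacy_map = { 'input1|__collection_multirun__': 'hdca-placeholder-id' }
    new_map = { 'input1': { 'batch': True, 'values': [ { 'src': 'hdca', 'id': 'hdca-placeholder-id' } ] } }

    # Map over the paired subcollections of a nested (list:paired) collection.
    legacy_submap = { 'f1|__collection_multirun__': 'hdca-placeholder-id|paired' }
    new_submap = { 'f1': { 'batch': True, 'values': [ { 'src': 'hdca', 'map_over_type': 'paired', 'id': 'hdca-placeholder-id' } ] } }

    # Reduce a whole collection into a multi-data parameter (no batching involved).
    legacy_reduce = { 'f1': '__collection_reduce__|hdca-placeholder-id' }
    new_reduce = { 'f1': { 'src': 'hdca', 'id': 'hdca-placeholder-id' } }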
diff -r 42f677bab2e4880b62758614fcb2c7dc27013788 -r df92b55d194a4d67a1b863c85044eb91c138c74b test/api/test_workflows.py
--- a/test/api/test_workflows.py
+++ b/test/api/test_workflows.py
@@ -361,13 +361,13 @@
hdca = self.dataset_collection_populator.create_pair_in_history( history_id, contents=["1 2 3\n4 5 6", "7 8 9\n10 11 10"] ).json()
hdca_id = hdca[ "id" ]
inputs1 = {
- "input|__collection_multirun__": hdca_id,
+ "input": { "batch": True, "values": [ { "src": "hdca", "id": hdca_id } ] },
"num_lines": 2
}
implicit_hdca1, job_id1 = self._run_tool_get_collection_and_job_id( history_id, "random_lines1", inputs1 )
inputs2 = {
- "f1": "__collection_reduce__|%s" % ( implicit_hdca1[ "id" ] ),
- "f2": "__collection_reduce__|%s" % ( implicit_hdca1[ "id" ] )
+ "f1": { "src": "hdca", "id": implicit_hdca1[ "id" ] },
+ "f2": { "src": "hdca", "id": implicit_hdca1[ "id" ] },
}
reduction_run_output = self.dataset_populator.run_tool(
tool_id="multi_data_param",
@@ -413,12 +413,12 @@
hdca = self.dataset_collection_populator.create_pair_in_history( history_id, contents=["1 2 3\n4 5 6", "7 8 9\n10 11 10"] ).json()
hdca_id = hdca[ "id" ]
inputs1 = {
- "input|__collection_multirun__": hdca_id,
+ "input": { "batch": True, "values": [ { "src": "hdca", "id": hdca_id } ] },
"num_lines": 2
}
implicit_hdca1, job_id1 = self._run_tool_get_collection_and_job_id( history_id, "random_lines1", inputs1 )
inputs2 = {
- "input|__collection_multirun__": implicit_hdca1[ "id" ],
+ "input": { "batch": True, "values": [ { "src": "hdca", "id": implicit_hdca1[ "id" ] } ] },
"num_lines": 1
}
_, job_id2 = self._run_tool_get_collection_and_job_id( history_id, "random_lines1", inputs2 )

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.