commit/galaxy-central: 3 new changesets

7 May 2014

3 new commits in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/0b087515ad77/
Changeset:   0b087515ad77
User:        jmchilton
Date:        2014-05-07 22:39:03
Summary:     Add framework test tool that has both data and data_collection param - with test case.
The tool test is mildly useful for verifying that this works for a single execution, but it is more useful as a basis for future changesets testing mixed collection and subcollection mapping tool executions.
Affected #:  3 files

diff -r 4bf3a182df988e5c17e5f6080900130636ecf00f -r 0b087515ad774b62d69a35af6d738b70bf006a37 test-data/simple_lines_both.txt

--- /dev/null
+++ b/test-data/simple_lines_both.txt
@@ -0,0 +1,2 @@
+This is a line of text.
+This is a different line of text.
\ No newline at end of file

diff -r 4bf3a182df988e5c17e5f6080900130636ecf00f -r 0b087515ad774b62d69a35af6d738b70bf006a37 test/functional/tools/collection_mixed_param.xml
--- /dev/null
+++ b/test/functional/tools/collection_mixed_param.xml
@@ -0,0 +1,24 @@
+<tool id="collection_mixed_param" name="collection_mixed_param" version="0.1.0">
+  <command>
+    cat #for $f in $f1# ${f} #end for# $f2 >> $out1;
+  </command>
+  <inputs>
+    <param name="f1" type="data_collection" collection_type="paired" />
+    <param name="f2" type="data" format="txt" />
+  </inputs>
+  <outputs>
+    <data format="txt" name="out1" />
+  </outputs>
+  <tests>
+    <test>
+      <param name="f1">
+        <collection type="paired">
+          <element name="left" value="simple_line.txt" />
+          <element name="right" value="simple_line_alternative.txt" />
+        </collection>
+      </param>
+      <param name="f2" value="simple_lines_both.txt" />
+      <output name="out1" file="simple_lines_interleaved.txt"/>
+    </test>
+  </tests>
+</tool>

diff -r 4bf3a182df988e5c17e5f6080900130636ecf00f -r 0b087515ad774b62d69a35af6d738b70bf006a37 test/functional/tools/samples_tool_conf.xml
--- a/test/functional/tools/samples_tool_conf.xml
+++ b/test/functional/tools/samples_tool_conf.xml
@@ -20,4 +20,5 @@
   <tool file="multi_data_param.xml" /><tool file="collection_paired_test.xml" /><tool file="collection_nested_test.xml" />
+  <tool file="collection_mixed_param.xml" /></toolbox>
\ No newline at end of file


https://bitbucket.org/galaxy/galaxy-central/commits/c28fa21b8f8c/
Changeset:   c28fa21b8f8c
User:        jmchilton
Date:        2014-05-07 22:39:03
Summary:     Bugfix: Don't require matching identifiers to match up collections.
I changed this in some other places in the code before committing but this part stayed broken. Collections will match on order, type, and number of elements (pushing the problem to UI) - element identifiers can be different but will be preserved when possible.
Affected #:  1 file

diff -r 0b087515ad774b62d69a35af6d738b70bf006a37 -r c28fa21b8f8ca56068f58c084b9f23fd048e7eea lib/galaxy/dataset_collections/structure.py
--- a/lib/galaxy/dataset_collections/structure.py
+++ b/lib/galaxy/dataset_collections/structure.py
@@ -63,9 +63,6 @@
             return False
 
         for my_child, other_child in zip( self.children, other_structure.children ):
-            if my_child[ 0 ] != other_child[ 0 ]:  # Different identifiers, TODO: generalize
-                return False
-
             # At least one is nested collection...
             if my_child[ 1 ].is_leaf != other_child[ 1 ].is_leaf:
                 return False


https://bitbucket.org/galaxy/galaxy-central/commits/d283202d915e/
Changeset:   d283202d915e
User:        jmchilton
Date:        2014-05-07 22:39:03
Summary:     Bugfix: Rework collection matching logic so it properly matches combined collection/subcollection mapping.
With functional test to verify it works end-to-end with tool execution and unit tests to verify different kinds of collection combinations.
Affected #:  7 files

diff -r c28fa21b8f8ca56068f58c084b9f23fd048e7eea -r d283202d915e6f3fc5f80330a1e85d3a32a16049 lib/galaxy/dataset_collections/matching.py
--- a/lib/galaxy/dataset_collections/matching.py
+++ b/lib/galaxy/dataset_collections/matching.py
@@ -61,7 +61,7 @@
         matching_collections = MatchingCollections()
         for input_key, to_match in collections_to_match.iteritems():
             hdca = to_match.hdca
-            subcollection_type = to_match = to_match.subcollection_type
+            subcollection_type = to_match.subcollection_type
             collection_type_description = collection_type_descriptions.for_collection_type( hdca.collection.collection_type )
             matching_collections.__attempt_add_to_match( input_key, hdca, collection_type_description, subcollection_type )
 

diff -r c28fa21b8f8ca56068f58c084b9f23fd048e7eea -r d283202d915e6f3fc5f80330a1e85d3a32a16049 lib/galaxy/dataset_collections/structure.py
--- a/lib/galaxy/dataset_collections/structure.py
+++ b/lib/galaxy/dataset_collections/structure.py
@@ -18,19 +18,15 @@
 
 class Tree( object ):
 
-    def __init__( self, dataset_collection, collection_type_description, leaf_subcollection_type ):
+    def __init__( self, dataset_collection, collection_type_description ):
         self.collection_type_description = collection_type_description
-        self.leaf_subcollection_type = leaf_subcollection_type  # collection_type to trim tree at...
         children = []
         for element in dataset_collection.elements:
-            child_collection = element.child_collection
-            if child_collection:
+            if collection_type_description.has_subcollections():
+                child_collection = element.child_collection
                 subcollection_type_description = collection_type_description.subcollection_type_description()  # Type description of children
-                if subcollection_type_description.can_match_type( leaf_subcollection_type ):
-                    children.append( ( element.element_identifier, leaf  ) )
-                else:
-                    children.append( ( element.element_identifier, Tree( child_collection, collection_type_description=subcollection_type_description, leaf_subcollection_type=leaf_subcollection_type )  ) )
-            elif element.hda:
+                children.append( ( element.element_identifier, Tree( child_collection, collection_type_description=subcollection_type_description )  ) )
+            else:
                 children.append( ( element.element_identifier, leaf ) )
 
         self.children = children
@@ -56,7 +52,6 @@
 
     def can_match( self, other_structure ):
         if not self.collection_type_description.can_match_type( other_structure.collection_type_description ):
-            # TODO: generalize
             return False
 
         if len( self.children ) != len( other_structure.children ):
@@ -99,4 +94,7 @@
 
 
 def get_structure( dataset_collection_instance, collection_type_description, leaf_subcollection_type=None ):
-    return Tree( dataset_collection_instance.collection, collection_type_description, leaf_subcollection_type=leaf_subcollection_type )
+    if leaf_subcollection_type:
+        collection_type_description = collection_type_description.effective_collection_type_description( leaf_subcollection_type )
+
+    return Tree( dataset_collection_instance.collection, collection_type_description )

diff -r c28fa21b8f8ca56068f58c084b9f23fd048e7eea -r d283202d915e6f3fc5f80330a1e85d3a32a16049 lib/galaxy/dataset_collections/type_description.py
--- a/lib/galaxy/dataset_collections/type_description.py
+++ b/lib/galaxy/dataset_collections/type_description.py
@@ -15,9 +15,9 @@
     """ Abstraction over dataset collection type that ties together string
     reprentation in database/model with type registry.
 
-
-    >>> nested_type_description = CollectionTypeDescription( "list:paired", None )
-    >>> paired_type_description = CollectionTypeDescription( "paired", None )
+    >>> factory = CollectionTypeDescriptionFactory( None )
+    >>> nested_type_description = factory.for_collection_type( "list:paired" )
+    >>> paired_type_description = factory.for_collection_type( "paired" )
     >>> nested_type_description.has_subcollections_of_type( "list" )
     False
     >>> nested_type_description.has_subcollections_of_type( "list:paired" )
@@ -34,6 +34,10 @@
     'paired'
     >>> nested_type_description.rank_collection_type()
     'list'
+    >>> nested_type_description.effective_collection_type( paired_type_description )
+    'list'
+    >>> nested_type_description.effective_collection_type_description( paired_type_description ).collection_type
+    'list'
     """
 
     def __init__( self, collection_type, collection_type_description_factory ):
@@ -41,6 +45,19 @@
         self.collection_type_description_factory = collection_type_description_factory
         self.__has_subcollections = self.collection_type.find( ":" ) > 0
 
+    def effective_collection_type_description( self, subcollection_type ):
+        effective_collection_type = self.effective_collection_type( subcollection_type )
+        return self.collection_type_description_factory.for_collection_type( effective_collection_type )
+
+    def effective_collection_type( self, subcollection_type ):
+        if hasattr( subcollection_type, 'collection_type' ):
+            subcollection_type = subcollection_type.collection_type
+
+        if not self.has_subcollections_of_type( subcollection_type ):
+            raise ValueError( "Cannot compute effective subcollection type of %s over %s" % ( subcollection_type, self ) )
+
+        return self.collection_type[ :-( len( subcollection_type ) + 1 ) ]
+
     def has_subcollections_of_type( self, other_collection_type ):
         """ Take in another type (either flat string or another
         CollectionTypeDescription) and determine if this collection contains

diff -r c28fa21b8f8ca56068f58c084b9f23fd048e7eea -r d283202d915e6f3fc5f80330a1e85d3a32a16049 test/api/helpers.py
--- a/test/api/helpers.py
+++ b/test/api/helpers.py
@@ -291,12 +291,10 @@
         return element_identifiers
 
     def list_identifiers( self, history_id, contents=None ):
-        hda1, hda2, hda3 = self.__datasets( history_id, count=3, contents=contents )
-        element_identifiers = [
-            dict( name="data1", src="hda", id=hda1[ "id" ] ),
-            dict( name="data2", src="hda", id=hda2[ "id" ] ),
-            dict( name="data3", src="hda", id=hda3[ "id" ] ),
-        ]
+        count = 3 if not contents else len( contents )
+        hdas = self.__datasets( history_id, count=count, contents=contents )
+        hda_to_identifier = lambda ( i, hda ): dict( name="data%d" % ( i + 1 ), src="hda", id=hda[ "id" ] )
+        element_identifiers = map( hda_to_identifier, enumerate( hdas ) )
         return element_identifiers
 
     def __create( self, payload ):

diff -r c28fa21b8f8ca56068f58c084b9f23fd048e7eea -r d283202d915e6f3fc5f80330a1e85d3a32a16049 test/api/test_tools.py
--- a/test/api/test_tools.py
+++ b/test/api/test_tools.py
@@ -279,6 +279,27 @@
         assert output1_content.strip() == "123\n456", output1_content
         assert output2_content.strip() == "789\n0ab", output2_content
 
+    @skip_without_tool( "collection_mixed_param" )
+    def test_combined_mapping_and_subcollection_mapping( self ):
+        history_id = self.dataset_populator.new_history()
+        nested_list_id = self.__build_nested_list( history_id )
+        create_response = self.dataset_collection_populator.create_list_in_history( history_id, contents=["xxx", "yyy"] )
+        list_id = create_response.json()[ "id" ]
+        inputs = {
+            "f1|__collection_multirun__": "%s|paired" % nested_list_id,
+            "f2|__collection_multirun__": list_id,
+        }
+        self.dataset_populator.wait_for_history( history_id, assert_ok=True )
+        outputs = self._run_and_get_outputs( "collection_mixed_param", history_id, inputs )
+        assert len( outputs ), 2
+        self.dataset_populator.wait_for_history( history_id, assert_ok=True )
+        output1 = outputs[ 0 ]
+        output2 = outputs[ 1 ]
+        output1_content = self._get_content( history_id, dataset=output1 )
+        output2_content = self._get_content( history_id, dataset=output2 )
+        assert output1_content.strip() == "123\n456\nxxx", output1_content
+        assert output2_content.strip() == "789\n0ab\nyyy", output2_content
+
     def _cat1_outputs( self, history_id, inputs ):
         return self._run_outputs( self._run_cat1( history_id, inputs ) )
 

diff -r c28fa21b8f8ca56068f58c084b9f23fd048e7eea -r d283202d915e6f3fc5f80330a1e85d3a32a16049 test/unit/dataset_collections/test_matching.py
--- /dev/null
+++ b/test/unit/dataset_collections/test_matching.py
@@ -0,0 +1,141 @@
+from galaxy.dataset_collections import (
+    type_description,
+    registry,
+    matching,
+)
+
+TYPE_REGISTRY = registry.DatasetCollectionTypesRegistry( None )
+TYPE_DESCRIPTION_FACTORY = type_description.CollectionTypeDescriptionFactory( TYPE_REGISTRY )
+
+
+def test_pairs_match():
+    assert_can_match( pair_instance(), pair_instance() )
+
+
+def test_lists_of_same_cardinality_match():
+    assert_can_match( list_instance(), list_instance() )
+
+
+def test_nested_lists_match():
+    nested_list = list_instance(
+        elements=[
+            pair_element("data1"),
+            pair_element("data2"),
+            pair_element("data3"),
+        ],
+        collection_type="list:paired",
+    )
+    assert_can_match( nested_list, nested_list )
+
+
+def test_different_types_cannot_match():
+    assert_cannot_match( list_instance(), pair_instance() )
+    assert_cannot_match( pair_instance(), list_instance() )
+
+
+def test_lists_of_different_cardinality_do_not_match():
+    list_1 = list_instance( ids=[ "data1", "data2" ] )
+    list_2 = list_instance( ids=[ "data1", "data2", "data3" ] )
+    assert_cannot_match( list_1, list_2 )
+    assert_cannot_match( list_2, list_1 )
+
+
+def test_valid_collection_subcollection_matching():
+    flat_list = list_instance( ids=[ "data1", "data2", "data3" ] )
+    nested_list = list_instance(
+        elements=[
+            pair_element("data11"),
+            pair_element("data21"),
+            pair_element("data31"),
+        ],
+        collection_type="list:paired",
+    )
+    assert_cannot_match( flat_list, nested_list )
+    assert_cannot_match( nested_list, flat_list )
+    assert_can_match( ( nested_list, "paired" ), flat_list )
+
+
+def assert_can_match( *items ):
+    to_match = build_collections_to_match( *items )
+    matching.MatchingCollections.for_collections( to_match, TYPE_DESCRIPTION_FACTORY )
+
+
+def assert_cannot_match( *items ):
+    to_match = build_collections_to_match( *items )
+    threw_exception = False
+    try:
+        matching.MatchingCollections.for_collections( to_match, TYPE_DESCRIPTION_FACTORY )
+    except Exception:
+        threw_exception = True
+    assert threw_exception
+
+
+def build_collections_to_match( *items ):
+    to_match = matching.CollectionsToMatch()
+
+    for i, item in enumerate( items ):
+        if isinstance( item, tuple ):
+            collection_instance, subcollection_type = item
+        else:
+            collection_instance, subcollection_type = item, None
+        to_match.add( "input_%d" % i, collection_instance, subcollection_type )
+    return to_match
+
+
+def pair_element( element_identifier ):
+    return collection_element( element_identifier, pair_instance().collection )
+
+
+def pair_instance( ):
+    paired_collection_instance = collection_instance( collection_type="paired", elements=[
+        hda_element( "left" ),
+        hda_element( "right" ),
+    ] )
+    return paired_collection_instance
+
+
+def list_instance( collection_type="list", elements=None, ids=None ):
+    if not elements:
+        if ids is None:
+            ids = [ "data1", "data2" ]
+        elements = map(hda_element, ids)
+    list_collection_instance = collection_instance(
+        collection_type=collection_type,
+        elements=elements
+    )
+    return list_collection_instance
+
+
+class MockCollectionInstance( object ):
+
+    def __init__( self, collection_type, elements ):
+        self.collection = MockCollection( collection_type, elements )
+
+
+class MockCollection( object ):
+
+    def __init__( self, collection_type, elements ):
+        self.collection_type = collection_type
+        self.elements = elements
+
+
+class MockCollectionElement( object ):
+
+    def __init__( self, element_identifier, collection ):
+        self.element_identifier = element_identifier
+        self.child_collection = collection
+        self.hda = None
+
+
+class MockHDAElement( object ):
+
+    def __init__( self, element_identifier ):
+        self.element_identifier = element_identifier
+        self.child_collection = False
+        self.hda = object()
+
+
+collection_instance = MockCollectionInstance
+collection = MockCollection
+collection_element = MockCollectionElement
+hda_element = MockHDAElement

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled and is
configured to address the recipient of this email.

    

commits-noreply＠bitbucket.org

tags

participants (1)