1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/8e6cda4c1b3d/
Changeset:   8e6cda4c1b3d
User:        jmchilton
Date:        2014-05-06 15:13:29
Summary:     Merged in jmchilton/galaxy-central-fork-1 (pull request #356)
Enhancements for Runtime Discovered (Collected Primary) Datasets

Affected #:  13 files
diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py
+++ b/lib/galaxy/tools/__init__.py
@@ -42,6 +42,7 @@
 from galaxy.tools.deps import build_dependency_manager
 from galaxy.tools.deps.requirements import parse_requirements_from_xml
 from galaxy.tools.parameters import check_param, params_from_strings, params_to_strings
+from galaxy.tools.parameters import output_collect
 from galaxy.tools.parameters.basic import (BaseURLToolParameter,
                                            DataToolParameter, HiddenToolParameter, LibraryDatasetToolParameter,
                                            SelectToolParameter, ToolParameter, UnvalidatedValue,
@@ -1452,6 +1453,7 @@
             output.hidden = string_as_bool( data_elem.get("hidden", "") )
             output.tool = self
             output.actions = ToolOutputActionGroup( output, data_elem.find( 'actions' ) )
+            output.dataset_collectors = output_collect.dataset_collectors_from_elem( data_elem )
             self.outputs[ output.name ] = output
 
         # TODO: Include the tool's name in any parsing warnings.
@@ -2800,94 +2802,7 @@
         Find any additional datasets generated by a tool and attach (for
         cases where number of outputs is not known in advance).
         """
-        new_primary_datasets = {}
-        try:
-            json_file = open( os.path.join( job_working_directory, jobs.TOOL_PROVIDED_JOB_METADATA_FILE ), 'r' )
-            for line in json_file:
-                line = json.loads( line )
-                if line.get( 'type' ) == 'new_primary_dataset':
-                    new_primary_datasets[ os.path.split( line.get( 'filename' ) )[-1] ] = line
-        except Exception:
-            # This should not be considered an error or warning condition, this file is optional
-            pass
-        # Loop through output file names, looking for generated primary
-        # datasets in form of:
-        # 'primary_associatedWithDatasetID_designation_visibility_extension(_DBKEY)'
-        primary_datasets = {}
-        for name, outdata in output.items():
-            filenames = []
-            if 'new_file_path' in self.app.config.collect_outputs_from:
-                filenames.extend( glob.glob(os.path.join(self.app.config.new_file_path, "primary_%i_*" % outdata.id) ) )
-            if 'job_working_directory' in self.app.config.collect_outputs_from:
-                filenames.extend( glob.glob(os.path.join(job_working_directory, "primary_%i_*" % outdata.id) ) )
-            for filename in filenames:
-                if not name in primary_datasets:
-                    primary_datasets[name] = {}
-                fields = os.path.basename(filename).split("_")
-                fields.pop(0)
-                parent_id = int(fields.pop(0))
-                designation = fields.pop(0)
-                visible = fields.pop(0).lower()
-                if visible == "visible":
-                    visible = True
-                else:
-                    visible = False
-                ext = fields.pop(0).lower()
-                dbkey = outdata.dbkey
-                if fields:
-                    dbkey = fields[ 0 ]
-                # Create new primary dataset
-                primary_data = self.app.model.HistoryDatasetAssociation( extension=ext,
-                                                                         designation=designation,
-                                                                         visible=visible,
-                                                                         dbkey=dbkey,
-                                                                         create_dataset=True,
-                                                                         sa_session=self.sa_session )
-                self.app.security_agent.copy_dataset_permissions( outdata.dataset, primary_data.dataset )
-                self.sa_session.add( primary_data )
-                self.sa_session.flush()
-                # Move data from temp location to dataset location
-                self.app.object_store.update_from_file(primary_data.dataset, file_name=filename, create=True)
-                primary_data.set_size()
-                primary_data.name = "%s (%s)" % ( outdata.name, designation )
-                primary_data.info = outdata.info
-                primary_data.init_meta( copy_from=outdata )
-                primary_data.dbkey = dbkey
-                # Associate new dataset with job
-                job = None
-                for assoc in outdata.creating_job_associations:
-                    job = assoc.job
-                    break
-                if job:
-                    assoc = self.app.model.JobToOutputDatasetAssociation( '__new_primary_file_%s|%s__' % ( name, designation ), primary_data )
-                    assoc.job = job
-                    self.sa_session.add( assoc )
-                    self.sa_session.flush()
-                primary_data.state = outdata.state
-                # add tool/metadata provided information
-                new_primary_datasets_attributes = new_primary_datasets.get( os.path.split( filename )[-1] )
-                if new_primary_datasets_attributes:
-                    dataset_att_by_name = dict( ext='extension' )
-                    for att_set in [ 'name', 'info', 'ext', 'dbkey' ]:
-                        dataset_att_name = dataset_att_by_name.get( att_set, att_set )
-                        setattr( primary_data, dataset_att_name, new_primary_datasets_attributes.get( att_set, getattr( primary_data, dataset_att_name ) ) )
-                primary_data.set_meta()
-                primary_data.set_peek()
-                self.sa_session.add( primary_data )
-                self.sa_session.flush()
-                outdata.history.add_dataset( primary_data )
-                # Add dataset to return dict
-                primary_datasets[name][designation] = primary_data
-                # Need to update all associated output hdas, i.e. history was
-                # shared with job running
-                for dataset in outdata.dataset.history_associations:
-                    if outdata == dataset:
-                        continue
-                    new_data = primary_data.copy()
-                    dataset.history.add_dataset( new_data )
-                    self.sa_session.add( new_data )
-                    self.sa_session.flush()
-        return primary_datasets
+        return output_collect.collect_primary_datatasets( self, output, job_working_directory )
 
     def to_dict( self, trans, link_details=False, io_details=False ):
         """ Returns dict of tool. """
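The 94 lines removed above parsed a positional, underscore-delimited filename convention by hand. For reference, here is a minimal standalone sketch of that convention (hypothetical filename; standard library only):

    # Old-style convention parsed by the removed block:
    #   primary_<parentDatasetId>_<designation>_<visibility>_<extension>[_<dbkey>]
    filename = "primary_42_world_visible_txt_hg19"  # hypothetical example

    fields = filename.split("_")
    fields.pop(0)                                  # literal "primary" prefix
    parent_id = int(fields.pop(0))                 # 42, id of the associated output
    designation = fields.pop(0)                    # "world"
    visible = fields.pop(0).lower() == "visible"   # True
    ext = fields.pop(0).lower()                    # "txt"
    dbkey = fields[0] if fields else None          # "hg19"; otherwise the parent's dbkey is used
    print("%d %s %s %s %s" % (parent_id, designation, visible, ext, dbkey))

The new output_collect module below replaces this positional parsing with configurable regular-expression collectors.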
diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/tools/parameters/output_collect.py
--- /dev/null
+++ b/lib/galaxy/tools/parameters/output_collect.py
@@ -0,0 +1,219 @@
+""" Code allowing tools to define extra files associated with an output dataset.
+"""
+import os
+import re
+import glob
+import json
+
+
+from galaxy import jobs
+from galaxy import util
+from galaxy.util import odict
+
+DATASET_ID_TOKEN = "DATASET_ID"
+DEFAULT_EXTRA_FILENAME_PATTERN = r"primary_DATASET_ID_(?P<designation>[^_]+)_(?P<visible>[^_]+)_(?P<ext>[^_]+)(_(?P<dbkey>[^_]+))?"
+
+
+def collect_primary_datatasets( tool, output, job_working_directory ):
+    app = tool.app
+    sa_session = tool.sa_session
+    new_primary_datasets = {}
+    try:
+        json_file = open( os.path.join( job_working_directory, jobs.TOOL_PROVIDED_JOB_METADATA_FILE ), 'r' )
+        for line in json_file:
+            line = json.loads( line )
+            if line.get( 'type' ) == 'new_primary_dataset':
+                new_primary_datasets[ os.path.split( line.get( 'filename' ) )[-1] ] = line
+    except Exception:
+        # This should not be considered an error or warning condition, this file is optional
+        pass
+    # Loop through output file names, looking for generated primary
+    # datasets in form of:
+    # 'primary_associatedWithDatasetID_designation_visibility_extension(_DBKEY)'
+    primary_datasets = {}
+    for name, outdata in output.items():
+        dataset_collectors = tool.outputs[ name ].dataset_collectors if name in tool.outputs else [ DEFAULT_DATASET_COLLECTOR ]
+        filenames = odict.odict()
+        if 'new_file_path' in app.config.collect_outputs_from:
+            if DEFAULT_DATASET_COLLECTOR in dataset_collectors:
+                # 'new_file_path' collection should be considered deprecated,
+                # only use old-style matching (glob instead of regex and only
+                # using default collector - if enabled).
+                for filename in glob.glob(os.path.join(app.config.new_file_path, "primary_%i_*" % outdata.id) ):
+                    filenames[ filename ] = DEFAULT_DATASET_COLLECTOR
+        if 'job_working_directory' in app.config.collect_outputs_from:
+            for extra_file_collector in dataset_collectors:
+                directory = job_working_directory
+                if extra_file_collector.directory:
+                    directory = os.path.join( directory, extra_file_collector.directory )
+                    if not util.in_directory( directory, job_working_directory ):
+                        raise Exception( "Problem with tool configuration, attempting to pull in datasets from outside working directory." )
+                if not os.path.isdir( directory ):
+                    continue
+                for filename in os.listdir( directory ):
+                    path = os.path.join( directory, filename )
+                    if not os.path.isfile( path ):
+                        continue
+                    if extra_file_collector.match( outdata, filename ):
+                        filenames[ path ] = extra_file_collector
+        for filename, extra_file_collector in filenames.iteritems():
+            if not name in primary_datasets:
+                primary_datasets[name] = {}
+            fields_match = extra_file_collector.match( outdata, os.path.basename( filename ) )
+            if not fields_match:
+                # Before I guess pop() would just have thrown an IndexError
+                raise Exception( "Problem parsing metadata fields for file %s" % filename )
+            designation = fields_match.designation
+            visible = fields_match.visible
+            ext = fields_match.ext
+            dbkey = fields_match.dbkey
+            # Create new primary dataset
+            primary_data = app.model.HistoryDatasetAssociation( extension=ext,
+                                                                designation=designation,
+                                                                visible=visible,
+                                                                dbkey=dbkey,
+                                                                create_dataset=True,
+                                                                sa_session=sa_session )
+            app.security_agent.copy_dataset_permissions( outdata.dataset, primary_data.dataset )
+            sa_session.add( primary_data )
+            sa_session.flush()
+            # Move data from temp location to dataset location
+            app.object_store.update_from_file(primary_data.dataset, file_name=filename, create=True)
+            primary_data.set_size()
+            # If match specified a name use it, otherwise generate one from
+            # designation.
+            primary_data.name = fields_match.name or "%s (%s)" % ( outdata.name, designation )
+            primary_data.info = outdata.info
+            primary_data.init_meta( copy_from=outdata )
+            primary_data.dbkey = dbkey
+            # Associate new dataset with job
+            job = None
+            for assoc in outdata.creating_job_associations:
+                job = assoc.job
+                break
+            if job:
+                assoc = app.model.JobToOutputDatasetAssociation( '__new_primary_file_%s|%s__' % ( name, designation ), primary_data )
+                assoc.job = job
+                sa_session.add( assoc )
+                sa_session.flush()
+            primary_data.state = outdata.state
+            # add tool/metadata provided information
+            new_primary_datasets_attributes = new_primary_datasets.get( os.path.split( filename )[-1] )
+            if new_primary_datasets_attributes:
+                dataset_att_by_name = dict( ext='extension' )
+                for att_set in [ 'name', 'info', 'ext', 'dbkey' ]:
+                    dataset_att_name = dataset_att_by_name.get( att_set, att_set )
+                    setattr( primary_data, dataset_att_name, new_primary_datasets_attributes.get( att_set, getattr( primary_data, dataset_att_name ) ) )
+            primary_data.set_meta()
+            primary_data.set_peek()
+            sa_session.add( primary_data )
+            sa_session.flush()
+            outdata.history.add_dataset( primary_data )
+            # Add dataset to return dict
+            primary_datasets[name][designation] = primary_data
+            # Need to update all associated output hdas, i.e. history was
+            # shared with job running
+            for dataset in outdata.dataset.history_associations:
+                if outdata == dataset:
+                    continue
+                new_data = primary_data.copy()
+                dataset.history.add_dataset( new_data )
+                sa_session.add( new_data )
+                sa_session.flush()
+    return primary_datasets
+
+
+# XML can describe custom patterns, but these literals describe named
+# patterns that will be replaced.
+NAMED_PATTERNS = {
+    "__default__": DEFAULT_EXTRA_FILENAME_PATTERN,
+    "__name__": r"(?P<name>.*)",
+    "__designation__": r"(?P<designation>.*)",
+    "__name_and_ext__": r"(?P<name>.*)\.(?P<ext>[^\.]+)?",
+    "__designation_and_ext__": r"(?P<designation>.*)\.(?P<ext>[^\._]+)?",
+}
+
+
+def dataset_collectors_from_elem( elem ):
+    primary_dataset_elems = elem.findall( "discover_datasets" )
+    if not primary_dataset_elems:
+        return [ DEFAULT_DATASET_COLLECTOR ]
+    else:
+        return map( lambda elem: DatasetCollector( **elem.attrib ), primary_dataset_elems )
+
+
+class DatasetCollector( object ):
+
+    def __init__( self, **kwargs ):
+        pattern = kwargs.get( "pattern", "__default__" )
+        if pattern in NAMED_PATTERNS:
+            pattern = NAMED_PATTERNS.get( pattern )
+        self.pattern = pattern
+        self.default_dbkey = kwargs.get( "dbkey", None )
+        self.default_ext = kwargs.get( "ext", None )
+        self.default_visible = util.asbool( kwargs.get( "visible", None ) )
+        self.directory = kwargs.get( "directory", None )
+
+    def pattern_for_dataset( self, dataset_instance=None ):
+        token_replacement = r'\d+'
+        if dataset_instance:
+            token_replacement = str( dataset_instance.id )
+        return self.pattern.replace( DATASET_ID_TOKEN, token_replacement )
+
+    def match( self, dataset_instance, filename ):
+        re_match = re.match( self.pattern_for_dataset( dataset_instance ), filename )
+        match_object = None
+        if re_match:
+            match_object = CollectedDatasetMatch( re_match, self )
+        return match_object
+
+
+class CollectedDatasetMatch( object ):
+
+    def __init__( self, re_match, collector ):
+        self.re_match = re_match
+        self.collector = collector
+
+    @property
+    def designation( self ):
+        re_match = self.re_match
+        if "designation" in re_match.groupdict():
+            return re_match.group( "designation" )
+        elif "name" in re_match.groupdict():
+            return re_match.group( "name" )
+        else:
+            return None
+
+    @property
+    def name( self ):
+        """ Return name or None if not defined by the discovery pattern.
+        """
+        re_match = self.re_match
+        name = None
+        if "name" in re_match.groupdict():
+            name = re_match.group( "name" )
+        return name
+
+    @property
+    def dbkey( self ):
+        try:
+            return self.re_match.group( "dbkey" )
+        except IndexError:
+            return self.collector.default_dbkey
+
+    @property
+    def ext( self ):
+        try:
+            return self.re_match.group( "ext" )
+        except IndexError:
+            return self.collector.default_ext
+
+    @property
+    def visible( self ):
+        try:
+            return self.re_match.group( "visible" ).lower() == "visible"
+        except IndexError:
+            return self.collector.default_visible
+
+
+DEFAULT_DATASET_COLLECTOR = DatasetCollector()
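To see what the default collector's regular expression captures, here is a minimal sketch (standard library only; the pattern literal is copied from the module above, and dataset id 42 is a hypothetical stand-in for the substitution pattern_for_dataset() performs):

    import re

    # Copied from output_collect.py above.
    DEFAULT_EXTRA_FILENAME_PATTERN = r"primary_DATASET_ID_(?P<designation>[^_]+)_(?P<visible>[^_]+)_(?P<ext>[^_]+)(_(?P<dbkey>[^_]+))?"

    # pattern_for_dataset() substitutes the output dataset's id for DATASET_ID.
    pattern = DEFAULT_EXTRA_FILENAME_PATTERN.replace("DATASET_ID", "42")

    match = re.match(pattern, "primary_42_world_visible_txt_hg19")
    assert match.group("designation") == "world"
    assert match.group("visible") == "visible"
    assert match.group("ext") == "txt"
    assert match.group("dbkey") == "hg19"

    # The trailing dbkey segment is optional: the named group is then present
    # but unmatched, so group("dbkey") returns None. (The IndexError fallback
    # in CollectedDatasetMatch covers custom patterns omitting a group entirely.)
    match = re.match(pattern, "primary_42_world_visible_txt")
    assert match.group("dbkey") is None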
diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/tools/test.py
--- a/lib/galaxy/tools/test.py
+++ b/lib/galaxy/tools/test.py
@@ -297,6 +297,19 @@
     if name is None:
         raise Exception( "Test output does not have a 'name'" )
+    file, attributes = __parse_test_attributes( output_elem, attrib )
+    primary_datasets = {}
+    for primary_elem in ( output_elem.findall( "discovered_dataset" ) or [] ):
+        primary_attrib = dict( primary_elem.attrib )
+        designation = primary_attrib.pop( 'designation', None )
+        if designation is None:
+            raise Exception( "Test primary dataset does not have a 'designation'" )
+        primary_datasets[ designation ] = __parse_test_attributes( primary_elem, primary_attrib )
+    attributes[ "primary_datasets" ] = primary_datasets
+    return name, file, attributes
+
+
+def __parse_test_attributes( output_elem, attrib ):
     assert_list = __parse_assert_list( output_elem )
     file = attrib.pop( 'file', None )
     # File no longer required if an list of assertions was present.
@@ -321,7 +334,7 @@
     attributes['assert_list'] = assert_list
     attributes['extra_files'] = extra_files
     attributes['metadata'] = metadata
-    return name, file, attributes
+    return file, attributes
 
 
 def __parse_assert_list( output_elem ):
diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/webapps/galaxy/api/jobs.py
--- a/lib/galaxy/webapps/galaxy/api/jobs.py
+++ b/lib/galaxy/webapps/galaxy/api/jobs.py
@@ -13,6 +13,7 @@
 from galaxy.web.base.controller import UsesLibraryMixinItems
 from galaxy import exceptions
 from galaxy import util
+from galaxy import model
 
 import logging
 log = logging.getLogger( __name__ )
@@ -91,10 +92,62 @@
         :rtype:     dictionary
         :returns:   dictionary containing full description of job data
         """
+        job = self.__get_job( trans, id )
+        return self.encode_all_ids( trans, job.to_dict( 'element' ), True )
+
+    @expose_api
+    def inputs( self, trans, id, **kwd ):
+        """
+        inputs( trans, id )
+        * GET /api/jobs/{job_id}/inputs
+            returns input datasets consumed by the job
+
+        :type   id: string
+        :param  id: Encoded job id
+
+        :rtype:     dictionary
+        :returns:   dictionary containing input dataset associations
+        """
+        job = self.__get_job( trans, id )
+        return self.__dictify_associations( trans, job.input_datasets, job.input_library_datasets )
+
+    @expose_api
+    def outputs( self, trans, id, **kwd ):
+        """
+        outputs( trans, id )
+        * GET /api/jobs/{job_id}/outputs
+            returns output datasets created by the job
+
+        :type   id: string
+        :param  id: Encoded job id
+
+        :rtype:     dictionary
+        :returns:   dictionary containing output dataset associations
+        """
+        job = self.__get_job( trans, id )
+        return self.__dictify_associations( trans, job.output_datasets, job.output_library_datasets )
+
+    def __dictify_associations( self, trans, *association_lists ):
+        rval = []
+        for association_list in association_lists:
+            rval.extend( map( lambda a: self.__dictify_association( trans, a ), association_list ) )
+        return rval
+
+    def __dictify_association( self, trans, job_dataset_association ):
+        dataset_dict = None
+        dataset = job_dataset_association.dataset
+        if dataset:
+            if isinstance( dataset, model.HistoryDatasetAssociation ):
+                dataset_dict = dict( src="hda", id=trans.security.encode_id( dataset.id ) )
+            else:
+                dataset_dict = dict( src="ldda", id=trans.security.encode_id( dataset.id ) )
+        return dict( name=job_dataset_association.name, dataset=dataset_dict )
+
+    def __get_job( self, trans, id ):
         try:
-            decoded_job_id = trans.security.decode_id(id)
-        except:
-            raise exceptions.ObjectAttributeInvalidException()
+            decoded_job_id = trans.security.decode_id( id )
+        except Exception:
+            raise exceptions.MalformedId()
         query = trans.sa_session.query( trans.app.model.Job ).filter(
             trans.app.model.Job.user == trans.user,
             trans.app.model.Job.id == decoded_job_id
@@ -102,7 +155,7 @@
         job = query.first()
         if job is None:
             raise exceptions.ObjectNotFound()
-        return self.encode_all_ids( trans, job.to_dict( 'element' ), True )
+        return job
 
     @expose_api
     def create( self, trans, payload, **kwd ):
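The two new endpoints are wired up in buildapp.py further below. A minimal sketch of calling them with the requests library; the URL, API key, and encoded job id are hypothetical placeholders:

    import requests

    GALAXY_URL = "http://localhost:8080"   # hypothetical Galaxy instance
    API_KEY = "your-api-key"               # hypothetical API key
    JOB_ID = "adb5f5c93f827949"            # hypothetical encoded job id

    for action in ("inputs", "outputs"):
        response = requests.get(
            "%s/api/jobs/%s/%s" % (GALAXY_URL, JOB_ID, action),
            params={"key": API_KEY},
        )
        # Each association is rendered by __dictify_association as
        # {"name": ..., "dataset": {"src": "hda" or "ldda", "id": ...}}.
        for association in response.json():
            print("%s: %s -> %s" % (action, association["name"], association["dataset"]))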
diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/webapps/galaxy/api/provenance.py
--- a/lib/galaxy/webapps/galaxy/api/provenance.py
+++ b/lib/galaxy/webapps/galaxy/api/provenance.py
@@ -46,6 +46,7 @@
         return {
             "id": trans.security.encode_id(item.id),
             "uuid": ( lambda uuid: str( uuid ) if uuid else None )( item.dataset.uuid),
+            "job_id": trans.security.encode_id( job.id ),
             "tool_id": job.tool_id,
             "parameters": self._get_job_record(trans, job, follow),
             "stderr": job.stderr,
diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/webapps/galaxy/buildapp.py
--- a/lib/galaxy/webapps/galaxy/buildapp.py
+++ b/lib/galaxy/webapps/galaxy/buildapp.py
@@ -281,6 +281,8 @@
                            'jobs', path_prefix='/api' )
     webapp.mapper.connect( 'job_search', '/api/jobs/search', controller='jobs', action='search', conditions=dict( method=['POST'] ) )
+    webapp.mapper.connect( 'job_inputs', '/api/jobs/{id}/inputs', controller='jobs', action='inputs', conditions=dict( method=['GET'] ) )
+    webapp.mapper.connect( 'job_outputs', '/api/jobs/{id}/outputs', controller='jobs', action='outputs', conditions=dict( method=['GET'] ) )
 
     # Job files controllers. Only for consumption by remote job runners.
     webapp.mapper.resource( 'file',
diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/base/interactor.py
--- a/test/base/interactor.py
+++ b/test/base/interactor.py
@@ -44,16 +44,44 @@
         self.api_key = self.__get_user_key( twill_test_case.user_api_key, twill_test_case.master_api_key, test_user=test_user )
         self.uploads = {}
 
-    def verify_output( self, history_id, output_data, outfile, attributes, shed_tool_id, maxseconds ):
+    def verify_output( self, history_id, output_data, output_testdef, shed_tool_id, maxseconds ):
+        outfile = output_testdef.outfile
+        attributes = output_testdef.attributes
+        name = output_testdef.name
         self.wait_for_history( history_id, maxseconds )
         hid = self.__output_id( output_data )
         fetcher = self.__dataset_fetcher( history_id )
         ## TODO: Twill version verifys dataset is 'ok' in here.
         self.twill_test_case.verify_hid( outfile, hda_id=hid, attributes=attributes, dataset_fetcher=fetcher, shed_tool_id=shed_tool_id )
+
+        primary_datasets = attributes.get( 'primary_datasets', {} )
+        if primary_datasets:
+            job_id = self._dataset_provenance( history_id, hid )[ "job_id" ]
+            outputs = self._get( "jobs/%s/outputs" % ( job_id ) ).json()
+
+        for designation, ( primary_outfile, primary_attributes ) in primary_datasets.iteritems():
+            primary_output = None
+            for output in outputs:
+                if output[ "name" ] == '__new_primary_file_%s|%s__' % ( name, designation ):
+                    primary_output = output
+                    break
+
+            if not primary_output:
+                msg_template = "Failed to find primary dataset with designation [%s] for output with name [%s]"
+                msg_args = ( designation, name )
+                raise Exception( msg_template % msg_args )
+
+            primary_hda_id = primary_output[ "dataset" ][ "id" ]
+            self.twill_test_case.verify_hid( primary_outfile, hda_id=primary_hda_id, attributes=primary_attributes, dataset_fetcher=fetcher, shed_tool_id=shed_tool_id )
+            self._verify_metadata( history_id, primary_hda_id, primary_attributes )
+
+        self._verify_metadata( history_id, hid, attributes )
+
+    def _verify_metadata( self, history_id, hid, attributes ):
         metadata = attributes.get( 'metadata', {} ).copy()
         for key, value in metadata.copy().iteritems():
             new_key = "metadata_%s" % key
-            metadata[ new_key ] = metadata[ key ]
+            metadata[ new_key ] = metadata[ key ]
             del metadata[ key ]
         expected_file_type = attributes.get( 'ftype', None )
         if expected_file_type:
@@ -319,7 +347,10 @@
     def __init__( self, twill_test_case ):
         self.twill_test_case = twill_test_case
 
-    def verify_output( self, history, output_data, outfile, attributes, shed_tool_id, maxseconds ):
+    def verify_output( self, history, output_data, output_testdef, shed_tool_id, maxseconds ):
+        outfile = output_testdef.outfile
+        attributes = output_testdef.attributes
+
         hid = output_data.get( 'hid' )
         self.twill_test_case.verify_dataset_correctness( outfile, hid=hid, attributes=attributes, shed_tool_id=shed_tool_id, maxseconds=maxseconds )
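Discovered datasets are located through the job association name '__new_primary_file_<output name>|<designation>__' assigned by collect_primary_datatasets. A small sketch of the lookup the interactor performs above (the sample data is hypothetical):

    def find_primary_output(outputs, name, designation):
        # `outputs` is the list returned by GET /api/jobs/{id}/outputs.
        wanted = "__new_primary_file_%s|%s__" % (name, designation)
        for output in outputs:
            if output["name"] == wanted:
                return output
        return None

    outputs = [
        {"name": "report", "dataset": {"src": "hda", "id": "abc123"}},
        {"name": "__new_primary_file_report|world__", "dataset": {"src": "hda", "id": "def456"}},
    ]
    assert find_primary_output(outputs, "report", "world")["dataset"]["id"] == "def456"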
diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/test_toolbox.py
--- a/test/functional/test_toolbox.py
+++ b/test/functional/test_toolbox.py
@@ -3,6 +3,7 @@
 from base.twilltestcase import TwillTestCase
 from base.interactor import build_interactor, stage_data_in_history
 from galaxy.tools import DataManagerTool
+from galaxy.util import bunch
 
 import logging
 log = logging.getLogger( __name__ )
@@ -52,6 +53,7 @@
             for output_index, output_tuple in enumerate(testdef.outputs):
                 # Get the correct hid
                 name, outfile, attributes = output_tuple
+                output_testdef = bunch.Bunch( name=name, outfile=outfile, attributes=attributes )
                 try:
                     output_data = data_list[ name ]
                 except (TypeError, KeyError):
@@ -64,7 +66,7 @@
                     output_data = data_list[ len(data_list) - len(testdef.outputs) + output_index ]
                 self.assertTrue( output_data is not None )
                 try:
-                    galaxy_interactor.verify_output( history, output_data, outfile, attributes=attributes, shed_tool_id=shed_tool_id, maxseconds=maxseconds )
+                    galaxy_interactor.verify_output( history, output_data, output_testdef=output_testdef, shed_tool_id=shed_tool_id, maxseconds=maxseconds )
                 except Exception:
                     for stream in ['stdout', 'stderr']:
                         stream_output = galaxy_interactor.get_job_stream( history, output_data, stream=stream )
diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/tools/multi_output.xml
--- a/test/functional/tools/multi_output.xml
+++ b/test/functional/tools/multi_output.xml
@@ -1,7 +1,7 @@
 <tool id="multi_output" name="Multi_Output" description="multi_output" force_history_refresh="True" version="0.1.0">
     <command>
         echo "Hello" > $report;
-        echo "World" > '${__new_file_path__}/primary_${report.id}_moo_visible_?'
+        echo "World Contents" > '${__new_file_path__}/primary_${report.id}_world_visible_?'
     </command>
     <inputs>
         <param name="input" type="integer" value="7" />
@@ -16,6 +16,11 @@
             <assert_contents>
                 <has_line line="Hello" />
             </assert_contents>
+            <discovered_dataset designation="world">
+                <assert_contents>
+                    <has_line line="World Contents" />
+                </assert_contents>
+            </discovered_dataset>
         </output>
     </test>
 </tests>
diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/tools/multi_output_configured.xml
--- /dev/null
+++ b/test/functional/tools/multi_output_configured.xml
@@ -0,0 +1,43 @@
+<tool id="multi_output_configured" name="Multi_Output_Configured" description="multi_output_configured" force_history_refresh="True" version="0.1.0">
+    <command>
+        echo "Hello" > $report;
+        mkdir subdir1;
+        echo "This" > subdir1/this.txt;
+        echo "That" > subdir1/that.txt;
+        mkdir subdir2;
+        echo "1" > subdir2/CUSTOM_1.txt;
+        echo "2" > subdir2/CUSTOM_2.tabular;
+        echo "3" > subdir2/CUSTOM_3.txt;
+    </command>
+    <inputs>
+        <param name="input" type="integer" value="7" />
+    </inputs>
+    <outputs>
+        <data format="txt" name="report">
+            <discover_datasets pattern="__designation_and_ext__" directory="subdir1" />
+            <discover_datasets pattern="CUSTOM_(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;.+)" directory="subdir2" />
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="7" />
+            <output name="report">
+                <assert_contents>
+                    <has_line line="Hello" />
+                </assert_contents>
+                <discovered_dataset designation="this" ftype="txt">
+                    <assert_contents><has_line line="This" /></assert_contents>
+                </discovered_dataset>
+                <discovered_dataset designation="that" ftype="txt">
+                    <assert_contents><has_line line="That" /></assert_contents>
+                </discovered_dataset>
+                <discovered_dataset designation="1" ftype="txt">
+                    <assert_contents><has_line line="1" /></assert_contents>
+                </discovered_dataset>
+                <discovered_dataset designation="2" ftype="tabular">
+                    <assert_contents><has_line line="2" /></assert_contents>
+                </discovered_dataset>
+            </output>
+        </test>
+    </tests>
+</tool>
diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/tools/samples_tool_conf.xml
--- a/test/functional/tools/samples_tool_conf.xml
+++ b/test/functional/tools/samples_tool_conf.xml
@@ -8,6 +8,7 @@
     <tool file="multi_page.xml"/>
     <tool file="multi_select.xml" />
     <tool file="multi_output.xml" />
+    <tool file="multi_output_configured.xml" />
    <tool file="composite_output.xml" />
    <tool file="metadata.xml" />
    <tool file="output_order.xml" />
diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/workflow.py
--- a/test/functional/workflow.py
+++ b/test/functional/workflow.py
@@ -4,6 +4,7 @@
 from base.interactor import GalaxyInteractorApi, stage_data_in_history
 
 from galaxy.util import parse_xml
+from galaxy.util import bunch
 from galaxy.tools.test import parse_param_elem, require_file, test_data_iter, parse_output_elems
 from json import load, dumps
 
@@ -66,10 +67,11 @@
         for expected_output_def in workflow_test.outputs:
             # Get the correct hid
             name, outfile, attributes = expected_output_def
+            output_testdef = bunch.Bunch( name=name, outfile=outfile, attributes=attributes )
 
             output_data = outputs[ int( name ) ]
             try:
-                galaxy_interactor.verify_output( test_history, output_data, outfile, attributes=attributes, shed_tool_id=None, maxseconds=maxseconds )
+                galaxy_interactor.verify_output( test_history, output_data, output_testdef=output_testdef, shed_tool_id=None, maxseconds=maxseconds )
             except Exception:
                 for stream in ['stdout', 'stderr']:
                     stream_output = galaxy_interactor.get_job_stream( test_history, output_data, stream=stream )
diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/unit/tools/test_collect_primary_datasets.py
--- a/test/unit/tools/test_collect_primary_datasets.py
+++ b/test/unit/tools/test_collect_primary_datasets.py
@@ -5,6 +5,8 @@
 import tools_support
 
 from galaxy import model
+from galaxy import util
+from galaxy.tools.parameters import output_collect
 
 DEFAULT_TOOL_OUTPUT = "out1"
 DEFAULT_EXTRA_NAME = "test1"
@@ -114,6 +116,75 @@
         extra_job_assoc = filter( lambda job_assoc: job_assoc.name.startswith( "__" ), self.job.output_datasets )[ 0 ]
         assert extra_job_assoc.name == "__new_primary_file_out1|test1__"
 
+    def test_pattern_override_designation( self ):
+        self._replace_output_collectors( '''<output><discover_datasets pattern="__designation__" directory="subdir" ext="txt" /></output>''' )
+        self._setup_extra_file( subdir="subdir", filename="foo.txt" )
+        primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ]
+        assert len( primary_outputs ) == 1
+        created_hda = primary_outputs.values()[ 0 ]
+        assert "foo.txt" in created_hda.name
+        assert created_hda.ext == "txt"
+
+    def test_name_and_ext_pattern( self ):
+        self._replace_output_collectors( '''<output><discover_datasets pattern="__name_and_ext__" directory="subdir" /></output>''' )
+        self._setup_extra_file( subdir="subdir", filename="foo1.txt" )
+        self._setup_extra_file( subdir="subdir", filename="foo2.tabular" )
+        primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ]
+        assert len( primary_outputs ) == 2
+        assert primary_outputs[ "foo1" ].ext == "txt"
+        assert primary_outputs[ "foo2" ].ext == "tabular"
+
+    def test_custom_pattern( self ):
+        # Hypothetical oral metagenomic classifier that populates a directory
+        # of files based on name and genome. Use custom regex pattern to grab
+        # and classify these files.
+        self._replace_output_collectors( '''<output><discover_datasets pattern="(?P&lt;designation&gt;.*)__(?P&lt;dbkey&gt;.*)\.fasta" directory="genome_breakdown" ext="fasta" /></output>''' )
+        self._setup_extra_file( subdir="genome_breakdown", filename="samp1__hg19.fasta" )
+        self._setup_extra_file( subdir="genome_breakdown", filename="samp2__lactLact.fasta" )
+        self._setup_extra_file( subdir="genome_breakdown", filename="samp3__hg19.fasta" )
+        self._setup_extra_file( subdir="genome_breakdown", filename="samp4__lactPlan.fasta" )
+        self._setup_extra_file( subdir="genome_breakdown", filename="samp5__fusoNucl.fasta" )
+
+        # Put a file in directory we don't care about, just to make sure
+        # it doesn't get picked up by pattern.
+        self._setup_extra_file( subdir="genome_breakdown", filename="overview.txt" )
+
+        primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ]
+        assert len( primary_outputs ) == 5
+        genomes = dict( samp1="hg19", samp2="lactLact", samp3="hg19", samp4="lactPlan", samp5="fusoNucl" )
+        for key, hda in primary_outputs.iteritems():
+            assert hda.dbkey == genomes[ key ]
+
+    def test_name_versus_designation( self ):
+        """ This test demonstrates the difference between name and designation
+        in grouping patterns and named patterns such as __designation__,
+        __name__, __designation_and_ext__, and __name_and_ext__.
+        """
+        self._replace_output_collectors( '''<output>
+            <discover_datasets pattern="__name_and_ext__" directory="subdir_for_name_discovery" />
+            <discover_datasets pattern="__designation_and_ext__" directory="subdir_for_designation_discovery" />
+        </output>''')
+        self._setup_extra_file( subdir="subdir_for_name_discovery", filename="example1.txt" )
+        self._setup_extra_file( subdir="subdir_for_designation_discovery", filename="example2.txt" )
+        primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ]
+        name_output = primary_outputs[ "example1" ]
+        designation_output = primary_outputs[ "example2" ]
+        # While name is also used for designation, designation is not the name -
+        # it is used in the calculation of the name however...
+        assert name_output.name == "example1"
+        assert designation_output.name == "%s (%s)" % ( self.hda.name, "example2" )
+
+    def test_cannot_read_files_outside_job_directory( self ):
+        self._replace_output_collectors( '''<output>
+            <discover_datasets pattern="__name_and_ext__" directory="../../secrets" />
+        </output>''')
+        exception_thrown = False
+        try:
+            self._collect( )
+        except Exception:
+            exception_thrown = True
+        assert exception_thrown
+
     def _collect_default_extra( self, **kwargs ):
         return self._collect( **kwargs )[ DEFAULT_TOOL_OUTPUT ][ DEFAULT_EXTRA_NAME ]
 
@@ -122,6 +193,12 @@
             job_working_directory = self.test_directory
         return self.tool.collect_primary_datasets( self.outputs, job_working_directory )
 
+    def _replace_output_collectors( self, xml_str ):
+        # Rewrite tool as if it had been created with output containing
+        # supplied dataset_collector elem.
+        elem = util.parse_xml_string( xml_str )
+        self.tool.outputs[ DEFAULT_TOOL_OUTPUT ].dataset_collectors = output_collect.dataset_collectors_from_elem( elem )
+
     def _append_job_json( self, object, output_path=None, line_type="new_primary_dataset" ):
         object[ "type" ] = line_type
         if output_path:
@@ -133,7 +210,8 @@
 
     def _setup_extra_file( self, **kwargs ):
         path = kwargs.get( "path", None )
-        if not path:
+        filename = kwargs.get( "filename", None )
+        if not path and not filename:
             name = kwargs.get( "name", DEFAULT_EXTRA_NAME )
             visible = kwargs.get( "visible", "visible" )
             ext = kwargs.get( "ext", "data" )
@@ -142,6 +220,13 @@
             path = os.path.join( directory, "primary_%s_%s_%s_%s" % template_args )
             if "dbkey" in kwargs:
                 path = "%s_%s" % ( path, kwargs[ "dbkey" ] )
+        if not path:
+            assert filename
+            subdir = kwargs.get( "subdir", "." )
+            path = os.path.join( self.test_directory, subdir, filename )
+            directory = os.path.dirname( path )
+            if not os.path.exists( directory ):
+                os.makedirs( directory )
         contents = kwargs.get( "contents", "test contents" )
         open( path, "w" ).write( contents )
         return path
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
--
This is a commit notification from bitbucket.org. You are receiving this email because you have the commit notification service enabled for this repository.
galaxy-commits@lists.galaxyproject.org