1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/8e6cda4c1b3d/ Changeset: 8e6cda4c1b3d User: jmchilton Date: 2014-05-06 15:13:29 Summary: Merged in jmchilton/galaxy-central-fork-1 (pull request #356) Enhancements for Runtime Discovered (Collected Primary) Datasets Affected #: 13 files diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -42,6 +42,7 @@ from galaxy.tools.deps import build_dependency_manager from galaxy.tools.deps.requirements import parse_requirements_from_xml from galaxy.tools.parameters import check_param, params_from_strings, params_to_strings +from galaxy.tools.parameters import output_collect from galaxy.tools.parameters.basic import (BaseURLToolParameter, DataToolParameter, HiddenToolParameter, LibraryDatasetToolParameter, SelectToolParameter, ToolParameter, UnvalidatedValue, @@ -1452,6 +1453,7 @@ output.hidden = string_as_bool( data_elem.get("hidden", "") ) output.tool = self output.actions = ToolOutputActionGroup( output, data_elem.find( 'actions' ) ) + output.dataset_collectors = output_collect.dataset_collectors_from_elem( data_elem ) self.outputs[ output.name ] = output # TODO: Include the tool's name in any parsing warnings. @@ -2800,94 +2802,7 @@ Find any additional datasets generated by a tool and attach (for cases where number of outputs is not known in advance). 
""" - new_primary_datasets = {} - try: - json_file = open( os.path.join( job_working_directory, jobs.TOOL_PROVIDED_JOB_METADATA_FILE ), 'r' ) - for line in json_file: - line = json.loads( line ) - if line.get( 'type' ) == 'new_primary_dataset': - new_primary_datasets[ os.path.split( line.get( 'filename' ) )[-1] ] = line - except Exception: - # This should not be considered an error or warning condition, this file is optional - pass - # Loop through output file names, looking for generated primary - # datasets in form of: - # 'primary_associatedWithDatasetID_designation_visibility_extension(_DBKEY)' - primary_datasets = {} - for name, outdata in output.items(): - filenames = [] - if 'new_file_path' in self.app.config.collect_outputs_from: - filenames.extend( glob.glob(os.path.join(self.app.config.new_file_path, "primary_%i_*" % outdata.id) ) ) - if 'job_working_directory' in self.app.config.collect_outputs_from: - filenames.extend( glob.glob(os.path.join(job_working_directory, "primary_%i_*" % outdata.id) ) ) - for filename in filenames: - if not name in primary_datasets: - primary_datasets[name] = {} - fields = os.path.basename(filename).split("_") - fields.pop(0) - parent_id = int(fields.pop(0)) - designation = fields.pop(0) - visible = fields.pop(0).lower() - if visible == "visible": - visible = True - else: - visible = False - ext = fields.pop(0).lower() - dbkey = outdata.dbkey - if fields: - dbkey = fields[ 0 ] - # Create new primary dataset - primary_data = self.app.model.HistoryDatasetAssociation( extension=ext, - designation=designation, - visible=visible, - dbkey=dbkey, - create_dataset=True, - sa_session=self.sa_session ) - self.app.security_agent.copy_dataset_permissions( outdata.dataset, primary_data.dataset ) - self.sa_session.add( primary_data ) - self.sa_session.flush() - # Move data from temp location to dataset location - self.app.object_store.update_from_file(primary_data.dataset, file_name=filename, create=True) - primary_data.set_size() - 
primary_data.name = "%s (%s)" % ( outdata.name, designation ) - primary_data.info = outdata.info - primary_data.init_meta( copy_from=outdata ) - primary_data.dbkey = dbkey - # Associate new dataset with job - job = None - for assoc in outdata.creating_job_associations: - job = assoc.job - break - if job: - assoc = self.app.model.JobToOutputDatasetAssociation( '__new_primary_file_%s|%s__' % ( name, designation ), primary_data ) - assoc.job = job - self.sa_session.add( assoc ) - self.sa_session.flush() - primary_data.state = outdata.state - #add tool/metadata provided information - new_primary_datasets_attributes = new_primary_datasets.get( os.path.split( filename )[-1] ) - if new_primary_datasets_attributes: - dataset_att_by_name = dict( ext='extension' ) - for att_set in [ 'name', 'info', 'ext', 'dbkey' ]: - dataset_att_name = dataset_att_by_name.get( att_set, att_set ) - setattr( primary_data, dataset_att_name, new_primary_datasets_attributes.get( att_set, getattr( primary_data, dataset_att_name ) ) ) - primary_data.set_meta() - primary_data.set_peek() - self.sa_session.add( primary_data ) - self.sa_session.flush() - outdata.history.add_dataset( primary_data ) - # Add dataset to return dict - primary_datasets[name][designation] = primary_data - # Need to update all associated output hdas, i.e. history was - # shared with job running - for dataset in outdata.dataset.history_associations: - if outdata == dataset: - continue - new_data = primary_data.copy() - dataset.history.add_dataset( new_data ) - self.sa_session.add( new_data ) - self.sa_session.flush() - return primary_datasets + return output_collect.collect_primary_datatasets( self, output, job_working_directory ) def to_dict( self, trans, link_details=False, io_details=False ): """ Returns dict of tool. 
""" diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/tools/parameters/output_collect.py --- /dev/null +++ b/lib/galaxy/tools/parameters/output_collect.py @@ -0,0 +1,219 @@ +""" Code allowing tools to define extra files associated with an output datset. +""" +import os +import re +import glob +import json + + +from galaxy import jobs +from galaxy import util +from galaxy.util import odict + +DATASET_ID_TOKEN = "DATASET_ID" +DEFAULT_EXTRA_FILENAME_PATTERN = r"primary_DATASET_ID_(?P<designation>[^_]+)_(?P<visible>[^_]+)_(?P<ext>[^_]+)(_(?P<dbkey>[^_]+))?" + + +def collect_primary_datatasets( tool, output, job_working_directory ): + app = tool.app + sa_session = tool.sa_session + new_primary_datasets = {} + try: + json_file = open( os.path.join( job_working_directory, jobs.TOOL_PROVIDED_JOB_METADATA_FILE ), 'r' ) + for line in json_file: + line = json.loads( line ) + if line.get( 'type' ) == 'new_primary_dataset': + new_primary_datasets[ os.path.split( line.get( 'filename' ) )[-1] ] = line + except Exception: + # This should not be considered an error or warning condition, this file is optional + pass + # Loop through output file names, looking for generated primary + # datasets in form of: + # 'primary_associatedWithDatasetID_designation_visibility_extension(_DBKEY)' + primary_datasets = {} + for name, outdata in output.items(): + dataset_collectors = tool.outputs[ name ].dataset_collectors if name in tool.outputs else [ DEFAULT_DATASET_COLLECTOR ] + filenames = odict.odict() + if 'new_file_path' in app.config.collect_outputs_from: + if DEFAULT_DATASET_COLLECTOR in dataset_collectors: + # 'new_file_path' collection should be considered deprecated, + # only use old-style matching (glob instead of regex and only + # using default collector - if enabled). 
+ for filename in glob.glob(os.path.join(app.config.new_file_path, "primary_%i_*" % outdata.id) ): + filenames[ filename ] = DEFAULT_DATASET_COLLECTOR + if 'job_working_directory' in app.config.collect_outputs_from: + for extra_file_collector in dataset_collectors: + directory = job_working_directory + if extra_file_collector.directory: + directory = os.path.join( directory, extra_file_collector.directory ) + if not util.in_directory( directory, job_working_directory ): + raise Exception( "Problem with tool configuration, attempting to pull in datasets from outside working directory." ) + if not os.path.isdir( directory ): + continue + for filename in os.listdir( directory ): + path = os.path.join( directory, filename ) + if not os.path.isfile( path ): + continue + if extra_file_collector.match( outdata, filename ): + filenames[ path ] = extra_file_collector + for filename, extra_file_collector in filenames.iteritems(): + if not name in primary_datasets: + primary_datasets[name] = {} + fields_match = extra_file_collector.match( outdata, os.path.basename( filename ) ) + if not fields_match: + # Before I guess pop() would just have thrown an IndexError + raise Exception( "Problem parsing metadata fields for file %s" % filename ) + designation = fields_match.designation + visible = fields_match.visible + ext = fields_match.ext + dbkey = fields_match.dbkey + # Create new primary dataset + primary_data = app.model.HistoryDatasetAssociation( extension=ext, + designation=designation, + visible=visible, + dbkey=dbkey, + create_dataset=True, + sa_session=sa_session ) + app.security_agent.copy_dataset_permissions( outdata.dataset, primary_data.dataset ) + sa_session.add( primary_data ) + sa_session.flush() + # Move data from temp location to dataset location + app.object_store.update_from_file(primary_data.dataset, file_name=filename, create=True) + primary_data.set_size() + # If match specified a name use otherwise generate one from + # designation. 
+ primary_data.name = fields_match.name or "%s (%s)" % ( outdata.name, designation ) + primary_data.info = outdata.info + primary_data.init_meta( copy_from=outdata ) + primary_data.dbkey = dbkey + # Associate new dataset with job + job = None + for assoc in outdata.creating_job_associations: + job = assoc.job + break + if job: + assoc = app.model.JobToOutputDatasetAssociation( '__new_primary_file_%s|%s__' % ( name, designation ), primary_data ) + assoc.job = job + sa_session.add( assoc ) + sa_session.flush() + primary_data.state = outdata.state + #add tool/metadata provided information + new_primary_datasets_attributes = new_primary_datasets.get( os.path.split( filename )[-1] ) + if new_primary_datasets_attributes: + dataset_att_by_name = dict( ext='extension' ) + for att_set in [ 'name', 'info', 'ext', 'dbkey' ]: + dataset_att_name = dataset_att_by_name.get( att_set, att_set ) + setattr( primary_data, dataset_att_name, new_primary_datasets_attributes.get( att_set, getattr( primary_data, dataset_att_name ) ) ) + primary_data.set_meta() + primary_data.set_peek() + sa_session.add( primary_data ) + sa_session.flush() + outdata.history.add_dataset( primary_data ) + # Add dataset to return dict + primary_datasets[name][designation] = primary_data + # Need to update all associated output hdas, i.e. history was + # shared with job running + for dataset in outdata.dataset.history_associations: + if outdata == dataset: + continue + new_data = primary_data.copy() + dataset.history.add_dataset( new_data ) + sa_session.add( new_data ) + sa_session.flush() + return primary_datasets + + +# XML can describe custom patterns, but these literals describe named +# patterns that will be replaced. 
+NAMED_PATTERNS = { + "__default__": DEFAULT_EXTRA_FILENAME_PATTERN, + "__name__": r"(?P<name>.*)", + "__designation__": r"(?P<designation>.*)", + "__name_and_ext__": r"(?P<name>.*)\.(?P<ext>[^\.]+)?", + "__designation_and_ext__": r"(?P<designation>.*)\.(?P<ext>[^\._]+)?", +} + + +def dataset_collectors_from_elem( elem ): + primary_dataset_elems = elem.findall( "discover_datasets" ) + if not primary_dataset_elems: + return [ DEFAULT_DATASET_COLLECTOR ] + else: + return map( lambda elem: DatasetCollector( **elem.attrib ), primary_dataset_elems ) + + +class DatasetCollector( object ): + + def __init__( self, **kwargs ): + pattern = kwargs.get( "pattern", "__default__" ) + if pattern in NAMED_PATTERNS: + pattern = NAMED_PATTERNS.get( pattern ) + self.pattern = pattern + self.default_dbkey = kwargs.get( "dbkey", None ) + self.default_ext = kwargs.get( "ext", None ) + self.default_visible = util.asbool( kwargs.get( "visible", None ) ) + self.directory = kwargs.get( "directory", None ) + + def pattern_for_dataset( self, dataset_instance=None ): + token_replacement = r'\d+' + if dataset_instance: + token_replacement = str( dataset_instance.id ) + return self.pattern.replace( DATASET_ID_TOKEN, token_replacement ) + + def match( self, dataset_instance, filename ): + re_match = re.match( self.pattern_for_dataset( dataset_instance ), filename ) + match_object = None + if re_match: + match_object = CollectedDatasetMatch( re_match, self ) + return match_object + + +class CollectedDatasetMatch( object ): + + def __init__( self, re_match, collector ): + self.re_match = re_match + self.collector = collector + + @property + def designation( self ): + re_match = self.re_match + if "designation" in re_match.groupdict(): + return re_match.group( "designation" ) + elif "name" in re_match.groupdict(): + return re_match.group( "name" ) + else: + return None + + @property + def name( self ): + """ Return name or None if not defined by the discovery pattern. 
+ """ + re_match = self.re_match + name = None + if "name" in re_match.groupdict(): + name = re_match.group( "name" ) + return name + + @property + def dbkey( self ): + try: + return self.re_match.group( "dbkey" ) + except IndexError: + return self.collector.default_dbkey + + @property + def ext( self ): + try: + return self.re_match.group( "ext" ) + except IndexError: + return self.collector.default_ext + + @property + def visible( self ): + try: + return self.re_match.group( "visible" ).lower() == "visible" + except IndexError: + return self.collector.default_visible + + +DEFAULT_DATASET_COLLECTOR = DatasetCollector() diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/tools/test.py --- a/lib/galaxy/tools/test.py +++ b/lib/galaxy/tools/test.py @@ -297,6 +297,19 @@ if name is None: raise Exception( "Test output does not have a 'name'" ) + file, attributes = __parse_test_attributes( output_elem, attrib ) + primary_datasets = {} + for primary_elem in ( output_elem.findall( "discovered_dataset" ) or [] ): + primary_attrib = dict( primary_elem.attrib ) + designation = primary_attrib.pop( 'designation', None ) + if designation is None: + raise Exception( "Test primary dataset does not have a 'designation'" ) + primary_datasets[ designation ] = __parse_test_attributes( primary_elem, primary_attrib ) + attributes[ "primary_datasets" ] = primary_datasets + return name, file, attributes + + +def __parse_test_attributes( output_elem, attrib ): assert_list = __parse_assert_list( output_elem ) file = attrib.pop( 'file', None ) # File no longer required if an list of assertions was present. 
@@ -321,7 +334,7 @@ attributes['assert_list'] = assert_list attributes['extra_files'] = extra_files attributes['metadata'] = metadata - return name, file, attributes + return file, attributes def __parse_assert_list( output_elem ): diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/webapps/galaxy/api/jobs.py --- a/lib/galaxy/webapps/galaxy/api/jobs.py +++ b/lib/galaxy/webapps/galaxy/api/jobs.py @@ -13,6 +13,7 @@ from galaxy.web.base.controller import UsesLibraryMixinItems from galaxy import exceptions from galaxy import util +from galaxy import model import logging log = logging.getLogger( __name__ ) @@ -91,10 +92,62 @@ :rtype: dictionary :returns: dictionary containing full description of job data """ + job = self.__get_job( trans, id ) + return self.encode_all_ids( trans, job.to_dict( 'element' ), True ) + + @expose_api + def inputs( self, trans, id, **kwd ): + """ + show( trans, id ) + * GET /api/jobs/{job_id}/inputs + returns input datasets created by job + + :type id: string + :param id: Encoded job id + + :rtype: dictionary + :returns: dictionary containing input dataset associations + """ + job = self.__get_job( trans, id ) + return self.__dictify_associations( trans, job.input_datasets, job.input_library_datasets ) + + @expose_api + def outputs( self, trans, id, **kwd ): + """ + show( trans, id ) + * GET /api/jobs/{job_id}/outputs + returns output datasets created by job + + :type id: string + :param id: Encoded job id + + :rtype: dictionary + :returns: dictionary containing output dataset associations + """ + job = self.__get_job( trans, id ) + return self.__dictify_associations( trans, job.output_datasets, job.output_library_datasets ) + + def __dictify_associations( self, trans, *association_lists ): + rval = [] + for association_list in association_lists: + rval.extend( map( lambda a: self.__dictify_association( trans, a ), association_list ) ) + return rval + + def __dictify_association( self, trans, 
job_dataset_association ): + dataset_dict = None + dataset = job_dataset_association.dataset + if dataset: + if isinstance( dataset, model.HistoryDatasetAssociation ): + dataset_dict = dict( src="hda", id=trans.security.encode_id( dataset.id ) ) + else: + dataset_dict = dict( src="ldda", id=trans.security.encode_id( dataset.id ) ) + return dict( name=job_dataset_association.name, dataset=dataset_dict ) + + def __get_job( self, trans, id ): try: - decoded_job_id = trans.security.decode_id(id) - except: - raise exceptions.ObjectAttributeInvalidException() + decoded_job_id = trans.security.decode_id( id ) + except Exception: + raise exceptions.MalformedId() query = trans.sa_session.query( trans.app.model.Job ).filter( trans.app.model.Job.user == trans.user, trans.app.model.Job.id == decoded_job_id @@ -102,7 +155,7 @@ job = query.first() if job is None: raise exceptions.ObjectNotFound() - return self.encode_all_ids( trans, job.to_dict( 'element' ), True ) + return job @expose_api def create( self, trans, payload, **kwd ): diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/webapps/galaxy/api/provenance.py --- a/lib/galaxy/webapps/galaxy/api/provenance.py +++ b/lib/galaxy/webapps/galaxy/api/provenance.py @@ -46,6 +46,7 @@ return { "id": trans.security.encode_id(item.id), "uuid": ( lambda uuid: str( uuid ) if uuid else None )( item.dataset.uuid), + "job_id": trans.security.encode_id( job.id ), "tool_id": job.tool_id, "parameters": self._get_job_record(trans, job, follow), "stderr": job.stderr, diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/webapps/galaxy/buildapp.py --- a/lib/galaxy/webapps/galaxy/buildapp.py +++ b/lib/galaxy/webapps/galaxy/buildapp.py @@ -281,6 +281,8 @@ 'jobs', path_prefix='/api' ) webapp.mapper.connect( 'job_search', '/api/jobs/search', controller='jobs', action='search', conditions=dict( method=['POST'] ) ) + webapp.mapper.connect( 
'job_inputs', '/api/jobs/{id}/inputs', controller='jobs', action='inputs', conditions=dict( method=['GET'] ) ) + webapp.mapper.connect( 'job_outputs', '/api/jobs/{id}/outputs', controller='jobs', action='outputs', conditions=dict( method=['GET'] ) ) # Job files controllers. Only for consumption by remote job runners. webapp.mapper.resource( 'file', diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/base/interactor.py --- a/test/base/interactor.py +++ b/test/base/interactor.py @@ -44,16 +44,44 @@ self.api_key = self.__get_user_key( twill_test_case.user_api_key, twill_test_case.master_api_key, test_user=test_user ) self.uploads = {} - def verify_output( self, history_id, output_data, outfile, attributes, shed_tool_id, maxseconds ): + def verify_output( self, history_id, output_data, output_testdef, shed_tool_id, maxseconds ): + outfile = output_testdef.outfile + attributes = output_testdef.attributes + name = output_testdef.name self.wait_for_history( history_id, maxseconds ) hid = self.__output_id( output_data ) fetcher = self.__dataset_fetcher( history_id ) ## TODO: Twill version verifys dataset is 'ok' in here. 
self.twill_test_case.verify_hid( outfile, hda_id=hid, attributes=attributes, dataset_fetcher=fetcher, shed_tool_id=shed_tool_id ) + + primary_datasets = attributes.get( 'primary_datasets', {} ) + if primary_datasets: + job_id = self._dataset_provenance( history_id, hid )[ "job_id" ] + outputs = self._get( "jobs/%s/outputs" % ( job_id ) ).json() + + for designation, ( primary_outfile, primary_attributes ) in primary_datasets.iteritems(): + primary_output = None + for output in outputs: + if output[ "name" ] == '__new_primary_file_%s|%s__' % ( name, designation ): + primary_output = output + break + + if not primary_output: + msg_template = "Failed to find primary dataset with designation [%s] for output with name [%s]" + msg_args = ( designation, name ) + raise Exception( msg_template % msg_args ) + + primary_hda_id = primary_output[ "dataset" ][ "id" ] + self.twill_test_case.verify_hid( primary_outfile, hda_id=primary_hda_id, attributes=primary_attributes, dataset_fetcher=fetcher, shed_tool_id=shed_tool_id ) + self._verify_metadata( history_id, primary_hda_id, primary_attributes ) + + self._verify_metadata( history_id, hid, attributes ) + + def _verify_metadata( self, history_id, hid, attributes ): metadata = attributes.get( 'metadata', {} ).copy() for key, value in metadata.copy().iteritems(): new_key = "metadata_%s" % key - metadata[ new_key ] = metadata[ key ] + metadata[ new_key ] = metadata[ key ] del metadata[ key ] expected_file_type = attributes.get( 'ftype', None ) if expected_file_type: @@ -319,7 +347,10 @@ def __init__( self, twill_test_case ): self.twill_test_case = twill_test_case - def verify_output( self, history, output_data, outfile, attributes, shed_tool_id, maxseconds ): + def verify_output( self, history, output_data, output_testdef, shed_tool_id, maxseconds ): + outfile = output_testdef.outfile + attributes = output_testdef.attributes + hid = output_data.get( 'hid' ) self.twill_test_case.verify_dataset_correctness( outfile, hid=hid, 
attributes=attributes, shed_tool_id=shed_tool_id, maxseconds=maxseconds ) diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/test_toolbox.py --- a/test/functional/test_toolbox.py +++ b/test/functional/test_toolbox.py @@ -3,6 +3,7 @@ from base.twilltestcase import TwillTestCase from base.interactor import build_interactor, stage_data_in_history from galaxy.tools import DataManagerTool +from galaxy.util import bunch import logging log = logging.getLogger( __name__ ) @@ -52,6 +53,7 @@ for output_index, output_tuple in enumerate(testdef.outputs): # Get the correct hid name, outfile, attributes = output_tuple + output_testdef = bunch.Bunch( name=name, outfile=outfile, attributes=attributes ) try: output_data = data_list[ name ] except (TypeError, KeyError): @@ -64,7 +66,7 @@ output_data = data_list[ len(data_list) - len(testdef.outputs) + output_index ] self.assertTrue( output_data is not None ) try: - galaxy_interactor.verify_output( history, output_data, outfile, attributes=attributes, shed_tool_id=shed_tool_id, maxseconds=maxseconds ) + galaxy_interactor.verify_output( history, output_data, output_testdef=output_testdef, shed_tool_id=shed_tool_id, maxseconds=maxseconds ) except Exception: for stream in ['stdout', 'stderr']: stream_output = galaxy_interactor.get_job_stream( history, output_data, stream=stream ) diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/tools/multi_output.xml --- a/test/functional/tools/multi_output.xml +++ b/test/functional/tools/multi_output.xml @@ -1,7 +1,7 @@ <tool id="multi_output" name="Multi_Output" description="multi_output" force_history_refresh="True" version="0.1.0"><command> echo "Hello" > $report; - echo "World" > '${__new_file_path__}/primary_${report.id}_moo_visible_?' + echo "World Contents" > '${__new_file_path__}/primary_${report.id}_world_visible_?' 
</command><inputs><param name="input" type="integer" value="7" /> @@ -16,6 +16,11 @@ <assert_contents><has_line line="Hello" /></assert_contents> + <discovered_dataset designation="world"> + <assert_contents> + <has_line line="World Contents" /> + </assert_contents> + </discovered_dataset></output></test></tests> diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/tools/multi_output_configured.xml --- /dev/null +++ b/test/functional/tools/multi_output_configured.xml @@ -0,0 +1,43 @@ +<tool id="multi_output_configured" name="Multi_Output_Configured" description="multi_output_configured" force_history_refresh="True" version="0.1.0"> + <command> + echo "Hello" > $report; + mkdir subdir1; + echo "This" > subdir1/this.txt; + echo "That" > subdir1/that.txt; + mkdir subdir2; + echo "1" > subdir2/CUSTOM_1.txt; + echo "2" > subdir2/CUSTOM_2.tabular; + echo "3" > subdir2/CUSTOM_3.txt; + </command> + <inputs> + <param name="input" type="integer" value="7" /> + </inputs> + <outputs> + <data format="txt" name="report"> + <discover_datasets pattern="__designation_and_ext__" directory="subdir1" /> + <discover_datasets pattern="CUSTOM_(?P<designation>.+)\.(?P<ext>.+)" directory="subdir2" /> + </data> + </outputs> + <tests> + <test> + <param name="input" value="7" /> + <output name="report"> + <assert_contents> + <has_line line="Hello" /> + </assert_contents> + <discovered_dataset designation="this" ftype="txt"> + <assert_contents><has_line line="This" /></assert_contents> + </discovered_dataset> + <discovered_dataset designation="that" ftype="txt"> + <assert_contents><has_line line="That" /></assert_contents> + </discovered_dataset> + <discovered_dataset designation="1" ftype="txt"> + <assert_contents><has_line line="1" /></assert_contents> + </discovered_dataset> + <discovered_dataset designation="2" ftype="tabular"> + <assert_contents><has_line line="2" /></assert_contents> + </discovered_dataset> + </output> + </test> + 
</tests> +</tool> diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/tools/samples_tool_conf.xml --- a/test/functional/tools/samples_tool_conf.xml +++ b/test/functional/tools/samples_tool_conf.xml @@ -8,6 +8,7 @@ <tool file="multi_page.xml"/><tool file="multi_select.xml" /><tool file="multi_output.xml" /> + <tool file="multi_output_configured.xml" /><tool file="composite_output.xml" /><tool file="metadata.xml" /><tool file="output_order.xml" /> diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/workflow.py --- a/test/functional/workflow.py +++ b/test/functional/workflow.py @@ -4,6 +4,7 @@ from base.interactor import GalaxyInteractorApi, stage_data_in_history from galaxy.util import parse_xml +from galaxy.util import bunch from galaxy.tools.test import parse_param_elem, require_file, test_data_iter, parse_output_elems from json import load, dumps @@ -66,10 +67,11 @@ for expected_output_def in workflow_test.outputs: # Get the correct hid name, outfile, attributes = expected_output_def + output_testdef = bunch.Bunch( name=name, outfile=outfile, attributes=attributes ) output_data = outputs[ int( name ) ] try: - galaxy_interactor.verify_output( test_history, output_data, outfile, attributes=attributes, shed_tool_id=None, maxseconds=maxseconds ) + galaxy_interactor.verify_output( test_history, output_data, output_testdef=output_testdef, shed_tool_id=None, maxseconds=maxseconds ) except Exception: for stream in ['stdout', 'stderr']: stream_output = galaxy_interactor.get_job_stream( test_history, output_data, stream=stream ) diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/unit/tools/test_collect_primary_datasets.py --- a/test/unit/tools/test_collect_primary_datasets.py +++ b/test/unit/tools/test_collect_primary_datasets.py @@ -5,6 +5,8 @@ import tools_support from galaxy import model +from galaxy import util 
+from galaxy.tools.parameters import output_collect DEFAULT_TOOL_OUTPUT = "out1" DEFAULT_EXTRA_NAME = "test1" @@ -114,6 +116,75 @@ extra_job_assoc = filter( lambda job_assoc: job_assoc.name.startswith( "__" ), self.job.output_datasets )[ 0 ] assert extra_job_assoc.name == "__new_primary_file_out1|test1__" + def test_pattern_override_designation( self ): + self._replace_output_collectors( '''<output><discover_datasets pattern="__designation__" directory="subdir" ext="txt" /></output>''' ) + self._setup_extra_file( subdir="subdir", filename="foo.txt" ) + primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ] + assert len( primary_outputs ) == 1 + created_hda = primary_outputs.values()[ 0 ] + assert "foo.txt" in created_hda.name + assert created_hda.ext == "txt" + + def test_name_and_ext_pattern( self ): + self._replace_output_collectors( '''<output><discover_datasets pattern="__name_and_ext__" directory="subdir" /></output>''' ) + self._setup_extra_file( subdir="subdir", filename="foo1.txt" ) + self._setup_extra_file( subdir="subdir", filename="foo2.tabular" ) + primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ] + assert len( primary_outputs ) == 2 + assert primary_outputs[ "foo1" ].ext == "txt" + assert primary_outputs[ "foo2" ].ext == "tabular" + + def test_custom_pattern( self ): + # Hypothetical oral metagenomic classifier that populates a directory + # of files based on name and genome. Use custom regex pattern to grab + # and classify these files. 
+ self._replace_output_collectors( '''<output><discover_datasets pattern="(?P<designation>.*)__(?P<dbkey>.*).fasta" directory="genome_breakdown" ext="fasta" /></output>''' ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp1__hg19.fasta" ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp2__lactLact.fasta" ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp3__hg19.fasta" ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp4__lactPlan.fasta" ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp5__fusoNucl.fasta" ) + + # Put a file in directory we don't care about, just to make sure + # it doesn't get picked up by pattern. + self._setup_extra_file( subdir="genome_breakdown", filename="overview.txt" ) + + primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ] + assert len( primary_outputs ) == 5 + genomes = dict( samp1="hg19", samp2="lactLact", samp3="hg19", samp4="lactPlan", samp5="fusoNucl" ) + for key, hda in primary_outputs.iteritems(): + assert hda.dbkey == genomes[ key ] + + def test_name_versus_designation( self ): + """ This test demonstrates the difference between name and designation + in grouping patterns and named patterns such as __designation__, + __name__, __designation_and_ext__, and __name_and_ext__. 
+ """ + self._replace_output_collectors( '''<output> + <discover_datasets pattern="__name_and_ext__" directory="subdir_for_name_discovery" /> + <discover_datasets pattern="__designation_and_ext__" directory="subdir_for_designation_discovery" /> + </output>''') + self._setup_extra_file( subdir="subdir_for_name_discovery", filename="example1.txt" ) + self._setup_extra_file( subdir="subdir_for_designation_discovery", filename="example2.txt" ) + primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ] + name_output = primary_outputs[ "example1" ] + designation_output = primary_outputs[ "example2" ] + # While name is also used for designation, designation is not the name - + # it is used in the calculation of the name however... + assert name_output.name == "example1" + assert designation_output.name == "%s (%s)" % ( self.hda.name, "example2" ) + + def test_cannot_read_files_outside_job_directory( self ): + self._replace_output_collectors( '''<output> + <discover_datasets pattern="__name_and_ext__" directory="../../secrets" /> + </output>''') + exception_thrown = False + try: + self._collect( ) + except Exception: + exception_thrown = True + assert exception_thrown + def _collect_default_extra( self, **kwargs ): return self._collect( **kwargs )[ DEFAULT_TOOL_OUTPUT ][ DEFAULT_EXTRA_NAME ] @@ -122,6 +193,12 @@ job_working_directory = self.test_directory return self.tool.collect_primary_datasets( self.outputs, job_working_directory ) + def _replace_output_collectors( self, xml_str ): + # Rewrite tool as if it had been created with output containing + # supplied dataset_collector elem. 
+ elem = util.parse_xml_string( xml_str ) + self.tool.outputs[ DEFAULT_TOOL_OUTPUT ].dataset_collectors = output_collect.dataset_collectors_from_elem( elem ) + def _append_job_json( self, object, output_path=None, line_type="new_primary_dataset" ): object[ "type" ] = line_type if output_path: @@ -133,7 +210,8 @@ def _setup_extra_file( self, **kwargs ): path = kwargs.get( "path", None ) - if not path: + filename = kwargs.get( "filename", None ) + if not path and not filename: name = kwargs.get( "name", DEFAULT_EXTRA_NAME ) visible = kwargs.get( "visible", "visible" ) ext = kwargs.get( "ext", "data" ) @@ -142,6 +220,13 @@ path = os.path.join( directory, "primary_%s_%s_%s_%s" % template_args ) if "dbkey" in kwargs: path = "%s_%s" % ( path, kwargs[ "dbkey" ] ) + if not path: + assert filename + subdir = kwargs.get( "subdir", "." ) + path = os.path.join( self.test_directory, subdir, filename ) + directory = os.path.dirname( path ) + if not os.path.exists( directory ): + os.makedirs( directory ) contents = kwargs.get( "contents", "test contents" ) open( path, "w" ).write( contents ) return path Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.