7 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/92c23f534d2f/ Changeset: 92c23f534d2f User: jmchilton Date: 2014-03-29 23:11:17 Summary: Include job id in dataset provenance API. This allows linking this information with information produced by the jobs API. Affected #: 1 file diff -r f83f4d9965283e8dc8640698262da52080081496 -r 92c23f534d2ff322476e988c998d14672d558e31 lib/galaxy/webapps/galaxy/api/provenance.py --- a/lib/galaxy/webapps/galaxy/api/provenance.py +++ b/lib/galaxy/webapps/galaxy/api/provenance.py @@ -46,6 +46,7 @@ return { "id": trans.security.encode_id(item.id), "uuid": ( lambda uuid: str( uuid ) if uuid else None )( item.dataset.uuid), + "job_id": trans.security.encode_id( job.id ), "tool_id": job.tool_id, "parameters": self._get_job_record(trans, job, follow), "stderr": job.stderr, https://bitbucket.org/galaxy/galaxy-central/commits/839a099d99a3/ Changeset: 839a099d99a3 User: jmchilton Date: 2014-03-29 23:11:17 Summary: Allow fetching input/output dataset association information through jobs API. This will allow tool test framework to discover runtime discovered datastes in subsequent changesets. Affected #: 2 files diff -r 92c23f534d2ff322476e988c998d14672d558e31 -r 839a099d99a3d1f35c35c76740087655eacfe46f lib/galaxy/webapps/galaxy/api/jobs.py --- a/lib/galaxy/webapps/galaxy/api/jobs.py +++ b/lib/galaxy/webapps/galaxy/api/jobs.py @@ -13,6 +13,7 @@ from galaxy.web.base.controller import UsesLibraryMixinItems from galaxy import exceptions from galaxy import util +from galaxy import model import logging log = logging.getLogger( __name__ ) @@ -91,10 +92,62 @@ :rtype: dictionary :returns: dictionary containing full description of job data """ + job = self.__get_job( trans, id ) + return self.encode_all_ids( trans, job.to_dict( 'element' ), True ) + + @expose_api + def inputs( self, trans, id, **kwd ): + """ + show( trans, id ) + * GET /api/jobs/{job_id}/inputs + returns input datasets created by job + + :type id: string + :param id: Encoded job id + + :rtype: dictionary + :returns: dictionary containing input dataset associations + """ + job = self.__get_job( trans, id ) + return self.__dictify_associations( trans, job.input_datasets, job.input_library_datasets ) + + @expose_api + def outputs( self, trans, id, **kwd ): + """ + show( trans, id ) + * GET /api/jobs/{job_id}/outputs + returns output datasets created by job + + :type id: string + :param id: Encoded job id + + :rtype: dictionary + :returns: dictionary containing output dataset associations + """ + job = self.__get_job( trans, id ) + return self.__dictify_associations( trans, job.output_datasets, job.output_library_datasets ) + + def __dictify_associations( self, trans, *association_lists ): + rval = [] + for association_list in association_lists: + rval.extend( map( lambda a: self.__dictify_association( trans, a ), association_list ) ) + return rval + + def __dictify_association( self, trans, job_dataset_association ): + dataset_dict = None + dataset = job_dataset_association.dataset + if dataset: + if isinstance( dataset, model.HistoryDatasetAssociation ): + dataset_dict = dict( src="hda", id=trans.security.encode_id( dataset.id ) ) + else: + dataset_dict = dict( src="ldda", id=trans.security.encode_id( dataset.id ) ) + return dict( name=job_dataset_association.name, dataset=dataset_dict ) + + def __get_job( self, trans, id ): try: - decoded_job_id = trans.security.decode_id(id) - except: - raise exceptions.ObjectAttributeInvalidException() + decoded_job_id = 
trans.security.decode_id( id ) + except Exception: + raise exceptions.MalformedId() query = trans.sa_session.query( trans.app.model.Job ).filter( trans.app.model.Job.user == trans.user, trans.app.model.Job.id == decoded_job_id @@ -102,7 +155,7 @@ job = query.first() if job is None: raise exceptions.ObjectNotFound() - return self.encode_all_ids( trans, job.to_dict( 'element' ), True ) + return job @expose_api def create( self, trans, payload, **kwd ): diff -r 92c23f534d2ff322476e988c998d14672d558e31 -r 839a099d99a3d1f35c35c76740087655eacfe46f lib/galaxy/webapps/galaxy/buildapp.py --- a/lib/galaxy/webapps/galaxy/buildapp.py +++ b/lib/galaxy/webapps/galaxy/buildapp.py @@ -261,6 +261,8 @@ 'jobs', path_prefix='/api' ) webapp.mapper.connect( 'job_search', '/api/jobs/search', controller='jobs', action='search', conditions=dict( method=['POST'] ) ) + webapp.mapper.connect( 'job_inputs', '/api/jobs/{id}/inputs', controller='jobs', action='inputs', conditions=dict( method=['GET'] ) ) + webapp.mapper.connect( 'job_outputs', '/api/jobs/{id}/outputs', controller='jobs', action='outputs', conditions=dict( method=['GET'] ) ) # Job files controllers. Only for consumption by remote job runners. webapp.mapper.resource( 'file', https://bitbucket.org/galaxy/galaxy-central/commits/0ce5e9eb984b/ Changeset: 0ce5e9eb984b User: jmchilton Date: 2014-03-29 23:11:17 Summary: Allow tools to test runtime discovered datasets. Affected #: 5 files diff -r 839a099d99a3d1f35c35c76740087655eacfe46f -r 0ce5e9eb984bc70f1f1c338a3f1d21ed9d6ddee4 lib/galaxy/tools/test.py --- a/lib/galaxy/tools/test.py +++ b/lib/galaxy/tools/test.py @@ -297,6 +297,19 @@ if name is None: raise Exception( "Test output does not have a 'name'" ) + file, attributes = __parse_test_attributes( output_elem, attrib ) + primary_datasets = {} + for primary_elem in ( output_elem.findall( "discovered_dataset" ) or [] ): + primary_attrib = dict( primary_elem.attrib ) + designation = primary_attrib.pop( 'designation', None ) + if designation is None: + raise Exception( "Test primary dataset does not have a 'designation'" ) + primary_datasets[ designation ] = __parse_test_attributes( primary_elem, primary_attrib ) + attributes[ "primary_datasets" ] = primary_datasets + return name, file, attributes + + +def __parse_test_attributes( output_elem, attrib ): assert_list = __parse_assert_list( output_elem ) file = attrib.pop( 'file', None ) # File no longer required if an list of assertions was present. @@ -321,7 +334,7 @@ attributes['assert_list'] = assert_list attributes['extra_files'] = extra_files attributes['metadata'] = metadata - return name, file, attributes + return file, attributes def __parse_assert_list( output_elem ): diff -r 839a099d99a3d1f35c35c76740087655eacfe46f -r 0ce5e9eb984bc70f1f1c338a3f1d21ed9d6ddee4 test/base/interactor.py --- a/test/base/interactor.py +++ b/test/base/interactor.py @@ -41,16 +41,44 @@ self.api_key = self.__get_user_key( twill_test_case.user_api_key, twill_test_case.master_api_key, test_user=test_user ) self.uploads = {} - def verify_output( self, history_id, output_data, outfile, attributes, shed_tool_id, maxseconds ): + def verify_output( self, history_id, output_data, output_testdef, shed_tool_id, maxseconds ): + outfile = output_testdef.outfile + attributes = output_testdef.attributes + name = output_testdef.name self.wait_for_history( history_id, maxseconds ) hid = self.__output_id( output_data ) fetcher = self.__dataset_fetcher( history_id ) ## TODO: Twill version verifys dataset is 'ok' in here. 
self.twill_test_case.verify_hid( outfile, hda_id=hid, attributes=attributes, dataset_fetcher=fetcher, shed_tool_id=shed_tool_id ) + + primary_datasets = attributes.get( 'primary_datasets', {} ) + if primary_datasets: + job_id = self._dataset_provenance( history_id, hid )[ "job_id" ] + outputs = self._get( "jobs/%s/outputs" % ( job_id ) ).json() + + for designation, ( primary_outfile, primary_attributes ) in primary_datasets.iteritems(): + primary_output = None + for output in outputs: + if output[ "name" ] == '__new_primary_file_%s|%s__' % ( name, designation ): + primary_output = output + break + + if not primary_output: + msg_template = "Failed to find primary dataset with designation [%s] for output with name [%s]" + msg_args = ( designation, name ) + raise Exception( msg_template % msg_args ) + + primary_hda_id = primary_output[ "dataset" ][ "id" ] + self.twill_test_case.verify_hid( primary_outfile, hda_id=primary_hda_id, attributes=primary_attributes, dataset_fetcher=fetcher, shed_tool_id=shed_tool_id ) + self._verify_metadata( history_id, primary_hda_id, primary_attributes ) + + self._verify_metadata( history_id, hid, attributes ) + + def _verify_metadata( self, history_id, hid, attributes ): metadata = attributes.get( 'metadata', {} ).copy() for key, value in metadata.copy().iteritems(): new_key = "metadata_%s" % key - metadata[ new_key ] = metadata[ key ] + metadata[ new_key ] = metadata[ key ] del metadata[ key ] expected_file_type = attributes.get( 'ftype', None ) if expected_file_type: @@ -316,7 +344,10 @@ def __init__( self, twill_test_case ): self.twill_test_case = twill_test_case - def verify_output( self, history, output_data, outfile, attributes, shed_tool_id, maxseconds ): + def verify_output( self, history, output_data, output_testdef, shed_tool_id, maxseconds ): + outfile = output_testdef.outfile + attributes = output_testdef.attributes + hid = output_data.get( 'hid' ) self.twill_test_case.verify_dataset_correctness( outfile, hid=hid, attributes=attributes, shed_tool_id=shed_tool_id, maxseconds=maxseconds ) diff -r 839a099d99a3d1f35c35c76740087655eacfe46f -r 0ce5e9eb984bc70f1f1c338a3f1d21ed9d6ddee4 test/functional/test_toolbox.py --- a/test/functional/test_toolbox.py +++ b/test/functional/test_toolbox.py @@ -3,6 +3,7 @@ from base.twilltestcase import TwillTestCase from base.interactor import build_interactor, stage_data_in_history from galaxy.tools import DataManagerTool +from galaxy.util import bunch import logging log = logging.getLogger( __name__ ) @@ -52,6 +53,7 @@ for output_index, output_tuple in enumerate(testdef.outputs): # Get the correct hid name, outfile, attributes = output_tuple + output_testdef = bunch.Bunch( name=name, outfile=outfile, attributes=attributes ) try: output_data = data_list[ name ] except (TypeError, KeyError): @@ -64,7 +66,7 @@ output_data = data_list[ len(data_list) - len(testdef.outputs) + output_index ] self.assertTrue( output_data is not None ) try: - galaxy_interactor.verify_output( history, output_data, outfile, attributes=attributes, shed_tool_id=shed_tool_id, maxseconds=maxseconds ) + galaxy_interactor.verify_output( history, output_data, output_testdef=output_testdef, shed_tool_id=shed_tool_id, maxseconds=maxseconds ) except Exception: for stream in ['stdout', 'stderr']: stream_output = galaxy_interactor.get_job_stream( history, output_data, stream=stream ) diff -r 839a099d99a3d1f35c35c76740087655eacfe46f -r 0ce5e9eb984bc70f1f1c338a3f1d21ed9d6ddee4 test/functional/tools/multi_output.xml --- a/test/functional/tools/multi_output.xml 
+++ b/test/functional/tools/multi_output.xml @@ -1,7 +1,7 @@ <tool id="multi_output" name="Multi_Output" description="multi_output" force_history_refresh="True" version="0.1.0"><command> echo "Hello" > $report; - echo "World" > '${__new_file_path__}/primary_${report.id}_moo_visible_?' + echo "World Contents" > '${__new_file_path__}/primary_${report.id}_world_visible_?' </command><inputs><param name="input" type="integer" value="7" /> @@ -16,6 +16,11 @@ <assert_contents><has_line line="Hello" /></assert_contents> + <discovered_dataset designation="world"> + <assert_contents> + <has_line line="World Contents" /> + </assert_contents> + </discovered_dataset></output></test></tests> diff -r 839a099d99a3d1f35c35c76740087655eacfe46f -r 0ce5e9eb984bc70f1f1c338a3f1d21ed9d6ddee4 test/functional/workflow.py --- a/test/functional/workflow.py +++ b/test/functional/workflow.py @@ -4,6 +4,7 @@ from base.interactor import GalaxyInteractorApi, stage_data_in_history from galaxy.util import parse_xml +from galaxy.util import bunch from galaxy.tools.test import parse_param_elem, require_file, test_data_iter, parse_output_elems from json import load, dumps @@ -66,10 +67,11 @@ for expected_output_def in workflow_test.outputs: # Get the correct hid name, outfile, attributes = expected_output_def + output_testdef = bunch.Bunch( name=name, outfile=outfile, attributes=attributes ) output_data = outputs[ int( name ) ] try: - galaxy_interactor.verify_output( test_history, output_data, outfile, attributes=attributes, shed_tool_id=None, maxseconds=maxseconds ) + galaxy_interactor.verify_output( test_history, output_data, output_testdef=output_testdef, shed_tool_id=None, maxseconds=maxseconds ) except Exception: for stream in ['stdout', 'stderr']: stream_output = galaxy_interactor.get_job_stream( test_history, output_data, stream=stream ) https://bitbucket.org/galaxy/galaxy-central/commits/681f2cc6dcfa/ Changeset: 681f2cc6dcfa User: jmchilton Date: 2014-03-29 23:11:17 Summary: Refactor Tool.collect_primary_datasets into own module. Want to break it down in smaller pieces and make process more configurable. Will be easier if its outside of monolithic tool class. Affected #: 2 files diff -r 0ce5e9eb984bc70f1f1c338a3f1d21ed9d6ddee4 -r 681f2cc6dcfa2a6674875d10fa8c7699be63dba4 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -42,6 +42,7 @@ from galaxy.tools.deps import build_dependency_manager from galaxy.tools.deps.requirements import parse_requirements_from_xml from galaxy.tools.parameters import check_param, params_from_strings, params_to_strings +from galaxy.tools.parameters import output_collect from galaxy.tools.parameters.basic import (BaseURLToolParameter, DataToolParameter, HiddenToolParameter, LibraryDatasetToolParameter, SelectToolParameter, ToolParameter, UnvalidatedValue, @@ -2757,94 +2758,7 @@ Find any additional datasets generated by a tool and attach (for cases where number of outputs is not known in advance). 
""" - new_primary_datasets = {} - try: - json_file = open( os.path.join( job_working_directory, jobs.TOOL_PROVIDED_JOB_METADATA_FILE ), 'r' ) - for line in json_file: - line = json.loads( line ) - if line.get( 'type' ) == 'new_primary_dataset': - new_primary_datasets[ os.path.split( line.get( 'filename' ) )[-1] ] = line - except Exception: - # This should not be considered an error or warning condition, this file is optional - pass - # Loop through output file names, looking for generated primary - # datasets in form of: - # 'primary_associatedWithDatasetID_designation_visibility_extension(_DBKEY)' - primary_datasets = {} - for name, outdata in output.items(): - filenames = [] - if 'new_file_path' in self.app.config.collect_outputs_from: - filenames.extend( glob.glob(os.path.join(self.app.config.new_file_path, "primary_%i_*" % outdata.id) ) ) - if 'job_working_directory' in self.app.config.collect_outputs_from: - filenames.extend( glob.glob(os.path.join(job_working_directory, "primary_%i_*" % outdata.id) ) ) - for filename in filenames: - if not name in primary_datasets: - primary_datasets[name] = {} - fields = os.path.basename(filename).split("_") - fields.pop(0) - parent_id = int(fields.pop(0)) - designation = fields.pop(0) - visible = fields.pop(0).lower() - if visible == "visible": - visible = True - else: - visible = False - ext = fields.pop(0).lower() - dbkey = outdata.dbkey - if fields: - dbkey = fields[ 0 ] - # Create new primary dataset - primary_data = self.app.model.HistoryDatasetAssociation( extension=ext, - designation=designation, - visible=visible, - dbkey=dbkey, - create_dataset=True, - sa_session=self.sa_session ) - self.app.security_agent.copy_dataset_permissions( outdata.dataset, primary_data.dataset ) - self.sa_session.add( primary_data ) - self.sa_session.flush() - # Move data from temp location to dataset location - self.app.object_store.update_from_file(primary_data.dataset, file_name=filename, create=True) - primary_data.set_size() - primary_data.name = "%s (%s)" % ( outdata.name, designation ) - primary_data.info = outdata.info - primary_data.init_meta( copy_from=outdata ) - primary_data.dbkey = dbkey - # Associate new dataset with job - job = None - for assoc in outdata.creating_job_associations: - job = assoc.job - break - if job: - assoc = self.app.model.JobToOutputDatasetAssociation( '__new_primary_file_%s|%s__' % ( name, designation ), primary_data ) - assoc.job = job - self.sa_session.add( assoc ) - self.sa_session.flush() - primary_data.state = outdata.state - #add tool/metadata provided information - new_primary_datasets_attributes = new_primary_datasets.get( os.path.split( filename )[-1] ) - if new_primary_datasets_attributes: - dataset_att_by_name = dict( ext='extension' ) - for att_set in [ 'name', 'info', 'ext', 'dbkey' ]: - dataset_att_name = dataset_att_by_name.get( att_set, att_set ) - setattr( primary_data, dataset_att_name, new_primary_datasets_attributes.get( att_set, getattr( primary_data, dataset_att_name ) ) ) - primary_data.set_meta() - primary_data.set_peek() - self.sa_session.add( primary_data ) - self.sa_session.flush() - outdata.history.add_dataset( primary_data ) - # Add dataset to return dict - primary_datasets[name][designation] = primary_data - # Need to update all associated output hdas, i.e. 
history was - # shared with job running - for dataset in outdata.dataset.history_associations: - if outdata == dataset: - continue - new_data = primary_data.copy() - dataset.history.add_dataset( new_data ) - self.sa_session.add( new_data ) - self.sa_session.flush() - return primary_datasets + return output_collect.collect_primary_datatasets( self, output, job_working_directory ) def to_dict( self, trans, link_details=False, io_details=False ): """ Returns dict of tool. """ diff -r 0ce5e9eb984bc70f1f1c338a3f1d21ed9d6ddee4 -r 681f2cc6dcfa2a6674875d10fa8c7699be63dba4 lib/galaxy/tools/parameters/output_collect.py --- /dev/null +++ b/lib/galaxy/tools/parameters/output_collect.py @@ -0,0 +1,101 @@ +""" Code allowing tools to define extra files associated with an output datset. +""" +import os +import glob +import json + + +from galaxy import jobs + + +def collect_primary_datatasets( tool, output, job_working_directory ): + app = tool.app + sa_session = tool.sa_session + new_primary_datasets = {} + try: + json_file = open( os.path.join( job_working_directory, jobs.TOOL_PROVIDED_JOB_METADATA_FILE ), 'r' ) + for line in json_file: + line = json.loads( line ) + if line.get( 'type' ) == 'new_primary_dataset': + new_primary_datasets[ os.path.split( line.get( 'filename' ) )[-1] ] = line + except Exception: + # This should not be considered an error or warning condition, this file is optional + pass + # Loop through output file names, looking for generated primary + # datasets in form of: + # 'primary_associatedWithDatasetID_designation_visibility_extension(_DBKEY)' + primary_datasets = {} + for name, outdata in output.items(): + filenames = [] + if 'new_file_path' in app.config.collect_outputs_from: + filenames.extend( glob.glob(os.path.join(app.config.new_file_path, "primary_%i_*" % outdata.id) ) ) + if 'job_working_directory' in app.config.collect_outputs_from: + filenames.extend( glob.glob(os.path.join(job_working_directory, "primary_%i_*" % outdata.id) ) ) + for filename in filenames: + if not name in primary_datasets: + primary_datasets[name] = {} + fields = os.path.basename(filename).split("_") + fields.pop(0) + parent_id = int(fields.pop(0)) + designation = fields.pop(0) + visible = fields.pop(0).lower() + if visible == "visible": + visible = True + else: + visible = False + ext = fields.pop(0).lower() + dbkey = outdata.dbkey + if fields: + dbkey = fields[ 0 ] + # Create new primary dataset + primary_data = app.model.HistoryDatasetAssociation( extension=ext, + designation=designation, + visible=visible, + dbkey=dbkey, + create_dataset=True, + sa_session=sa_session ) + app.security_agent.copy_dataset_permissions( outdata.dataset, primary_data.dataset ) + sa_session.add( primary_data ) + sa_session.flush() + # Move data from temp location to dataset location + app.object_store.update_from_file(primary_data.dataset, file_name=filename, create=True) + primary_data.set_size() + primary_data.name = "%s (%s)" % ( outdata.name, designation ) + primary_data.info = outdata.info + primary_data.init_meta( copy_from=outdata ) + primary_data.dbkey = dbkey + # Associate new dataset with job + job = None + for assoc in outdata.creating_job_associations: + job = assoc.job + break + if job: + assoc = app.model.JobToOutputDatasetAssociation( '__new_primary_file_%s|%s__' % ( name, designation ), primary_data ) + assoc.job = job + sa_session.add( assoc ) + sa_session.flush() + primary_data.state = outdata.state + #add tool/metadata provided information + new_primary_datasets_attributes = new_primary_datasets.get( 
os.path.split( filename )[-1] ) + if new_primary_datasets_attributes: + dataset_att_by_name = dict( ext='extension' ) + for att_set in [ 'name', 'info', 'ext', 'dbkey' ]: + dataset_att_name = dataset_att_by_name.get( att_set, att_set ) + setattr( primary_data, dataset_att_name, new_primary_datasets_attributes.get( att_set, getattr( primary_data, dataset_att_name ) ) ) + primary_data.set_meta() + primary_data.set_peek() + sa_session.add( primary_data ) + sa_session.flush() + outdata.history.add_dataset( primary_data ) + # Add dataset to return dict + primary_datasets[name][designation] = primary_data + # Need to update all associated output hdas, i.e. history was + # shared with job running + for dataset in outdata.dataset.history_associations: + if outdata == dataset: + continue + new_data = primary_data.copy() + dataset.history.add_dataset( new_data ) + sa_session.add( new_data ) + sa_session.flush() + return primary_datasets https://bitbucket.org/galaxy/galaxy-central/commits/7b8a02bb183b/ Changeset: 7b8a02bb183b User: jmchilton Date: 2014-03-29 23:11:17 Summary: Rework parsing of metadata from file names of runtime discovered datasets. Newer method uses regex and named groups instead of split for more extensible approach to parsing metadata fields during primary dataset collecting. This method will be easier to allow override and configuration of in subsequent changesets. Affected #: 1 file diff -r 681f2cc6dcfa2a6674875d10fa8c7699be63dba4 -r 7b8a02bb183bcd7edf6ee9997ea73fe2b6097d5b lib/galaxy/tools/parameters/output_collect.py --- a/lib/galaxy/tools/parameters/output_collect.py +++ b/lib/galaxy/tools/parameters/output_collect.py @@ -1,12 +1,15 @@ """ Code allowing tools to define extra files associated with an output datset. """ import os +import re import glob import json from galaxy import jobs +DEFAULT_EXTRA_FILENAME_PATTERN = re.compile(r"primary_(?P<id>\d+)_(?P<designation>[^_]+)_(?P<visible>[^_]+)_(?P<ext>[^_]+)(_(?P<dbkey>[^_]+))?") + def collect_primary_datatasets( tool, output, job_working_directory ): app = tool.app @@ -34,19 +37,14 @@ for filename in filenames: if not name in primary_datasets: primary_datasets[name] = {} - fields = os.path.basename(filename).split("_") - fields.pop(0) - parent_id = int(fields.pop(0)) - designation = fields.pop(0) - visible = fields.pop(0).lower() - if visible == "visible": - visible = True - else: - visible = False - ext = fields.pop(0).lower() - dbkey = outdata.dbkey - if fields: - dbkey = fields[ 0 ] + fields_match = DEFAULT_EXTRA_FILENAME_PATTERN.match( os.path.basename(filename) ) + if not fields_match: + # Before I guess pop() would just have thrown an IndexError + raise Exception( "Problem parsing metadata fields for file %s" % filename ) + designation = fields_match.group( "designation" ) + visible = fields_match.group( "visible" ).lower() == "visible" + ext = fields_match.group( "ext" ).lower() + dbkey = fields_match.group( "dbkey" ) or outdata.dbkey # Create new primary dataset primary_data = app.model.HistoryDatasetAssociation( extension=ext, designation=designation, https://bitbucket.org/galaxy/galaxy-central/commits/56e9203f0200/ Changeset: 56e9203f0200 User: jmchilton Date: 2014-03-29 23:11:17 Summary: Allow tool outputs to configure runtime dataset discovery. Output tags on tool XML datasets may contain any number of child "discover_datasets" elements describing how Galaxy should discover datasests. This new method only works for job_working_directory collection - new_file_path based discovery should be considered deprecated. 
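As a rough illustration of what these configurable collectors do (a standalone sketch, not the code added by this changeset), the snippet below shows how regex patterns with named groups resolve a discovered file name into designation, extension and dbkey. The two patterns are copied from the diffs that follow; the resolve_metadata helper, its defaults and its return format are purely illustrative and are not the actual DatasetCollector/CollectedDatasetMatch implementation.

import re

# Built-in pattern registered under the name "__designation_and_ext__" in the
# changeset's NAMED_PATTERNS table.
DESIGNATION_AND_EXT = r"(?P<designation>.*)\.(?P<ext>[^\._]+)?"
# Custom pattern used by the new multi_output_configured.xml test tool.
CUSTOM = r"CUSTOM_(?P<designation>.+)\.(?P<ext>.+)"


def resolve_metadata(pattern, filename, default_ext="data", default_dbkey=None):
    """Return discovered-dataset metadata for filename, or None if no match.

    Named groups the pattern does not capture fall back to collector-level
    defaults, mirroring the fallback behaviour described above; this helper
    itself is hypothetical and not part of the changeset.
    """
    match = re.match(pattern, filename)
    if match is None:
        return None
    groups = match.groupdict()
    return {
        # Like the changeset's match object, prefer an explicit designation
        # group and fall back to a name group if one was captured instead.
        "designation": groups.get("designation") or groups.get("name"),
        "ext": groups.get("ext") or default_ext,
        "dbkey": groups.get("dbkey") or default_dbkey,
    }


# "that.txt" resolves to designation "that" with ext "txt";
# "CUSTOM_2.tabular" resolves to designation "2" with ext "tabular",
# matching the expectations in the functional test tool below.
print(resolve_metadata(DESIGNATION_AND_EXT, "that.txt"))
print(resolve_metadata(CUSTOM, "CUSTOM_2.tabular"))
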
Example unit and functional tests describe this new configurability in detail. Affected #: 5 files diff -r 7b8a02bb183bcd7edf6ee9997ea73fe2b6097d5b -r 56e9203f020027c8c7a294555f6286ce654f79d2 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -1413,6 +1413,7 @@ output.hidden = string_as_bool( data_elem.get("hidden", "") ) output.tool = self output.actions = ToolOutputActionGroup( output, data_elem.find( 'actions' ) ) + output.dataset_collectors = output_collect.dataset_collectors_from_elem( data_elem ) self.outputs[ output.name ] = output # TODO: Include the tool's name in any parsing warnings. diff -r 7b8a02bb183bcd7edf6ee9997ea73fe2b6097d5b -r 56e9203f020027c8c7a294555f6286ce654f79d2 lib/galaxy/tools/parameters/output_collect.py --- a/lib/galaxy/tools/parameters/output_collect.py +++ b/lib/galaxy/tools/parameters/output_collect.py @@ -7,8 +7,11 @@ from galaxy import jobs +from galaxy import util +from galaxy.util import odict -DEFAULT_EXTRA_FILENAME_PATTERN = re.compile(r"primary_(?P<id>\d+)_(?P<designation>[^_]+)_(?P<visible>[^_]+)_(?P<ext>[^_]+)(_(?P<dbkey>[^_]+))?") +DATASET_ID_TOKEN = "DATASET_ID" +DEFAULT_EXTRA_FILENAME_PATTERN = r"primary_DATASET_ID_(?P<designation>[^_]+)_(?P<visible>[^_]+)_(?P<ext>[^_]+)(_(?P<dbkey>[^_]+))?" def collect_primary_datatasets( tool, output, job_working_directory ): @@ -29,22 +32,41 @@ # 'primary_associatedWithDatasetID_designation_visibility_extension(_DBKEY)' primary_datasets = {} for name, outdata in output.items(): - filenames = [] + dataset_collectors = tool.outputs[ name ].dataset_collectors if name in tool.outputs else [ DEFAULT_DATASET_COLLECTOR ] + filenames = odict.odict() if 'new_file_path' in app.config.collect_outputs_from: - filenames.extend( glob.glob(os.path.join(app.config.new_file_path, "primary_%i_*" % outdata.id) ) ) + if DEFAULT_DATASET_COLLECTOR in dataset_collectors: + # 'new_file_path' collection should be considered deprecated, + # only use old-style matching (glob instead of regex and only + # using default collector - if enabled). + for filename in glob.glob(os.path.join(app.config.new_file_path, "primary_%i_*" % outdata.id) ): + filenames[ filename ] = DEFAULT_DATASET_COLLECTOR if 'job_working_directory' in app.config.collect_outputs_from: - filenames.extend( glob.glob(os.path.join(job_working_directory, "primary_%i_*" % outdata.id) ) ) - for filename in filenames: + for extra_file_collector in dataset_collectors: + directory = job_working_directory + if extra_file_collector.directory: + directory = os.path.join( directory, extra_file_collector.directory ) + if not util.in_directory( directory, job_working_directory ): + raise Exception( "Problem with tool configuration, attempting to pull in datasets from outside working directory." 
) + if not os.path.isdir( directory ): + continue + for filename in os.listdir( directory ): + path = os.path.join( directory, filename ) + if not os.path.isfile( path ): + continue + if extra_file_collector.match( outdata, filename ): + filenames[ path ] = extra_file_collector + for filename, extra_file_collector in filenames.iteritems(): if not name in primary_datasets: primary_datasets[name] = {} - fields_match = DEFAULT_EXTRA_FILENAME_PATTERN.match( os.path.basename(filename) ) + fields_match = extra_file_collector.match( outdata, os.path.basename( filename ) ) if not fields_match: # Before I guess pop() would just have thrown an IndexError raise Exception( "Problem parsing metadata fields for file %s" % filename ) - designation = fields_match.group( "designation" ) - visible = fields_match.group( "visible" ).lower() == "visible" - ext = fields_match.group( "ext" ).lower() - dbkey = fields_match.group( "dbkey" ) or outdata.dbkey + designation = fields_match.designation + visible = fields_match.visible + ext = fields_match.ext + dbkey = fields_match.dbkey # Create new primary dataset primary_data = app.model.HistoryDatasetAssociation( extension=ext, designation=designation, @@ -58,7 +80,9 @@ # Move data from temp location to dataset location app.object_store.update_from_file(primary_data.dataset, file_name=filename, create=True) primary_data.set_size() - primary_data.name = "%s (%s)" % ( outdata.name, designation ) + # If match specified a name use otherwise generate one from + # designation. + primary_data.name = fields_match.name or "%s (%s)" % ( outdata.name, designation ) primary_data.info = outdata.info primary_data.init_meta( copy_from=outdata ) primary_data.dbkey = dbkey @@ -97,3 +121,99 @@ sa_session.add( new_data ) sa_session.flush() return primary_datasets + + +# XML can describe custom patterns, but these literals describe named +# patterns that will be replaced. 
+NAMED_PATTERNS = { + "__default__": DEFAULT_EXTRA_FILENAME_PATTERN, + "__name__": r"(?P<name>.*)", + "__designation__": r"(?P<designation>.*)", + "__name_and_ext__": r"(?P<name>.*)\.(?P<ext>[^\.]+)?", + "__designation_and_ext__": r"(?P<designation>.*)\.(?P<ext>[^\._]+)?", +} + + +def dataset_collectors_from_elem( elem ): + primary_dataset_elems = elem.findall( "discover_datasets" ) + if not primary_dataset_elems: + return [ DEFAULT_DATASET_COLLECTOR ] + else: + return map( lambda elem: DatasetCollector( **elem.attrib ), primary_dataset_elems ) + + +class DatasetCollector( object ): + + def __init__( self, **kwargs ): + pattern = kwargs.get( "pattern", "__default__" ) + if pattern in NAMED_PATTERNS: + pattern = NAMED_PATTERNS.get( pattern ) + self.pattern = pattern + self.default_dbkey = kwargs.get( "dbkey", None ) + self.default_ext = kwargs.get( "ext", None ) + self.default_visible = util.asbool( kwargs.get( "visible", None ) ) + self.directory = kwargs.get( "directory", None ) + + def pattern_for_dataset( self, dataset_instance=None ): + token_replacement = r'\d+' + if dataset_instance: + token_replacement = str( dataset_instance.id ) + return self.pattern.replace( DATASET_ID_TOKEN, token_replacement ) + + def match( self, dataset_instance, filename ): + re_match = re.match( self.pattern_for_dataset( dataset_instance ), filename ) + match_object = None + if re_match: + match_object = CollectedDatasetMatch( re_match, self ) + return match_object + + +class CollectedDatasetMatch( object ): + + def __init__( self, re_match, collector ): + self.re_match = re_match + self.collector = collector + + @property + def designation( self ): + re_match = self.re_match + if "designation" in re_match.groupdict(): + return re_match.group( "designation" ) + elif "name" in re_match.groupdict(): + return re_match.group( "name" ) + else: + return None + + @property + def name( self ): + """ Return name or None if not defined by the discovery pattern. 
+ """ + re_match = self.re_match + name = None + if "name" in re_match.groupdict(): + name = re_match.group( "name" ) + return name + + @property + def dbkey( self ): + try: + return self.re_match.group( "dbkey" ) + except IndexError: + return self.collector.default_dbkey + + @property + def ext( self ): + try: + return self.re_match.group( "ext" ) + except IndexError: + return self.collector.default_ext + + @property + def visible( self ): + try: + return self.re_match.group( "visible" ).lower() == "visible" + except IndexError: + return self.collector.default_visible + + +DEFAULT_DATASET_COLLECTOR = DatasetCollector() diff -r 7b8a02bb183bcd7edf6ee9997ea73fe2b6097d5b -r 56e9203f020027c8c7a294555f6286ce654f79d2 test/functional/tools/multi_output_configured.xml --- /dev/null +++ b/test/functional/tools/multi_output_configured.xml @@ -0,0 +1,43 @@ +<tool id="multi_output_configured" name="Multi_Output_Configured" description="multi_output_configured" force_history_refresh="True" version="0.1.0"> + <command> + echo "Hello" > $report; + mkdir subdir1; + echo "This" > subdir1/this.txt; + echo "That" > subdir1/that.txt; + mkdir subdir2; + echo "1" > subdir2/CUSTOM_1.txt; + echo "2" > subdir2/CUSTOM_2.tabular; + echo "3" > subdir2/CUSTOM_3.txt; + </command> + <inputs> + <param name="input" type="integer" value="7" /> + </inputs> + <outputs> + <data format="txt" name="report"> + <discover_datasets pattern="__designation_and_ext__" directory="subdir1" /> + <discover_datasets pattern="CUSTOM_(?P<designation>.+)\.(?P<ext>.+)" directory="subdir2" /> + </data> + </outputs> + <tests> + <test> + <param name="input" value="7" /> + <output name="report"> + <assert_contents> + <has_line line="Hello" /> + </assert_contents> + <discovered_dataset designation="this" ftype="txt"> + <assert_contents><has_line line="This" /></assert_contents> + </discovered_dataset> + <discovered_dataset designation="that" ftype="txt"> + <assert_contents><has_line line="That" /></assert_contents> + </discovered_dataset> + <discovered_dataset designation="1" ftype="txt"> + <assert_contents><has_line line="1" /></assert_contents> + </discovered_dataset> + <discovered_dataset designation="2" ftype="tabular"> + <assert_contents><has_line line="2" /></assert_contents> + </discovered_dataset> + </output> + </test> + </tests> +</tool> diff -r 7b8a02bb183bcd7edf6ee9997ea73fe2b6097d5b -r 56e9203f020027c8c7a294555f6286ce654f79d2 test/functional/tools/samples_tool_conf.xml --- a/test/functional/tools/samples_tool_conf.xml +++ b/test/functional/tools/samples_tool_conf.xml @@ -8,6 +8,7 @@ <tool file="multi_page.xml"/><tool file="multi_select.xml" /><tool file="multi_output.xml" /> + <tool file="multi_output_configured.xml" /><tool file="composite_output.xml" /><tool file="metadata.xml" /><tool file="output_order.xml" /> diff -r 7b8a02bb183bcd7edf6ee9997ea73fe2b6097d5b -r 56e9203f020027c8c7a294555f6286ce654f79d2 test/unit/tools/test_collect_primary_datasets.py --- a/test/unit/tools/test_collect_primary_datasets.py +++ b/test/unit/tools/test_collect_primary_datasets.py @@ -5,6 +5,8 @@ import tools_support from galaxy import model +from galaxy import util +from galaxy.tools.parameters import output_collect DEFAULT_TOOL_OUTPUT = "out1" DEFAULT_EXTRA_NAME = "test1" @@ -114,6 +116,75 @@ extra_job_assoc = filter( lambda job_assoc: job_assoc.name.startswith( "__" ), self.job.output_datasets )[ 0 ] assert extra_job_assoc.name == "__new_primary_file_out1|test1__" + def test_pattern_override_designation( self ): + self._replace_output_collectors( 
'''<output><discover_datasets pattern="__designation__" directory="subdir" ext="txt" /></output>''' ) + self._setup_extra_file( subdir="subdir", filename="foo.txt" ) + primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ] + assert len( primary_outputs ) == 1 + created_hda = primary_outputs.values()[ 0 ] + assert "foo.txt" in created_hda.name + assert created_hda.ext == "txt" + + def test_name_and_ext_pattern( self ): + self._replace_output_collectors( '''<output><discover_datasets pattern="__name_and_ext__" directory="subdir" /></output>''' ) + self._setup_extra_file( subdir="subdir", filename="foo1.txt" ) + self._setup_extra_file( subdir="subdir", filename="foo2.tabular" ) + primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ] + assert len( primary_outputs ) == 2 + assert primary_outputs[ "foo1" ].ext == "txt" + assert primary_outputs[ "foo2" ].ext == "tabular" + + def test_custom_pattern( self ): + # Hypothetical oral metagenomic classifier that populates a directory + # of files based on name and genome. Use custom regex pattern to grab + # and classify these files. + self._replace_output_collectors( '''<output><discover_datasets pattern="(?P<designation>.*)__(?P<dbkey>.*).fasta" directory="genome_breakdown" ext="fasta" /></output>''' ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp1__hg19.fasta" ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp2__lactLact.fasta" ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp3__hg19.fasta" ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp4__lactPlan.fasta" ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp5__fusoNucl.fasta" ) + + # Put a file in directory we don't care about, just to make sure + # it doesn't get picked up by pattern. + self._setup_extra_file( subdir="genome_breakdown", filename="overview.txt" ) + + primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ] + assert len( primary_outputs ) == 5 + genomes = dict( samp1="hg19", samp2="lactLact", samp3="hg19", samp4="lactPlan", samp5="fusoNucl" ) + for key, hda in primary_outputs.iteritems(): + assert hda.dbkey == genomes[ key ] + + def test_name_versus_designation( self ): + """ This test demonstrates the difference between name and desgination + in grouping patterns and named patterns such as __designation__, + __name__, __designation_and_ext__, and __name_and_ext__. + """ + self._replace_output_collectors( '''<output> + <discover_datasets pattern="__name_and_ext__" directory="subdir_for_name_discovery" /> + <discover_datasets pattern="__designation_and_ext__" directory="subdir_for_designation_discovery" /> + </output>''') + self._setup_extra_file( subdir="subdir_for_name_discovery", filename="example1.txt" ) + self._setup_extra_file( subdir="subdir_for_designation_discovery", filename="example2.txt" ) + primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ] + name_output = primary_outputs[ "example1" ] + designation_output = primary_outputs[ "example2" ] + # While name is also used for designation, designation is not the name - + # it is used in the calculation of the name however... 
+ assert name_output.name == "example1" + assert designation_output.name == "%s (%s)" % ( self.hda.name, "example2" ) + + def test_cannot_read_files_outside_job_directory( self ): + self._replace_output_collectors( '''<output> + <discover_datasets pattern="__name_and_ext__" directory="../../secrets" /> + </output>''') + exception_thrown = False + try: + self._collect( ) + except Exception: + exception_thrown = True + assert exception_thrown + def _collect_default_extra( self, **kwargs ): return self._collect( **kwargs )[ DEFAULT_TOOL_OUTPUT ][ DEFAULT_EXTRA_NAME ] @@ -122,6 +193,12 @@ job_working_directory = self.test_directory return self.tool.collect_primary_datasets( self.outputs, job_working_directory ) + def _replace_output_collectors( self, xml_str ): + # Rewrite tool as if it had been created with output containing + # supplied dataset_collector elem. + elem = util.parse_xml_string( xml_str ) + self.tool.outputs[ DEFAULT_TOOL_OUTPUT ].dataset_collectors = output_collect.dataset_collectors_from_elem( elem ) + def _append_job_json( self, object, output_path=None, line_type="new_primary_dataset" ): object[ "type" ] = line_type if output_path: @@ -133,7 +210,8 @@ def _setup_extra_file( self, **kwargs ): path = kwargs.get( "path", None ) - if not path: + filename = kwargs.get( "filename", None ) + if not path and not filename: name = kwargs.get( "name", DEFAULT_EXTRA_NAME ) visible = kwargs.get( "visible", "visible" ) ext = kwargs.get( "ext", "data" ) @@ -142,6 +220,13 @@ path = os.path.join( directory, "primary_%s_%s_%s_%s" % template_args ) if "dbkey" in kwargs: path = "%s_%s" % ( path, kwargs[ "dbkey" ] ) + if not path: + assert filename + subdir = kwargs.get( "subdir", "." ) + path = os.path.join( self.test_directory, subdir, filename ) + directory = os.path.dirname( path ) + if not os.path.exists( directory ): + os.makedirs( directory ) contents = kwargs.get( "contents", "test contents" ) open( path, "w" ).write( contents ) return path https://bitbucket.org/galaxy/galaxy-central/commits/8e6cda4c1b3d/ Changeset: 8e6cda4c1b3d User: jmchilton Date: 2014-05-06 15:13:29 Summary: Merged in jmchilton/galaxy-central-fork-1 (pull request #356) Enhancements for Runtime Discovered (Collected Primary) Datasets Affected #: 13 files diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -42,6 +42,7 @@ from galaxy.tools.deps import build_dependency_manager from galaxy.tools.deps.requirements import parse_requirements_from_xml from galaxy.tools.parameters import check_param, params_from_strings, params_to_strings +from galaxy.tools.parameters import output_collect from galaxy.tools.parameters.basic import (BaseURLToolParameter, DataToolParameter, HiddenToolParameter, LibraryDatasetToolParameter, SelectToolParameter, ToolParameter, UnvalidatedValue, @@ -1452,6 +1453,7 @@ output.hidden = string_as_bool( data_elem.get("hidden", "") ) output.tool = self output.actions = ToolOutputActionGroup( output, data_elem.find( 'actions' ) ) + output.dataset_collectors = output_collect.dataset_collectors_from_elem( data_elem ) self.outputs[ output.name ] = output # TODO: Include the tool's name in any parsing warnings. @@ -2800,94 +2802,7 @@ Find any additional datasets generated by a tool and attach (for cases where number of outputs is not known in advance). 
""" - new_primary_datasets = {} - try: - json_file = open( os.path.join( job_working_directory, jobs.TOOL_PROVIDED_JOB_METADATA_FILE ), 'r' ) - for line in json_file: - line = json.loads( line ) - if line.get( 'type' ) == 'new_primary_dataset': - new_primary_datasets[ os.path.split( line.get( 'filename' ) )[-1] ] = line - except Exception: - # This should not be considered an error or warning condition, this file is optional - pass - # Loop through output file names, looking for generated primary - # datasets in form of: - # 'primary_associatedWithDatasetID_designation_visibility_extension(_DBKEY)' - primary_datasets = {} - for name, outdata in output.items(): - filenames = [] - if 'new_file_path' in self.app.config.collect_outputs_from: - filenames.extend( glob.glob(os.path.join(self.app.config.new_file_path, "primary_%i_*" % outdata.id) ) ) - if 'job_working_directory' in self.app.config.collect_outputs_from: - filenames.extend( glob.glob(os.path.join(job_working_directory, "primary_%i_*" % outdata.id) ) ) - for filename in filenames: - if not name in primary_datasets: - primary_datasets[name] = {} - fields = os.path.basename(filename).split("_") - fields.pop(0) - parent_id = int(fields.pop(0)) - designation = fields.pop(0) - visible = fields.pop(0).lower() - if visible == "visible": - visible = True - else: - visible = False - ext = fields.pop(0).lower() - dbkey = outdata.dbkey - if fields: - dbkey = fields[ 0 ] - # Create new primary dataset - primary_data = self.app.model.HistoryDatasetAssociation( extension=ext, - designation=designation, - visible=visible, - dbkey=dbkey, - create_dataset=True, - sa_session=self.sa_session ) - self.app.security_agent.copy_dataset_permissions( outdata.dataset, primary_data.dataset ) - self.sa_session.add( primary_data ) - self.sa_session.flush() - # Move data from temp location to dataset location - self.app.object_store.update_from_file(primary_data.dataset, file_name=filename, create=True) - primary_data.set_size() - primary_data.name = "%s (%s)" % ( outdata.name, designation ) - primary_data.info = outdata.info - primary_data.init_meta( copy_from=outdata ) - primary_data.dbkey = dbkey - # Associate new dataset with job - job = None - for assoc in outdata.creating_job_associations: - job = assoc.job - break - if job: - assoc = self.app.model.JobToOutputDatasetAssociation( '__new_primary_file_%s|%s__' % ( name, designation ), primary_data ) - assoc.job = job - self.sa_session.add( assoc ) - self.sa_session.flush() - primary_data.state = outdata.state - #add tool/metadata provided information - new_primary_datasets_attributes = new_primary_datasets.get( os.path.split( filename )[-1] ) - if new_primary_datasets_attributes: - dataset_att_by_name = dict( ext='extension' ) - for att_set in [ 'name', 'info', 'ext', 'dbkey' ]: - dataset_att_name = dataset_att_by_name.get( att_set, att_set ) - setattr( primary_data, dataset_att_name, new_primary_datasets_attributes.get( att_set, getattr( primary_data, dataset_att_name ) ) ) - primary_data.set_meta() - primary_data.set_peek() - self.sa_session.add( primary_data ) - self.sa_session.flush() - outdata.history.add_dataset( primary_data ) - # Add dataset to return dict - primary_datasets[name][designation] = primary_data - # Need to update all associated output hdas, i.e. 
history was - # shared with job running - for dataset in outdata.dataset.history_associations: - if outdata == dataset: - continue - new_data = primary_data.copy() - dataset.history.add_dataset( new_data ) - self.sa_session.add( new_data ) - self.sa_session.flush() - return primary_datasets + return output_collect.collect_primary_datatasets( self, output, job_working_directory ) def to_dict( self, trans, link_details=False, io_details=False ): """ Returns dict of tool. """ diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/tools/parameters/output_collect.py --- /dev/null +++ b/lib/galaxy/tools/parameters/output_collect.py @@ -0,0 +1,219 @@ +""" Code allowing tools to define extra files associated with an output datset. +""" +import os +import re +import glob +import json + + +from galaxy import jobs +from galaxy import util +from galaxy.util import odict + +DATASET_ID_TOKEN = "DATASET_ID" +DEFAULT_EXTRA_FILENAME_PATTERN = r"primary_DATASET_ID_(?P<designation>[^_]+)_(?P<visible>[^_]+)_(?P<ext>[^_]+)(_(?P<dbkey>[^_]+))?" + + +def collect_primary_datatasets( tool, output, job_working_directory ): + app = tool.app + sa_session = tool.sa_session + new_primary_datasets = {} + try: + json_file = open( os.path.join( job_working_directory, jobs.TOOL_PROVIDED_JOB_METADATA_FILE ), 'r' ) + for line in json_file: + line = json.loads( line ) + if line.get( 'type' ) == 'new_primary_dataset': + new_primary_datasets[ os.path.split( line.get( 'filename' ) )[-1] ] = line + except Exception: + # This should not be considered an error or warning condition, this file is optional + pass + # Loop through output file names, looking for generated primary + # datasets in form of: + # 'primary_associatedWithDatasetID_designation_visibility_extension(_DBKEY)' + primary_datasets = {} + for name, outdata in output.items(): + dataset_collectors = tool.outputs[ name ].dataset_collectors if name in tool.outputs else [ DEFAULT_DATASET_COLLECTOR ] + filenames = odict.odict() + if 'new_file_path' in app.config.collect_outputs_from: + if DEFAULT_DATASET_COLLECTOR in dataset_collectors: + # 'new_file_path' collection should be considered deprecated, + # only use old-style matching (glob instead of regex and only + # using default collector - if enabled). + for filename in glob.glob(os.path.join(app.config.new_file_path, "primary_%i_*" % outdata.id) ): + filenames[ filename ] = DEFAULT_DATASET_COLLECTOR + if 'job_working_directory' in app.config.collect_outputs_from: + for extra_file_collector in dataset_collectors: + directory = job_working_directory + if extra_file_collector.directory: + directory = os.path.join( directory, extra_file_collector.directory ) + if not util.in_directory( directory, job_working_directory ): + raise Exception( "Problem with tool configuration, attempting to pull in datasets from outside working directory." 
) + if not os.path.isdir( directory ): + continue + for filename in os.listdir( directory ): + path = os.path.join( directory, filename ) + if not os.path.isfile( path ): + continue + if extra_file_collector.match( outdata, filename ): + filenames[ path ] = extra_file_collector + for filename, extra_file_collector in filenames.iteritems(): + if not name in primary_datasets: + primary_datasets[name] = {} + fields_match = extra_file_collector.match( outdata, os.path.basename( filename ) ) + if not fields_match: + # Before I guess pop() would just have thrown an IndexError + raise Exception( "Problem parsing metadata fields for file %s" % filename ) + designation = fields_match.designation + visible = fields_match.visible + ext = fields_match.ext + dbkey = fields_match.dbkey + # Create new primary dataset + primary_data = app.model.HistoryDatasetAssociation( extension=ext, + designation=designation, + visible=visible, + dbkey=dbkey, + create_dataset=True, + sa_session=sa_session ) + app.security_agent.copy_dataset_permissions( outdata.dataset, primary_data.dataset ) + sa_session.add( primary_data ) + sa_session.flush() + # Move data from temp location to dataset location + app.object_store.update_from_file(primary_data.dataset, file_name=filename, create=True) + primary_data.set_size() + # If match specified a name use otherwise generate one from + # designation. + primary_data.name = fields_match.name or "%s (%s)" % ( outdata.name, designation ) + primary_data.info = outdata.info + primary_data.init_meta( copy_from=outdata ) + primary_data.dbkey = dbkey + # Associate new dataset with job + job = None + for assoc in outdata.creating_job_associations: + job = assoc.job + break + if job: + assoc = app.model.JobToOutputDatasetAssociation( '__new_primary_file_%s|%s__' % ( name, designation ), primary_data ) + assoc.job = job + sa_session.add( assoc ) + sa_session.flush() + primary_data.state = outdata.state + #add tool/metadata provided information + new_primary_datasets_attributes = new_primary_datasets.get( os.path.split( filename )[-1] ) + if new_primary_datasets_attributes: + dataset_att_by_name = dict( ext='extension' ) + for att_set in [ 'name', 'info', 'ext', 'dbkey' ]: + dataset_att_name = dataset_att_by_name.get( att_set, att_set ) + setattr( primary_data, dataset_att_name, new_primary_datasets_attributes.get( att_set, getattr( primary_data, dataset_att_name ) ) ) + primary_data.set_meta() + primary_data.set_peek() + sa_session.add( primary_data ) + sa_session.flush() + outdata.history.add_dataset( primary_data ) + # Add dataset to return dict + primary_datasets[name][designation] = primary_data + # Need to update all associated output hdas, i.e. history was + # shared with job running + for dataset in outdata.dataset.history_associations: + if outdata == dataset: + continue + new_data = primary_data.copy() + dataset.history.add_dataset( new_data ) + sa_session.add( new_data ) + sa_session.flush() + return primary_datasets + + +# XML can describe custom patterns, but these literals describe named +# patterns that will be replaced. 
+NAMED_PATTERNS = { + "__default__": DEFAULT_EXTRA_FILENAME_PATTERN, + "__name__": r"(?P<name>.*)", + "__designation__": r"(?P<designation>.*)", + "__name_and_ext__": r"(?P<name>.*)\.(?P<ext>[^\.]+)?", + "__designation_and_ext__": r"(?P<designation>.*)\.(?P<ext>[^\._]+)?", +} + + +def dataset_collectors_from_elem( elem ): + primary_dataset_elems = elem.findall( "discover_datasets" ) + if not primary_dataset_elems: + return [ DEFAULT_DATASET_COLLECTOR ] + else: + return map( lambda elem: DatasetCollector( **elem.attrib ), primary_dataset_elems ) + + +class DatasetCollector( object ): + + def __init__( self, **kwargs ): + pattern = kwargs.get( "pattern", "__default__" ) + if pattern in NAMED_PATTERNS: + pattern = NAMED_PATTERNS.get( pattern ) + self.pattern = pattern + self.default_dbkey = kwargs.get( "dbkey", None ) + self.default_ext = kwargs.get( "ext", None ) + self.default_visible = util.asbool( kwargs.get( "visible", None ) ) + self.directory = kwargs.get( "directory", None ) + + def pattern_for_dataset( self, dataset_instance=None ): + token_replacement = r'\d+' + if dataset_instance: + token_replacement = str( dataset_instance.id ) + return self.pattern.replace( DATASET_ID_TOKEN, token_replacement ) + + def match( self, dataset_instance, filename ): + re_match = re.match( self.pattern_for_dataset( dataset_instance ), filename ) + match_object = None + if re_match: + match_object = CollectedDatasetMatch( re_match, self ) + return match_object + + +class CollectedDatasetMatch( object ): + + def __init__( self, re_match, collector ): + self.re_match = re_match + self.collector = collector + + @property + def designation( self ): + re_match = self.re_match + if "designation" in re_match.groupdict(): + return re_match.group( "designation" ) + elif "name" in re_match.groupdict(): + return re_match.group( "name" ) + else: + return None + + @property + def name( self ): + """ Return name or None if not defined by the discovery pattern. 
+ """ + re_match = self.re_match + name = None + if "name" in re_match.groupdict(): + name = re_match.group( "name" ) + return name + + @property + def dbkey( self ): + try: + return self.re_match.group( "dbkey" ) + except IndexError: + return self.collector.default_dbkey + + @property + def ext( self ): + try: + return self.re_match.group( "ext" ) + except IndexError: + return self.collector.default_ext + + @property + def visible( self ): + try: + return self.re_match.group( "visible" ).lower() == "visible" + except IndexError: + return self.collector.default_visible + + +DEFAULT_DATASET_COLLECTOR = DatasetCollector() diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/tools/test.py --- a/lib/galaxy/tools/test.py +++ b/lib/galaxy/tools/test.py @@ -297,6 +297,19 @@ if name is None: raise Exception( "Test output does not have a 'name'" ) + file, attributes = __parse_test_attributes( output_elem, attrib ) + primary_datasets = {} + for primary_elem in ( output_elem.findall( "discovered_dataset" ) or [] ): + primary_attrib = dict( primary_elem.attrib ) + designation = primary_attrib.pop( 'designation', None ) + if designation is None: + raise Exception( "Test primary dataset does not have a 'designation'" ) + primary_datasets[ designation ] = __parse_test_attributes( primary_elem, primary_attrib ) + attributes[ "primary_datasets" ] = primary_datasets + return name, file, attributes + + +def __parse_test_attributes( output_elem, attrib ): assert_list = __parse_assert_list( output_elem ) file = attrib.pop( 'file', None ) # File no longer required if an list of assertions was present. @@ -321,7 +334,7 @@ attributes['assert_list'] = assert_list attributes['extra_files'] = extra_files attributes['metadata'] = metadata - return name, file, attributes + return file, attributes def __parse_assert_list( output_elem ): diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/webapps/galaxy/api/jobs.py --- a/lib/galaxy/webapps/galaxy/api/jobs.py +++ b/lib/galaxy/webapps/galaxy/api/jobs.py @@ -13,6 +13,7 @@ from galaxy.web.base.controller import UsesLibraryMixinItems from galaxy import exceptions from galaxy import util +from galaxy import model import logging log = logging.getLogger( __name__ ) @@ -91,10 +92,62 @@ :rtype: dictionary :returns: dictionary containing full description of job data """ + job = self.__get_job( trans, id ) + return self.encode_all_ids( trans, job.to_dict( 'element' ), True ) + + @expose_api + def inputs( self, trans, id, **kwd ): + """ + show( trans, id ) + * GET /api/jobs/{job_id}/inputs + returns input datasets created by job + + :type id: string + :param id: Encoded job id + + :rtype: dictionary + :returns: dictionary containing input dataset associations + """ + job = self.__get_job( trans, id ) + return self.__dictify_associations( trans, job.input_datasets, job.input_library_datasets ) + + @expose_api + def outputs( self, trans, id, **kwd ): + """ + show( trans, id ) + * GET /api/jobs/{job_id}/outputs + returns output datasets created by job + + :type id: string + :param id: Encoded job id + + :rtype: dictionary + :returns: dictionary containing output dataset associations + """ + job = self.__get_job( trans, id ) + return self.__dictify_associations( trans, job.output_datasets, job.output_library_datasets ) + + def __dictify_associations( self, trans, *association_lists ): + rval = [] + for association_list in association_lists: + rval.extend( map( lambda a: 
self.__dictify_association( trans, a ), association_list ) ) + return rval + + def __dictify_association( self, trans, job_dataset_association ): + dataset_dict = None + dataset = job_dataset_association.dataset + if dataset: + if isinstance( dataset, model.HistoryDatasetAssociation ): + dataset_dict = dict( src="hda", id=trans.security.encode_id( dataset.id ) ) + else: + dataset_dict = dict( src="ldda", id=trans.security.encode_id( dataset.id ) ) + return dict( name=job_dataset_association.name, dataset=dataset_dict ) + + def __get_job( self, trans, id ): try: - decoded_job_id = trans.security.decode_id(id) - except: - raise exceptions.ObjectAttributeInvalidException() + decoded_job_id = trans.security.decode_id( id ) + except Exception: + raise exceptions.MalformedId() query = trans.sa_session.query( trans.app.model.Job ).filter( trans.app.model.Job.user == trans.user, trans.app.model.Job.id == decoded_job_id @@ -102,7 +155,7 @@ job = query.first() if job is None: raise exceptions.ObjectNotFound() - return self.encode_all_ids( trans, job.to_dict( 'element' ), True ) + return job @expose_api def create( self, trans, payload, **kwd ): diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/webapps/galaxy/api/provenance.py --- a/lib/galaxy/webapps/galaxy/api/provenance.py +++ b/lib/galaxy/webapps/galaxy/api/provenance.py @@ -46,6 +46,7 @@ return { "id": trans.security.encode_id(item.id), "uuid": ( lambda uuid: str( uuid ) if uuid else None )( item.dataset.uuid), + "job_id": trans.security.encode_id( job.id ), "tool_id": job.tool_id, "parameters": self._get_job_record(trans, job, follow), "stderr": job.stderr, diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b lib/galaxy/webapps/galaxy/buildapp.py --- a/lib/galaxy/webapps/galaxy/buildapp.py +++ b/lib/galaxy/webapps/galaxy/buildapp.py @@ -281,6 +281,8 @@ 'jobs', path_prefix='/api' ) webapp.mapper.connect( 'job_search', '/api/jobs/search', controller='jobs', action='search', conditions=dict( method=['POST'] ) ) + webapp.mapper.connect( 'job_inputs', '/api/jobs/{id}/inputs', controller='jobs', action='inputs', conditions=dict( method=['GET'] ) ) + webapp.mapper.connect( 'job_outputs', '/api/jobs/{id}/outputs', controller='jobs', action='outputs', conditions=dict( method=['GET'] ) ) # Job files controllers. Only for consumption by remote job runners. webapp.mapper.resource( 'file', diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/base/interactor.py --- a/test/base/interactor.py +++ b/test/base/interactor.py @@ -44,16 +44,44 @@ self.api_key = self.__get_user_key( twill_test_case.user_api_key, twill_test_case.master_api_key, test_user=test_user ) self.uploads = {} - def verify_output( self, history_id, output_data, outfile, attributes, shed_tool_id, maxseconds ): + def verify_output( self, history_id, output_data, output_testdef, shed_tool_id, maxseconds ): + outfile = output_testdef.outfile + attributes = output_testdef.attributes + name = output_testdef.name self.wait_for_history( history_id, maxseconds ) hid = self.__output_id( output_data ) fetcher = self.__dataset_fetcher( history_id ) ## TODO: Twill version verifys dataset is 'ok' in here. 
self.twill_test_case.verify_hid( outfile, hda_id=hid, attributes=attributes, dataset_fetcher=fetcher, shed_tool_id=shed_tool_id ) + + primary_datasets = attributes.get( 'primary_datasets', {} ) + if primary_datasets: + job_id = self._dataset_provenance( history_id, hid )[ "job_id" ] + outputs = self._get( "jobs/%s/outputs" % ( job_id ) ).json() + + for designation, ( primary_outfile, primary_attributes ) in primary_datasets.iteritems(): + primary_output = None + for output in outputs: + if output[ "name" ] == '__new_primary_file_%s|%s__' % ( name, designation ): + primary_output = output + break + + if not primary_output: + msg_template = "Failed to find primary dataset with designation [%s] for output with name [%s]" + msg_args = ( designation, name ) + raise Exception( msg_template % msg_args ) + + primary_hda_id = primary_output[ "dataset" ][ "id" ] + self.twill_test_case.verify_hid( primary_outfile, hda_id=primary_hda_id, attributes=primary_attributes, dataset_fetcher=fetcher, shed_tool_id=shed_tool_id ) + self._verify_metadata( history_id, primary_hda_id, primary_attributes ) + + self._verify_metadata( history_id, hid, attributes ) + + def _verify_metadata( self, history_id, hid, attributes ): metadata = attributes.get( 'metadata', {} ).copy() for key, value in metadata.copy().iteritems(): new_key = "metadata_%s" % key - metadata[ new_key ] = metadata[ key ] + metadata[ new_key ] = metadata[ key ] del metadata[ key ] expected_file_type = attributes.get( 'ftype', None ) if expected_file_type: @@ -319,7 +347,10 @@ def __init__( self, twill_test_case ): self.twill_test_case = twill_test_case - def verify_output( self, history, output_data, outfile, attributes, shed_tool_id, maxseconds ): + def verify_output( self, history, output_data, output_testdef, shed_tool_id, maxseconds ): + outfile = output_testdef.outfile + attributes = output_testdef.attributes + hid = output_data.get( 'hid' ) self.twill_test_case.verify_dataset_correctness( outfile, hid=hid, attributes=attributes, shed_tool_id=shed_tool_id, maxseconds=maxseconds ) diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/test_toolbox.py --- a/test/functional/test_toolbox.py +++ b/test/functional/test_toolbox.py @@ -3,6 +3,7 @@ from base.twilltestcase import TwillTestCase from base.interactor import build_interactor, stage_data_in_history from galaxy.tools import DataManagerTool +from galaxy.util import bunch import logging log = logging.getLogger( __name__ ) @@ -52,6 +53,7 @@ for output_index, output_tuple in enumerate(testdef.outputs): # Get the correct hid name, outfile, attributes = output_tuple + output_testdef = bunch.Bunch( name=name, outfile=outfile, attributes=attributes ) try: output_data = data_list[ name ] except (TypeError, KeyError): @@ -64,7 +66,7 @@ output_data = data_list[ len(data_list) - len(testdef.outputs) + output_index ] self.assertTrue( output_data is not None ) try: - galaxy_interactor.verify_output( history, output_data, outfile, attributes=attributes, shed_tool_id=shed_tool_id, maxseconds=maxseconds ) + galaxy_interactor.verify_output( history, output_data, output_testdef=output_testdef, shed_tool_id=shed_tool_id, maxseconds=maxseconds ) except Exception: for stream in ['stdout', 'stderr']: stream_output = galaxy_interactor.get_job_stream( history, output_data, stream=stream ) diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/tools/multi_output.xml --- a/test/functional/tools/multi_output.xml 
+++ b/test/functional/tools/multi_output.xml @@ -1,7 +1,7 @@ <tool id="multi_output" name="Multi_Output" description="multi_output" force_history_refresh="True" version="0.1.0"><command> echo "Hello" > $report; - echo "World" > '${__new_file_path__}/primary_${report.id}_moo_visible_?' + echo "World Contents" > '${__new_file_path__}/primary_${report.id}_world_visible_?' </command><inputs><param name="input" type="integer" value="7" /> @@ -16,6 +16,11 @@ <assert_contents><has_line line="Hello" /></assert_contents> + <discovered_dataset designation="world"> + <assert_contents> + <has_line line="World Contents" /> + </assert_contents> + </discovered_dataset></output></test></tests> diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/tools/multi_output_configured.xml --- /dev/null +++ b/test/functional/tools/multi_output_configured.xml @@ -0,0 +1,43 @@ +<tool id="multi_output_configured" name="Multi_Output_Configured" description="multi_output_configured" force_history_refresh="True" version="0.1.0"> + <command> + echo "Hello" > $report; + mkdir subdir1; + echo "This" > subdir1/this.txt; + echo "That" > subdir1/that.txt; + mkdir subdir2; + echo "1" > subdir2/CUSTOM_1.txt; + echo "2" > subdir2/CUSTOM_2.tabular; + echo "3" > subdir2/CUSTOM_3.txt; + </command> + <inputs> + <param name="input" type="integer" value="7" /> + </inputs> + <outputs> + <data format="txt" name="report"> + <discover_datasets pattern="__designation_and_ext__" directory="subdir1" /> + <discover_datasets pattern="CUSTOM_(?P<designation>.+)\.(?P<ext>.+)" directory="subdir2" /> + </data> + </outputs> + <tests> + <test> + <param name="input" value="7" /> + <output name="report"> + <assert_contents> + <has_line line="Hello" /> + </assert_contents> + <discovered_dataset designation="this" ftype="txt"> + <assert_contents><has_line line="This" /></assert_contents> + </discovered_dataset> + <discovered_dataset designation="that" ftype="txt"> + <assert_contents><has_line line="That" /></assert_contents> + </discovered_dataset> + <discovered_dataset designation="1" ftype="txt"> + <assert_contents><has_line line="1" /></assert_contents> + </discovered_dataset> + <discovered_dataset designation="2" ftype="tabular"> + <assert_contents><has_line line="2" /></assert_contents> + </discovered_dataset> + </output> + </test> + </tests> +</tool> diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/tools/samples_tool_conf.xml --- a/test/functional/tools/samples_tool_conf.xml +++ b/test/functional/tools/samples_tool_conf.xml @@ -8,6 +8,7 @@ <tool file="multi_page.xml"/><tool file="multi_select.xml" /><tool file="multi_output.xml" /> + <tool file="multi_output_configured.xml" /><tool file="composite_output.xml" /><tool file="metadata.xml" /><tool file="output_order.xml" /> diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/functional/workflow.py --- a/test/functional/workflow.py +++ b/test/functional/workflow.py @@ -4,6 +4,7 @@ from base.interactor import GalaxyInteractorApi, stage_data_in_history from galaxy.util import parse_xml +from galaxy.util import bunch from galaxy.tools.test import parse_param_elem, require_file, test_data_iter, parse_output_elems from json import load, dumps @@ -66,10 +67,11 @@ for expected_output_def in workflow_test.outputs: # Get the correct hid name, outfile, attributes = expected_output_def + output_testdef = bunch.Bunch( name=name, outfile=outfile, 
attributes=attributes ) output_data = outputs[ int( name ) ] try: - galaxy_interactor.verify_output( test_history, output_data, outfile, attributes=attributes, shed_tool_id=None, maxseconds=maxseconds ) + galaxy_interactor.verify_output( test_history, output_data, output_testdef=output_testdef, shed_tool_id=None, maxseconds=maxseconds ) except Exception: for stream in ['stdout', 'stderr']: stream_output = galaxy_interactor.get_job_stream( test_history, output_data, stream=stream ) diff -r 74b6e23ed7882f3c091d2b66ce85025241372017 -r 8e6cda4c1b3d1685a8ce07412dc542f34eb8b44b test/unit/tools/test_collect_primary_datasets.py --- a/test/unit/tools/test_collect_primary_datasets.py +++ b/test/unit/tools/test_collect_primary_datasets.py @@ -5,6 +5,8 @@ import tools_support from galaxy import model +from galaxy import util +from galaxy.tools.parameters import output_collect DEFAULT_TOOL_OUTPUT = "out1" DEFAULT_EXTRA_NAME = "test1" @@ -114,6 +116,75 @@ extra_job_assoc = filter( lambda job_assoc: job_assoc.name.startswith( "__" ), self.job.output_datasets )[ 0 ] assert extra_job_assoc.name == "__new_primary_file_out1|test1__" + def test_pattern_override_designation( self ): + self._replace_output_collectors( '''<output><discover_datasets pattern="__designation__" directory="subdir" ext="txt" /></output>''' ) + self._setup_extra_file( subdir="subdir", filename="foo.txt" ) + primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ] + assert len( primary_outputs ) == 1 + created_hda = primary_outputs.values()[ 0 ] + assert "foo.txt" in created_hda.name + assert created_hda.ext == "txt" + + def test_name_and_ext_pattern( self ): + self._replace_output_collectors( '''<output><discover_datasets pattern="__name_and_ext__" directory="subdir" /></output>''' ) + self._setup_extra_file( subdir="subdir", filename="foo1.txt" ) + self._setup_extra_file( subdir="subdir", filename="foo2.tabular" ) + primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ] + assert len( primary_outputs ) == 2 + assert primary_outputs[ "foo1" ].ext == "txt" + assert primary_outputs[ "foo2" ].ext == "tabular" + + def test_custom_pattern( self ): + # Hypothetical oral metagenomic classifier that populates a directory + # of files based on name and genome. Use custom regex pattern to grab + # and classify these files. + self._replace_output_collectors( '''<output><discover_datasets pattern="(?P<designation>.*)__(?P<dbkey>.*).fasta" directory="genome_breakdown" ext="fasta" /></output>''' ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp1__hg19.fasta" ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp2__lactLact.fasta" ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp3__hg19.fasta" ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp4__lactPlan.fasta" ) + self._setup_extra_file( subdir="genome_breakdown", filename="samp5__fusoNucl.fasta" ) + + # Put a file in directory we don't care about, just to make sure + # it doesn't get picked up by pattern. 
+ self._setup_extra_file( subdir="genome_breakdown", filename="overview.txt" ) + + primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ] + assert len( primary_outputs ) == 5 + genomes = dict( samp1="hg19", samp2="lactLact", samp3="hg19", samp4="lactPlan", samp5="fusoNucl" ) + for key, hda in primary_outputs.iteritems(): + assert hda.dbkey == genomes[ key ] + + def test_name_versus_designation( self ): + """ This test demonstrates the difference between name and desgination + in grouping patterns and named patterns such as __designation__, + __name__, __designation_and_ext__, and __name_and_ext__. + """ + self._replace_output_collectors( '''<output> + <discover_datasets pattern="__name_and_ext__" directory="subdir_for_name_discovery" /> + <discover_datasets pattern="__designation_and_ext__" directory="subdir_for_designation_discovery" /> + </output>''') + self._setup_extra_file( subdir="subdir_for_name_discovery", filename="example1.txt" ) + self._setup_extra_file( subdir="subdir_for_designation_discovery", filename="example2.txt" ) + primary_outputs = self._collect( )[ DEFAULT_TOOL_OUTPUT ] + name_output = primary_outputs[ "example1" ] + designation_output = primary_outputs[ "example2" ] + # While name is also used for designation, designation is not the name - + # it is used in the calculation of the name however... + assert name_output.name == "example1" + assert designation_output.name == "%s (%s)" % ( self.hda.name, "example2" ) + + def test_cannot_read_files_outside_job_directory( self ): + self._replace_output_collectors( '''<output> + <discover_datasets pattern="__name_and_ext__" directory="../../secrets" /> + </output>''') + exception_thrown = False + try: + self._collect( ) + except Exception: + exception_thrown = True + assert exception_thrown + def _collect_default_extra( self, **kwargs ): return self._collect( **kwargs )[ DEFAULT_TOOL_OUTPUT ][ DEFAULT_EXTRA_NAME ] @@ -122,6 +193,12 @@ job_working_directory = self.test_directory return self.tool.collect_primary_datasets( self.outputs, job_working_directory ) + def _replace_output_collectors( self, xml_str ): + # Rewrite tool as if it had been created with output containing + # supplied dataset_collector elem. + elem = util.parse_xml_string( xml_str ) + self.tool.outputs[ DEFAULT_TOOL_OUTPUT ].dataset_collectors = output_collect.dataset_collectors_from_elem( elem ) + def _append_job_json( self, object, output_path=None, line_type="new_primary_dataset" ): object[ "type" ] = line_type if output_path: @@ -133,7 +210,8 @@ def _setup_extra_file( self, **kwargs ): path = kwargs.get( "path", None ) - if not path: + filename = kwargs.get( "filename", None ) + if not path and not filename: name = kwargs.get( "name", DEFAULT_EXTRA_NAME ) visible = kwargs.get( "visible", "visible" ) ext = kwargs.get( "ext", "data" ) @@ -142,6 +220,13 @@ path = os.path.join( directory, "primary_%s_%s_%s_%s" % template_args ) if "dbkey" in kwargs: path = "%s_%s" % ( path, kwargs[ "dbkey" ] ) + if not path: + assert filename + subdir = kwargs.get( "subdir", "." ) + path = os.path.join( self.test_directory, subdir, filename ) + directory = os.path.dirname( path ) + if not os.path.exists( directory ): + os.makedirs( directory ) contents = kwargs.get( "contents", "test contents" ) open( path, "w" ).write( contents ) return path Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.