28 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/62d6ae1c93d9/ Changeset: 62d6ae1c93d9 User: jmchilton Date: 2014-02-11 04:34:42 Summary: Hack to fix test_executions for 0951e07. This whole concept I had of using these models in unit tests without a database connection present probably needs to be done away with. Affected #: 1 file diff -r e18b36121df0bcb9a30906716e5b0c8a747bd138 -r 62d6ae1c93d9e4cdcba84dd15bef6bd9ef8d08de test/unit/tools/test_execution.py --- a/test/unit/tools/test_execution.py +++ b/test/unit/tools/test_execution.py @@ -272,7 +272,7 @@ def __init__( self, app, history ): self.app = app self.history = history - self.history = galaxy.model.History() + self.history._active_datasets_children_and_roles = filter( lambda hda: hda.active and hda.history == history, self.app.model.context.model_objects[ galaxy.model.HistoryDatasetAssociation ] ) self.workflow_building_mode = False self.webapp = Bunch( name="galaxy" ) self.sa_session = self.app.model.context https://bitbucket.org/galaxy/galaxy-central/commits/36f51ba22382/ Changeset: 36f51ba22382 User: jmchilton Date: 2014-02-11 04:34:42 Summary: Reduce code duplication between JobWrapper and TaskWrapper. Affected #: 1 file diff -r 62d6ae1c93d9e4cdcba84dd15bef6bd9ef8d08de -r 36f51ba223821a5eaee6db123ff1efea12390f66 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -647,10 +647,7 @@ if not os.path.exists( self.working_directory ): os.mkdir( self.working_directory ) - # Restore parameters from the database - job = self.get_job() - if job.user is None and job.galaxy_session is None: - raise Exception( 'Job %s has no user and no session.' % job.id ) + job = self._load_job() incoming = dict( [ ( p.name, p.value ) for p in job.parameters ] ) incoming = self.tool.params_from_strings( incoming, self.app ) @@ -718,6 +715,14 @@ self.version_string_cmd = self.tool.version_string_cmd return extra_filenames + def _load_job( self ): + # Load job from database and verify it has user or session. + # Restore parameters from the database + job = self.get_job() + if job.user is None and job.galaxy_session is None: + raise Exception( 'Job %s has no user and no session.' % job.id ) + return job + def fail( self, message, exception=False, stdout="", stderr="", exit_code=None ): """ Indicate job failure by setting state and message on all output @@ -1398,10 +1403,8 @@ config files. """ # Restore parameters from the database - job = self.get_job() + job = self._load_job() task = self.get_task() - if job.user is None and job.galaxy_session is None: - raise Exception( 'Job %s has no user and no session.' % job.id ) incoming = dict( [ ( p.name, p.value ) for p in job.parameters ] ) incoming = self.tool.params_from_strings( incoming, self.app ) # Do any validation that could not be done at job creation https://bitbucket.org/galaxy/galaxy-central/commits/17abb2bbdf44/ Changeset: 17abb2bbdf44 User: jmchilton Date: 2014-02-11 04:34:42 Summary: TaskWrapper - utilize user_template_environment like JobWrapper. Affected #: 1 file diff -r 36f51ba223821a5eaee6db123ff1efea12390f66 -r 17abb2bbdf442dc5f5b606d5726e073368a638c2 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1415,15 +1415,10 @@ inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] ) out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] ) # DBTODO New method for generating command line for a task? 
- # These can be passed on the command line if wanted as $userId $userEmail - if job.history and job.history.user: # check for anonymous user! - userId = '%d' % job.history.user.id - userEmail = str(job.history.user.email) - else: - userId = 'Anonymous' - userEmail = 'Anonymous' - incoming['userId'] = userId - incoming['userEmail'] = userEmail + + # These can be passed on the command line if wanted as $__user_*__ + incoming.update( model.User.user_template_environment( job.history and job.history.user ) ) + # Build params, done before hook so hook can use param_dict = self.tool.build_param_dict( incoming, inp_data, out_data, self.get_output_fnames(), self.working_directory ) fnames = {} https://bitbucket.org/galaxy/galaxy-central/commits/aa53f2b3b52e/ Changeset: aa53f2b3b52e User: jmchilton Date: 2014-02-11 04:34:42 Summary: Reduce code duplication between JobWrapper and TaskWrapper. Create shared method for building up param dict. Affected #: 1 file diff -r 17abb2bbdf442dc5f5b606d5726e073368a638c2 -r aa53f2b3b52ebd274b82dface195561778f2459b lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -649,46 +649,8 @@ job = self._load_job() - incoming = dict( [ ( p.name, p.value ) for p in job.parameters ] ) - incoming = self.tool.params_from_strings( incoming, self.app ) - # Do any validation that could not be done at job creation - self.tool.handle_unvalidated_param_values( incoming, self.app ) - # Restore input / output data lists - inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] ) - out_data = dict( [ ( da.name, da.dataset ) for da in job.output_datasets ] ) - inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] ) - out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] ) + param_dict = self._build_param_dict( job, populate_special_output_file=True ) - # Set up output dataset association for export history jobs. Because job - # uses a Dataset rather than an HDA or LDA, it's necessary to set up a - # fake dataset association that provides the needed attributes for - # preparing a job. - class FakeDatasetAssociation ( object ): - def __init__( self, dataset=None ): - self.dataset = dataset - self.file_name = dataset.file_name - self.metadata = dict() - self.children = [] - special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first() - if not special: - special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first() - if special: - out_data[ "output_file" ] = FakeDatasetAssociation( dataset=special.dataset ) - - # These can be passed on the command line if wanted as $__user_*__ - incoming.update( model.User.user_template_environment( job.history and job.history.user ) ) - - # Build params, done before hook so hook can use - param_dict = self.tool.build_param_dict( incoming, - inp_data, out_data, - self.get_output_fnames(), - self.working_directory ) - # Certain tools require tasks to be completed prior to job execution - # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ). 
- self.tool.exec_before_job( self.queue.app, inp_data, out_data, param_dict ) - # Run the before queue ("exec_before_job") hook - self.tool.call_hook( 'exec_before_job', self.queue.app, inp_data=inp_data, - out_data=out_data, tool=self.tool, param_dict=incoming) self.sa_session.flush() # Build any required config files config_filenames = self.tool.build_config_files( param_dict, self.working_directory ) @@ -723,6 +685,52 @@ raise Exception( 'Job %s has no user and no session.' % job.id ) return job + def _build_param_dict( self, job, populate_special_output_file=True ): + incoming = dict( [ ( p.name, p.value ) for p in job.parameters ] ) + incoming = self.tool.params_from_strings( incoming, self.app ) + # Do any validation that could not be done at job creation + self.tool.handle_unvalidated_param_values( incoming, self.app ) + # Restore input / output data lists + inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] ) + out_data = dict( [ ( da.name, da.dataset ) for da in job.output_datasets ] ) + inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] ) + out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] ) + + if populate_special_output_file: + # Set up output dataset association for export history jobs. Because job + # uses a Dataset rather than an HDA or LDA, it's necessary to set up a + # fake dataset association that provides the needed attributes for + # preparing a job. + class FakeDatasetAssociation ( object ): + def __init__( self, dataset=None ): + self.dataset = dataset + self.file_name = dataset.file_name + self.metadata = dict() + self.children = [] + special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first() + if not special: + special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first() + if special: + out_data[ "output_file" ] = FakeDatasetAssociation( dataset=special.dataset ) + + # These can be passed on the command line if wanted as $__user_*__ + incoming.update( model.User.user_template_environment( job.history and job.history.user ) ) + + # Build params, done before hook so hook can use + param_dict = self.tool.build_param_dict( incoming, + inp_data, out_data, + self.get_output_fnames(), + self.working_directory ) + + # Certain tools require tasks to be completed prior to job execution + # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ). 
+ self.tool.exec_before_job( self.queue.app, inp_data, out_data, param_dict ) + # Run the before queue ("exec_before_job") hook + self.tool.call_hook( 'exec_before_job', self.queue.app, inp_data=inp_data, + out_data=out_data, tool=self.tool, param_dict=incoming) + + return param_dict + def fail( self, message, exception=False, stdout="", stderr="", exit_code=None ): """ Indicate job failure by setting state and message on all output @@ -1405,34 +1413,20 @@ # Restore parameters from the database job = self._load_job() task = self.get_task() - incoming = dict( [ ( p.name, p.value ) for p in job.parameters ] ) - incoming = self.tool.params_from_strings( incoming, self.app ) - # Do any validation that could not be done at job creation - self.tool.handle_unvalidated_param_values( incoming, self.app ) - # Restore input / output data lists - inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] ) - out_data = dict( [ ( da.name, da.dataset ) for da in job.output_datasets ] ) - inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] ) - out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] ) + # DBTODO New method for generating command line for a task? - # These can be passed on the command line if wanted as $__user_*__ - incoming.update( model.User.user_template_environment( job.history and job.history.user ) ) + param_dict = self._build_param_dict( job, populate_special_output_file=False ) - # Build params, done before hook so hook can use - param_dict = self.tool.build_param_dict( incoming, inp_data, out_data, self.get_output_fnames(), self.working_directory ) + # Build dict of file name re-writes for split up tasks. fnames = {} for v in self.get_input_fnames(): fnames[v] = os.path.join(self.working_directory, os.path.basename(v)) for dp in [x.real_path for x in self.get_output_fnames()]: fnames[dp] = os.path.join(self.working_directory, os.path.basename(dp)) - # Certain tools require tasks to be completed prior to job execution - # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ). - self.tool.exec_before_job( self.queue.app, inp_data, out_data, param_dict ) - # Run the before queue ("exec_before_job") hook - self.tool.call_hook( 'exec_before_job', self.queue.app, inp_data=inp_data, - out_data=out_data, tool=self.tool, param_dict=incoming) + self.sa_session.flush() + # Build any required config files config_filenames = self.tool.build_config_files( param_dict, self.working_directory ) for config_filename in config_filenames: https://bitbucket.org/galaxy/galaxy-central/commits/66242bb7cfc3/ Changeset: 66242bb7cfc3 User: jmchilton Date: 2014-02-11 04:34:42 Summary: Rework logic in tools related to false paths. Basically, if a false path is set trust it, don't explicitly check app.config.outputs_to_working_directory. The hope here is that this logic can then be used in other scenarios where a false_path might be setup at the layers (e.g. job_runner) above tool. 
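As a rough, self-contained sketch of the "trust the false path if set" idea (the DatasetPath namedtuple and wrapped_path helper here are simplified stand-ins for illustration, not Galaxy's actual classes — the real change is in the diff below):

import collections

# Minimal stand-in for galaxy.jobs DatasetPath-style objects.
DatasetPath = collections.namedtuple("DatasetPath", ["real_path", "false_path"])

def wrapped_path(real_path, output_paths):
    # Build a real_path -> false_path lookup once; any entry found is trusted,
    # regardless of whether outputs_to_working_directory (or a job runner) set it.
    false_paths = dict((dp.real_path, dp.false_path) for dp in output_paths if dp.false_path)
    return false_paths.get(real_path, real_path)

paths = [DatasetPath("/files/dataset_1.dat", "/job/working/galaxy_dataset_1.dat"),
         DatasetPath("/files/dataset_2.dat", None)]
assert wrapped_path("/files/dataset_1.dat", paths) == "/job/working/galaxy_dataset_1.dat"
assert wrapped_path("/files/dataset_2.dat", paths) == "/files/dataset_2.dat"

The point of the rework is that the consumer no longer needs to know who configured the rewrite; a false path, once present on the DatasetPath, is simply used.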
Affected #: 1 file diff -r aa53f2b3b52ebd274b82dface195561778f2459b -r 66242bb7cfc3365f71136eae90e25f543a121e2c lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -2664,17 +2664,15 @@ if data: for child in data.children: param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child ) + output_false_paths = dict( [ ( dp.real_path, dp.false_path ) for dp in output_paths if getattr( dp, "false_path", None ) ] ) for name, hda in output_datasets.items(): # Write outputs to the working directory (for security purposes) # if desired. - if self.app.config.outputs_to_working_directory: - try: - false_path = [ dp.false_path for dp in output_paths if dp.real_path == hda.file_name ][0] - param_dict[name] = DatasetFilenameWrapper( hda, false_path = false_path ) - open( false_path, 'w' ).close() - except IndexError: - log.warning( "Unable to determine alternate path for writing job outputs, outputs will be written to their real paths" ) - param_dict[name] = DatasetFilenameWrapper( hda ) + real_path = hda.file_name + if real_path in output_false_paths: + false_path = output_false_paths[ real_path ] + param_dict[name] = DatasetFilenameWrapper( hda, false_path = false_path ) + open( false_path, 'w' ).close() else: param_dict[name] = DatasetFilenameWrapper( hda ) # Provide access to a path to store additional files https://bitbucket.org/galaxy/galaxy-central/commits/429fc453199b/ Changeset: 429fc453199b User: jmchilton Date: 2014-02-11 04:34:42 Summary: Eliminate output path rewriting in TaskWrapper. Rework task splitting to rewrite paths before cheetah evalulation not after. Utilizing DatasetPath.false_path mechanisms implemented for config.outputs_to_working_directory. I think this could be made less convoluted if the combination of outputs_to_working_directory and Task splitting were optimized in some way - but this changeset keeps the behavior of that the same (copying twice) just to isolate changes to how these paths are evaluated in Cheetah. Affected #: 3 files diff -r 66242bb7cfc3365f71136eae90e25f543a121e2c -r 429fc453199b63568c81d8afe2c662fc2595864d lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -26,6 +26,9 @@ from galaxy.util.json import from_json_string from galaxy.util import unicodify from .output_checker import check_output +from .datasets import TaskPathRewriter +from .datasets import OutputsToWorkingDirectoryPathRewriter +from .datasets import NullDatasetPathRewriter log = logging.getLogger( __name__ ) @@ -577,6 +580,7 @@ log.debug('(%s) Working directory for job is: %s' % (self.job_id, self.working_directory)) except ObjectInvalid: raise Exception('Unable to create job working directory, job failure') + self.dataset_path_rewriter = self._job_dataset_path_rewriter( self.working_directory ) self.output_paths = None self.output_hdas_and_paths = None self.tool_provided_job_metadata = None @@ -590,6 +594,13 @@ self.__user_system_pwent = None self.__galaxy_system_pwent = None + def _job_dataset_path_rewriter( self, working_directory ): + if self.app.config.outputs_to_working_directory: + dataset_path_rewriter = OutputsToWorkingDirectoryPathRewriter( working_directory ) + else: + dataset_path_rewriter = NullDatasetPathRewriter( ) + return dataset_path_rewriter + def can_split( self ): # Should the job handler split this job up? 
return self.app.config.use_tasked_jobs and self.tool.parallelism @@ -1169,6 +1180,8 @@ return self.output_hdas_and_paths def compute_outputs( self ) : + dataset_path_rewriter = self.dataset_path_rewriter + class DatasetPath( object ): def __init__( self, dataset_id, real_path, false_path=None, mutable=True ): self.dataset_id = dataset_id @@ -1187,21 +1200,18 @@ if not special: special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first() false_path = None - if self.app.config.outputs_to_working_directory: - self.output_paths = [] - self.output_hdas_and_paths = {} - for name, hda in [ ( da.name, da.dataset ) for da in job.output_datasets + job.output_library_datasets ]: - false_path = os.path.abspath( os.path.join( self.working_directory, "galaxy_dataset_%d.dat" % hda.dataset.id ) ) - dsp = DatasetPath( hda.dataset.id, hda.dataset.file_name, false_path, mutable=hda.dataset.external_filename is None ) - self.output_paths.append( dsp ) - self.output_hdas_and_paths[name] = hda, dsp - if special: - false_path = os.path.abspath( os.path.join( self.working_directory, "galaxy_dataset_%d.dat" % special.dataset.id ) ) - else: - results = [ ( da.name, da.dataset, DatasetPath( da.dataset.dataset.id, da.dataset.file_name, mutable=da.dataset.dataset.external_filename is None ) ) for da in job.output_datasets + job.output_library_datasets ] - self.output_paths = [t[2] for t in results] - self.output_hdas_and_paths = dict([(t[0], t[1:]) for t in results]) + + results = [] + for da in job.output_datasets + job.output_library_datasets: + da_false_path = dataset_path_rewriter.rewrite_dataset_path( da.dataset, 'output' ) + mutable = da.dataset.dataset.external_filename is None + dataset_path = DatasetPath( da.dataset.dataset.id, da.dataset.file_name, false_path=da_false_path, mutable=mutable ) + results.append( ( da.name, da.dataset, dataset_path ) ) + + self.output_paths = [t[2] for t in results] + self.output_hdas_and_paths = dict([(t[0], t[1:]) for t in results]) if special: + false_path = dataset_path_rewriter.rewrite_dataset_path( special.dataset, 'output' ) dsp = DatasetPath( special.dataset.id, special.dataset.file_name, false_path ) self.output_paths.append( dsp ) return self.output_paths @@ -1371,7 +1381,10 @@ def __init__(self, task, queue): super(TaskWrapper, self).__init__(task.job, queue) self.task_id = task.id - self.working_directory = task.working_directory + working_directory = task.working_directory + self.working_directory = working_directory + job_dataset_path_rewriter = self._job_dataset_path_rewriter( self.working_directory ) + self.dataset_path_rewriter = TaskPathRewriter( working_directory, job_dataset_path_rewriter ) if task.prepare_input_files_cmd is not None: self.prepare_input_files_cmds = [ task.prepare_input_files_cmd ] else: @@ -1422,8 +1435,8 @@ fnames = {} for v in self.get_input_fnames(): fnames[v] = os.path.join(self.working_directory, os.path.basename(v)) - for dp in [x.real_path for x in self.get_output_fnames()]: - fnames[dp] = os.path.join(self.working_directory, os.path.basename(dp)) + #for dp in [x.real_path for x in self.get_output_fnames()]: + # fnames[dp] = os.path.join(self.working_directory, os.path.basename(dp)) self.sa_session.flush() diff -r 66242bb7cfc3365f71136eae90e25f543a121e2c -r 429fc453199b63568c81d8afe2c662fc2595864d lib/galaxy/jobs/datasets.py --- /dev/null +++ b/lib/galaxy/jobs/datasets.py @@ -0,0 +1,64 @@ +import os.path + +from abc import ABCMeta +from abc import abstractmethod + + +class DatasetPathRewriter( object ): + 
""" Used by runner to rewrite paths. """ + __metaclass__ = ABCMeta + + @abstractmethod + def rewrite_dataset_path( self, dataset, dataset_type ): + """ + Dataset type is 'input' or 'output'. + Return None to indicate not to rewrite this path. + """ + + +class NullDatasetPathRewriter( object ): + """ Used by default for jobwrapper, do not rewrite anything. + """ + + def rewrite_dataset_path( self, dataset, dataset_type ): + """ Keep path the same. + """ + return None + + +class OutputsToWorkingDirectoryPathRewriter( object ): + """ Rewrites all paths to place them in the specified working + directory for normal jobs when Galaxy is configured with + app.config.outputs_to_working_directory. Job runner base class + is responsible for copying these out after job is complete. + """ + + def __init__( self, working_directory ): + self.working_directory = working_directory + + def rewrite_dataset_path( self, dataset, dataset_type ): + """ Keep path the same. + """ + if dataset_type == 'output': + false_path = os.path.abspath( os.path.join( self.working_directory, "galaxy_dataset_%d.dat" % dataset.id ) ) + return false_path + else: + return None + + +class TaskPathRewriter( object ): + """ Rewrites all paths to place them in the specified working + directory for TaskWrapper. TaskWrapper is responsible for putting + them there and pulling them out. + """ + + def __init__( self, working_directory, job_dataset_path_rewriter ): + self.working_directory = working_directory + self.job_dataset_path_rewriter = job_dataset_path_rewriter + + def rewrite_dataset_path( self, dataset, dataset_type ): + """ + """ + dataset_file_name = dataset.file_name + job_file_name = self.job_dataset_path_rewriter.rewrite_dataset_path( dataset, dataset_type ) or dataset_file_name + return os.path.join( self.working_directory, os.path.basename( job_file_name ) ) diff -r 66242bb7cfc3365f71136eae90e25f543a121e2c -r 429fc453199b63568c81d8afe2c662fc2595864d lib/galaxy/jobs/splitters/multi.py --- a/lib/galaxy/jobs/splitters/multi.py +++ b/lib/galaxy/jobs/splitters/multi.py @@ -126,11 +126,12 @@ assert task_dirs, "Should be at least one sub-task!" # TODO: Output datasets can be very complex. This doesn't handle metadata files outputs = job_wrapper.get_output_hdas_and_fnames() + output_paths = job_wrapper.get_output_fnames() pickone_done = [] task_dirs = [os.path.join(working_directory, x) for x in os.listdir(working_directory) if x.startswith('task_')] task_dirs.sort(key = lambda x: int(x.split('task_')[-1])) - for output in outputs: - output_file_name = str(outputs[output][1]) + for index, output in enumerate( outputs ): + output_file_name = str( output_paths[ index ] ) # Use false_path if set, else real path. base_output_name = os.path.basename(output_file_name) if output in merge_outputs: output_dataset = outputs[output][0] https://bitbucket.org/galaxy/galaxy-central/commits/36f39f9bb3fa/ Changeset: 36f39f9bb3fa User: jmchilton Date: 2014-02-11 04:34:42 Summary: Reorganize tool dataset wrapper instantiation for future extension. This changeset should change how anything functions, but will make subsequent changes less dramatic. 
Affected #: 1 file diff -r 429fc453199b63568c81d8afe2c662fc2595864d -r 36f39f9bb3fa476a18bc332997e27035e7c1dadd lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -2614,11 +2614,14 @@ tool = Bunch( conversion_name = Bunch( extensions = conv_ext ) ), name = conversion_name ) # Wrap actual input dataset + dataset = input_values[ input.name ] + wrapper_kwds = dict( + datatypes_registry=self.app.datatypes_registry, + tool=self, + name=input.name + ) input_values[ input.name ] = \ - DatasetFilenameWrapper( input_values[ input.name ], - datatypes_registry = self.app.datatypes_registry, - tool = self, - name = input.name ) + DatasetFilenameWrapper( dataset, **wrapper_kwds ) elif isinstance( input, SelectToolParameter ): input_values[ input.name ] = SelectToolParameterWrapper( input, input_values[ input.name ], self.app, other_values = param_dict ) @@ -2657,10 +2660,12 @@ for name, data in input_datasets.items(): param_dict_value = param_dict.get(name, None) if not isinstance(param_dict_value, (DatasetFilenameWrapper, DatasetListWrapper)): - param_dict[name] = DatasetFilenameWrapper( data, - datatypes_registry = self.app.datatypes_registry, - tool = self, - name = name ) + wrapper_kwds = dict( + datatypes_registry=self.app.datatypes_registry, + tool=self, + name=name, + ) + param_dict[name] = DatasetFilenameWrapper( data, **wrapper_kwds ) if data: for child in data.children: param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child ) @@ -3573,7 +3578,12 @@ def __init__( self, datasets, **kwargs ): if not isinstance(datasets, list): datasets = [datasets] - list.__init__( self, [DatasetFilenameWrapper(dataset, **kwargs) for dataset in datasets] ) + + def to_wrapper( dataset ): + wrapper_kwds = kwargs.copy() + return DatasetFilenameWrapper( dataset, **wrapper_kwds ) + + list.__init__( self, map( to_wrapper, datasets ) ) def json_fix( val ): https://bitbucket.org/galaxy/galaxy-central/commits/c816b894910f/ Changeset: c816b894910f User: jmchilton Date: 2014-02-11 04:34:42 Summary: Refactor DatasetPath out into newer galaxy.jobs.datasets module. Add unit test for DatasetPath class. Affected #: 3 files diff -r 36f39f9bb3fa476a18bc332997e27035e7c1dadd -r c816b894910f9c82b3ec525b6394a4f6e64186f7 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -29,6 +29,7 @@ from .datasets import TaskPathRewriter from .datasets import OutputsToWorkingDirectoryPathRewriter from .datasets import NullDatasetPathRewriter +from .datasets import DatasetPath log = logging.getLogger( __name__ ) @@ -1182,18 +1183,6 @@ def compute_outputs( self ) : dataset_path_rewriter = self.dataset_path_rewriter - class DatasetPath( object ): - def __init__( self, dataset_id, real_path, false_path=None, mutable=True ): - self.dataset_id = dataset_id - self.real_path = real_path - self.false_path = false_path - self.mutable = mutable - - def __str__( self ): - if self.false_path is None: - return self.real_path - else: - return self.false_path job = self.get_job() # Job output datasets are combination of history, library, jeha and gitd datasets. special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first() diff -r 36f39f9bb3fa476a18bc332997e27035e7c1dadd -r c816b894910f9c82b3ec525b6394a4f6e64186f7 lib/galaxy/jobs/datasets.py --- a/lib/galaxy/jobs/datasets.py +++ b/lib/galaxy/jobs/datasets.py @@ -1,9 +1,26 @@ +""" +Utility classes allowing Job interface to reason about datasets. 
+""" import os.path from abc import ABCMeta from abc import abstractmethod +class DatasetPath( object ): + def __init__( self, dataset_id, real_path, false_path=None, mutable=True ): + self.dataset_id = dataset_id + self.real_path = real_path + self.false_path = false_path + self.mutable = mutable + + def __str__( self ): + if self.false_path is None: + return self.real_path + else: + return self.false_path + + class DatasetPathRewriter( object ): """ Used by runner to rewrite paths. """ __metaclass__ = ABCMeta diff -r 36f39f9bb3fa476a18bc332997e27035e7c1dadd -r c816b894910f9c82b3ec525b6394a4f6e64186f7 test/unit/jobs/test_datasets.py --- /dev/null +++ b/test/unit/jobs/test_datasets.py @@ -0,0 +1,17 @@ +from galaxy.jobs.datasets import DatasetPath + + +def test_dataset_path(): + dataset_path_1 = DatasetPath( 1, "/galaxy/database/files/dataset_1.dat" ) + assert dataset_path_1.dataset_id == 1 + assert dataset_path_1.real_path == "/galaxy/database/files/dataset_1.dat" + assert dataset_path_1.false_path is None + assert dataset_path_1.mutable + assert str( dataset_path_1 ) == "/galaxy/database/files/dataset_1.dat" + + dataset_path_2 = DatasetPath( 2, "/galaxy/database/files/dataset_2.dat", false_path="/mnt/galaxyData/files/dataset_2.dat", mutable=False ) + assert dataset_path_2.dataset_id == 2 + assert dataset_path_2.real_path == "/galaxy/database/files/dataset_2.dat" + assert dataset_path_2.false_path == "/mnt/galaxyData/files/dataset_2.dat" + assert not dataset_path_2.mutable + assert str( dataset_path_2 ) == "/mnt/galaxyData/files/dataset_2.dat" https://bitbucket.org/galaxy/galaxy-central/commits/55cf1bd3df0e/ Changeset: 55cf1bd3df0e User: jmchilton Date: 2014-02-11 04:34:42 Summary: Eliminate input (hence all) path rewriting in TaskWrapper. Extend concept of false paths to apply to inputs as well, use the rewrites implemented in TaskPathRewriter. Affected #: 2 files diff -r c816b894910f9c82b3ec525b6394a4f6e64186f7 -r 55cf1bd3df0ecfa058768e47dd39b9e89f619e6e lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -731,8 +731,9 @@ # Build params, done before hook so hook can use param_dict = self.tool.build_param_dict( incoming, inp_data, out_data, - self.get_output_fnames(), - self.working_directory ) + output_paths=self.get_output_fnames(), + job_working_directory=self.working_directory, + input_paths=self.get_input_paths( job ) ) # Certain tools require tasks to be completed prior to job execution # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ). @@ -1165,6 +1166,18 @@ filenames.extend(self.get_input_dataset_fnames(da.dataset)) return filenames + def get_input_paths( self, job=None ): + if job is None: + job = self.get_job() + paths = [] + for da in job.input_datasets + job.input_library_datasets: # da is JobToInputDatasetAssociation object + if da.dataset: + filenames = self.get_input_dataset_fnames(da.dataset) + for real_path in filenames: + false_path = self.dataset_path_rewriter.rewrite_dataset_path( da.dataset, 'input' ) + paths.append( DatasetPath( da.id, real_path=real_path, false_path=false_path, mutable=False ) ) + return paths + def get_output_fnames( self ): if self.output_paths is None: self.compute_outputs() @@ -1420,30 +1433,15 @@ param_dict = self._build_param_dict( job, populate_special_output_file=False ) - # Build dict of file name re-writes for split up tasks. 
- fnames = {} - for v in self.get_input_fnames(): - fnames[v] = os.path.join(self.working_directory, os.path.basename(v)) - #for dp in [x.real_path for x in self.get_output_fnames()]: - # fnames[dp] = os.path.join(self.working_directory, os.path.basename(dp)) - self.sa_session.flush() # Build any required config files config_filenames = self.tool.build_config_files( param_dict, self.working_directory ) - for config_filename in config_filenames: - config_contents = open(config_filename, "r").read() - for k, v in fnames.iteritems(): - config_contents = config_contents.replace(k, v) - open(config_filename, "w").write(config_contents) # FIXME: Build the param file (might return None, DEPRECATED) param_filename = self.tool.build_param_file( param_dict, self.working_directory ) # Build the job's command line self.command_line = self.tool.build_command_line( param_dict ) - # HACK, Fix this when refactored. - for k, v in fnames.iteritems(): - self.command_line = self.command_line.replace(k, v) # FIXME: for now, tools get Galaxy's lib dir in their path if self.command_line and self.command_line.startswith( 'python' ): self.galaxy_lib_dir = os.path.abspath( "lib" ) # cwd = galaxy root diff -r c816b894910f9c82b3ec525b6394a4f6e64186f7 -r 55cf1bd3df0ecfa058768e47dd39b9e89f619e6e lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -2551,7 +2551,7 @@ message = e.message return message - def build_param_dict( self, incoming, input_datasets, output_datasets, output_paths, job_working_directory ): + def build_param_dict( self, incoming, input_datasets, output_datasets, output_paths, job_working_directory, input_paths=[] ): """ Build the dictionary of parameters for substituting into the command line. Each value is wrapped in a `InputValueWrapper`, which allows @@ -2564,6 +2564,8 @@ # All parameters go into the param_dict param_dict.update( incoming ) + input_false_paths = dict( [ ( dp.real_path, dp.false_path ) for dp in input_paths if getattr( dp, "false_path", None ) ] ) + def wrap_values( inputs, input_values ): """ Wraps parameters as neccesary. @@ -2579,6 +2581,7 @@ elif isinstance( input, DataToolParameter ) and input.multiple: input_values[ input.name ] = \ DatasetListWrapper( input_values[ input.name ], + false_paths=input_false_paths, datatypes_registry = self.app.datatypes_registry, tool = self, name = input.name ) @@ -2620,6 +2623,9 @@ tool=self, name=input.name ) + real_path = dataset.file_name + if real_path in input_false_paths: + wrapper_kwds[ "false_path" ] = input_false_paths[ real_path ] input_values[ input.name ] = \ DatasetFilenameWrapper( dataset, **wrapper_kwds ) elif isinstance( input, SelectToolParameter ): @@ -2627,6 +2633,7 @@ input, input_values[ input.name ], self.app, other_values = param_dict ) elif isinstance( input, LibraryDatasetToolParameter ): + # TODO: Handle input rewrites in here? How to test LibraryDatasetToolParameters? 
input_values[ input.name ] = LibraryDatasetValueWrapper( input, input_values[ input.name ], param_dict ) @@ -2665,6 +2672,10 @@ tool=self, name=name, ) + real_path = data.file_name + if real_path in input_false_paths: + false_path = input_false_paths[ real_path ] + wrapper_kwds[ 'false_path' ] = false_path param_dict[name] = DatasetFilenameWrapper( data, **wrapper_kwds ) if data: for child in data.children: @@ -3575,12 +3586,15 @@ class DatasetListWrapper( list ): """ """ - def __init__( self, datasets, **kwargs ): + def __init__( self, datasets, false_paths=[], **kwargs ): if not isinstance(datasets, list): datasets = [datasets] def to_wrapper( dataset ): + real_path = dataset.file_name wrapper_kwds = kwargs.copy() + if real_path in false_paths: + wrapper_kwds[ "false_path" ] = false_paths[ real_path ] return DatasetFilenameWrapper( dataset, **wrapper_kwds ) list.__init__( self, map( to_wrapper, datasets ) ) https://bitbucket.org/galaxy/galaxy-central/commits/e6f90d86a8eb/ Changeset: e6f90d86a8eb User: jmchilton Date: 2014-02-11 04:34:42 Summary: Move cheetah wrappers into own module... For clarity - more work toward reduces tools/__init__.py to a more managable size. This also includes an initial suite of test cases for these wrappers - testing simple select wrapper, select wrapper with file options, select wrapper with drilldown widget, raw object wrapper, input value wrapper, and the dataset file name wrapper with and without false paths. Affected #: 3 files diff -r 55cf1bd3df0ecfa058768e47dd39b9e89f619e6e -r e6f90d86a8eb3f3e48d7bdb6ab801aee22027b33 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -65,6 +65,15 @@ from galaxy.model import Workflow from tool_shed.util import shed_util_common from .loader import load_tool, template_macro_params +from .wrappers import ( + ToolParameterValueWrapper, + RawObjectWrapper, + LibraryDatasetValueWrapper, + InputValueWrapper, + SelectToolParameterWrapper, + DatasetFilenameWrapper, + DatasetListWrapper, +) log = logging.getLogger( __name__ ) @@ -3422,183 +3431,6 @@ self.error_level = "fatal" self.desc = "" -class ToolParameterValueWrapper( object ): - """ - Base class for object that Wraps a Tool Parameter and Value. - """ - def __nonzero__( self ): - return bool( self.value ) - def get_display_text( self, quote=True ): - """ - Returns a string containing the value that would be displayed to the user in the tool interface. - When quote is True (default), the string is escaped for e.g. command-line usage. - """ - rval = self.input.value_to_display_text( self.value, self.input.tool.app ) or '' - if quote: - return pipes.quote( rval ) or "''" #pipes.quote in Python < 2.7 returns an empty string instead of the expected quoted empty string - return rval - -class RawObjectWrapper( ToolParameterValueWrapper ): - """ - Wraps an object so that __str__ returns module_name:class_name. - """ - def __init__( self, obj ): - self.obj = obj - def __nonzero__( self ): - return bool( self.obj ) #FIXME: would it be safe/backwards compatible to rename .obj to .value, so that we can just inherit this method? - def __str__( self ): - try: - return "%s:%s" % (self.obj.__module__, self.obj.__class__.__name__) - except: - #Most likely None, which lacks __module__. - return str( self.obj ) - def __getattr__( self, key ): - return getattr( self.obj, key ) - -class LibraryDatasetValueWrapper( ToolParameterValueWrapper ): - """ - Wraps an input so that __str__ gives the "param_dict" representation. 
- """ - def __init__( self, input, value, other_values={} ): - self.input = input - self.value = value - self._other_values = other_values - self.counter = 0 - def __str__( self ): - return self.value - def __iter__( self ): - return self - def next( self ): - if self.counter >= len(self.value): - raise StopIteration - self.counter += 1 - return self.value[self.counter-1] - def __getattr__( self, key ): - return getattr( self.value, key ) - -class InputValueWrapper( ToolParameterValueWrapper ): - """ - Wraps an input so that __str__ gives the "param_dict" representation. - """ - def __init__( self, input, value, other_values={} ): - self.input = input - self.value = value - self._other_values = other_values - def __str__( self ): - return self.input.to_param_dict_string( self.value, self._other_values ) - def __getattr__( self, key ): - return getattr( self.value, key ) - -class SelectToolParameterWrapper( ToolParameterValueWrapper ): - """ - Wraps a SelectTooParameter so that __str__ returns the selected value, but all other - attributes are accessible. - """ - - class SelectToolParameterFieldWrapper: - """ - Provide access to any field by name or index for this particular value. - Only applicable for dynamic_options selects, which have more than simple 'options' defined (name, value, selected). - """ - def __init__( self, input, value, other_values ): - self._input = input - self._value = value - self._other_values = other_values - self._fields = {} - def __getattr__( self, name ): - if name not in self._fields: - self._fields[ name ] = self._input.options.get_field_by_name_for_value( name, self._value, None, self._other_values ) - return self._input.separator.join( map( str, self._fields[ name ] ) ) - - def __init__( self, input, value, app, other_values={} ): - self.input = input - self.value = value - self.input.value_label = input.value_to_display_text( value, app ) - self._other_values = other_values - self.fields = self.SelectToolParameterFieldWrapper( input, value, other_values ) - def __str__( self ): - return self.input.to_param_dict_string( self.value, other_values = self._other_values ) - def __getattr__( self, key ): - return getattr( self.input, key ) - -class DatasetFilenameWrapper( ToolParameterValueWrapper ): - """ - Wraps a dataset so that __str__ returns the filename, but all other - attributes are accessible. - """ - - class MetadataWrapper: - """ - Wraps a Metadata Collection to return MetadataParameters wrapped - according to the metadata spec. Methods implemented to match behavior - of a Metadata Collection. 
- """ - def __init__( self, metadata ): - self.metadata = metadata - def __getattr__( self, name ): - rval = self.metadata.get( name, None ) - if name in self.metadata.spec: - if rval is None: - rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) - # Store this value, so we don't need to recalculate if needed - # again - setattr( self, name, rval ) - return rval - def __nonzero__( self ): - return self.metadata.__nonzero__() - def __iter__( self ): - return self.metadata.__iter__() - def get( self, key, default=None ): - try: - return getattr( self, key ) - except: - return default - def items( self ): - return iter( [ ( k, self.get( k ) ) for k, v in self.metadata.items() ] ) - - def __init__( self, dataset, datatypes_registry = None, tool = None, name = None, false_path = None ): - if not dataset: - try: - # TODO: allow this to work when working with grouping - ext = tool.inputs[name].extensions[0] - except: - ext = 'data' - self.dataset = NoneDataset( datatypes_registry = datatypes_registry, ext = ext ) - else: - self.dataset = dataset - self.metadata = self.MetadataWrapper( dataset.metadata ) - self.false_path = false_path - - def __str__( self ): - if self.false_path is not None: - return self.false_path - else: - return self.dataset.file_name - def __getattr__( self, key ): - if self.false_path is not None and key == 'file_name': - return self.false_path - else: - return getattr( self.dataset, key ) - def __nonzero__( self ): - return bool( self.dataset ) - -class DatasetListWrapper( list ): - """ - """ - def __init__( self, datasets, false_paths=[], **kwargs ): - if not isinstance(datasets, list): - datasets = [datasets] - - def to_wrapper( dataset ): - real_path = dataset.file_name - wrapper_kwds = kwargs.copy() - if real_path in false_paths: - wrapper_kwds[ "false_path" ] = false_paths[ real_path ] - return DatasetFilenameWrapper( dataset, **wrapper_kwds ) - - list.__init__( self, map( to_wrapper, datasets ) ) - def json_fix( val ): if isinstance( val, list ): diff -r 55cf1bd3df0ecfa058768e47dd39b9e89f619e6e -r e6f90d86a8eb3f3e48d7bdb6ab801aee22027b33 lib/galaxy/tools/wrappers.py --- /dev/null +++ b/lib/galaxy/tools/wrappers.py @@ -0,0 +1,207 @@ +import pipes +from galaxy.util.none_like import NoneDataset + + +class ToolParameterValueWrapper( object ): + """ + Base class for object that Wraps a Tool Parameter and Value. + """ + + def __nonzero__( self ): + return bool( self.value ) + + def get_display_text( self, quote=True ): + """ + Returns a string containing the value that would be displayed to the user in the tool interface. + When quote is True (default), the string is escaped for e.g. command-line usage. + """ + rval = self.input.value_to_display_text( self.value, self.input.tool.app ) or '' + if quote: + return pipes.quote( rval ) or "''" # pipes.quote in Python < 2.7 returns an empty string instead of the expected quoted empty string + return rval + + +class RawObjectWrapper( ToolParameterValueWrapper ): + """ + Wraps an object so that __str__ returns module_name:class_name. + """ + def __init__( self, obj ): + self.obj = obj + + def __nonzero__( self ): + return bool( self.obj ) # FIXME: would it be safe/backwards compatible to rename .obj to .value, so that we can just inherit this method? + + def __str__( self ): + try: + return "%s:%s" % (self.obj.__module__, self.obj.__class__.__name__) + except: + #Most likely None, which lacks __module__. 
+ return str( self.obj ) + + def __getattr__( self, key ): + return getattr( self.obj, key ) + + +class LibraryDatasetValueWrapper( ToolParameterValueWrapper ): + """ + Wraps an input so that __str__ gives the "param_dict" representation. + """ + def __init__( self, input, value, other_values={} ): + self.input = input + self.value = value + self._other_values = other_values + self.counter = 0 + + def __str__( self ): + return self.value + + def __iter__( self ): + return self + + def next( self ): + if self.counter >= len(self.value): + raise StopIteration + self.counter += 1 + return self.value[ self.counter - 1 ] + + def __getattr__( self, key ): + return getattr( self.value, key ) + + +class InputValueWrapper( ToolParameterValueWrapper ): + """ + Wraps an input so that __str__ gives the "param_dict" representation. + """ + def __init__( self, input, value, other_values={} ): + self.input = input + self.value = value + self._other_values = other_values + + def __str__( self ): + return self.input.to_param_dict_string( self.value, self._other_values ) + + def __getattr__( self, key ): + return getattr( self.value, key ) + + +class SelectToolParameterWrapper( ToolParameterValueWrapper ): + """ + Wraps a SelectTooParameter so that __str__ returns the selected value, but all other + attributes are accessible. + """ + + class SelectToolParameterFieldWrapper: + """ + Provide access to any field by name or index for this particular value. + Only applicable for dynamic_options selects, which have more than simple 'options' defined (name, value, selected). + """ + def __init__( self, input, value, other_values ): + self._input = input + self._value = value + self._other_values = other_values + self._fields = {} + + def __getattr__( self, name ): + if name not in self._fields: + self._fields[ name ] = self._input.options.get_field_by_name_for_value( name, self._value, None, self._other_values ) + return self._input.separator.join( map( str, self._fields[ name ] ) ) + + def __init__( self, input, value, app, other_values={} ): + self.input = input + self.value = value + self.input.value_label = input.value_to_display_text( value, app ) + self._other_values = other_values + self.fields = self.SelectToolParameterFieldWrapper( input, value, other_values ) + + def __str__( self ): + return self.input.to_param_dict_string( self.value, other_values=self._other_values ) + + def __getattr__( self, key ): + return getattr( self.input, key ) + + +class DatasetFilenameWrapper( ToolParameterValueWrapper ): + """ + Wraps a dataset so that __str__ returns the filename, but all other + attributes are accessible. + """ + + class MetadataWrapper: + """ + Wraps a Metadata Collection to return MetadataParameters wrapped + according to the metadata spec. Methods implemented to match behavior + of a Metadata Collection. 
+ """ + def __init__( self, metadata ): + self.metadata = metadata + + def __getattr__( self, name ): + rval = self.metadata.get( name, None ) + if name in self.metadata.spec: + if rval is None: + rval = self.metadata.spec[name].no_value + rval = self.metadata.spec[name].param.to_string( rval ) + # Store this value, so we don't need to recalculate if needed + # again + setattr( self, name, rval ) + return rval + + def __nonzero__( self ): + return self.metadata.__nonzero__() + + def __iter__( self ): + return self.metadata.__iter__() + + def get( self, key, default=None ): + try: + return getattr( self, key ) + except: + return default + + def items( self ): + return iter( [ ( k, self.get( k ) ) for k, v in self.metadata.items() ] ) + + def __init__( self, dataset, datatypes_registry=None, tool=None, name=None, false_path=None ): + if not dataset: + try: + # TODO: allow this to work when working with grouping + ext = tool.inputs[name].extensions[0] + except: + ext = 'data' + self.dataset = NoneDataset( datatypes_registry=datatypes_registry, ext=ext ) + else: + self.dataset = dataset + self.metadata = self.MetadataWrapper( dataset.metadata ) + self.false_path = false_path + + def __str__( self ): + if self.false_path is not None: + return self.false_path + else: + return self.dataset.file_name + + def __getattr__( self, key ): + if self.false_path is not None and key == 'file_name': + return self.false_path + else: + return getattr( self.dataset, key ) + + def __nonzero__( self ): + return bool( self.dataset ) + + +class DatasetListWrapper( list ): + """ + """ + def __init__( self, datasets, false_paths=[], **kwargs ): + if not isinstance(datasets, list): + datasets = [datasets] + + def to_wrapper( dataset ): + real_path = dataset.file_name + wrapper_kwds = kwargs.copy() + if real_path in false_paths: + wrapper_kwds[ "false_path" ] = false_paths[ real_path ] + return DatasetFilenameWrapper( dataset, **wrapper_kwds ) + + list.__init__( self, map( to_wrapper, datasets ) ) diff -r 55cf1bd3df0ecfa058768e47dd39b9e89f619e6e -r e6f90d86a8eb3f3e48d7bdb6ab801aee22027b33 test/unit/tools/test_wrappers.py --- /dev/null +++ b/test/unit/tools/test_wrappers.py @@ -0,0 +1,155 @@ +import tempfile +import os + +from galaxy.tools.parameters.basic import SelectToolParameter +from galaxy.tools.parameters.basic import DrillDownSelectToolParameter +from galaxy.tools.parameters.basic import IntegerToolParameter +from galaxy.tools.wrappers import RawObjectWrapper +from galaxy.tools.wrappers import SelectToolParameterWrapper +from galaxy.tools.wrappers import InputValueWrapper +from galaxy.tools.wrappers import DatasetFilenameWrapper +from galaxy.util.bunch import Bunch +from elementtree.ElementTree import XML +from galaxy.datatypes.metadata import MetadataSpecCollection + + +def with_mock_tool(func): + def call(): + test_directory = tempfile.mkdtemp() + app = MockApp(test_directory) + tool = MockTool(app) + return func(tool) + call.__name__ = func.__name__ + return call + + +@with_mock_tool +def test_select_wrapper_simple_options(tool): + xml = XML('''<param name="blah" type="select"> + <option value="x">I am X</option> + <option value="y" selected="true">I am Y</option> + <option value="z">I am Z</option> + </param>''') + parameter = SelectToolParameter( tool, xml ) + wrapper = SelectToolParameterWrapper( parameter, "x", tool.app ) + assert str(wrapper) == "x" + assert wrapper.name == "blah" + assert wrapper.value_label == "I am X" + + +@with_mock_tool +def test_select_wrapper_with_drilldown(tool): + parameter = 
_drilldown_parameter(tool) + wrapper = SelectToolParameterWrapper( parameter, ["option3"], tool.app ) + assert str(wrapper) == "option3", str(wrapper) + + +@with_mock_tool +def test_select_wrapper_option_file(tool): + parameter = _setup_blast_tool(tool) + wrapper = SelectToolParameterWrapper( parameter, "val2", tool.app ) + assert str(wrapper) == "val2" + assert wrapper.fields.name == "name2" + assert wrapper.fields.path == "path2" + + +@with_mock_tool +def test_select_wrapper_multiple(tool): + parameter = _setup_blast_tool(tool, multiple=True) + wrapper = SelectToolParameterWrapper( parameter, ["val1", "val2"], tool.app ) + assert str(wrapper) == "val1,val2" + assert wrapper.fields.name == "name1,name2" + + +def test_raw_object_wrapper(): + obj = Bunch(x=4) + wrapper = RawObjectWrapper(obj) + assert wrapper.x == 4 + assert wrapper + + false_wrapper = RawObjectWrapper(False) + assert not false_wrapper + + +@with_mock_tool +def test_input_value_wrapper(tool): + parameter = IntegerToolParameter( tool, XML( '<param name="blah" type="integer" size="4" value="10" min="0" />' ) ) + wrapper = InputValueWrapper( parameter, "5" ) + assert str( wrapper ) == "5" + + +def test_dataset_wrapper(): + dataset = MockDataset() + wrapper = DatasetFilenameWrapper(dataset) + assert str( wrapper ) == MOCK_DATASET_PATH + assert wrapper.file_name == MOCK_DATASET_PATH + + assert wrapper.ext == MOCK_DATASET_EXT + + +def test_dataset_wrapper_false_path(): + dataset = MockDataset() + new_path = "/new/path/dataset_123.dat" + wrapper = DatasetFilenameWrapper(dataset, false_path=new_path) + assert str( wrapper ) == new_path + assert wrapper.file_name == new_path + + +def _drilldown_parameter(tool): + xml = XML( '''<param name="some_name" type="drill_down" display="checkbox" hierarchy="recurse" multiple="true"> + <options> + <option name="Heading 1" value="heading1"> + <option name="Option 1" value="option1"/> + <option name="Option 2" value="option2"/> + <option name="Heading 1" value="heading1"> + <option name="Option 3" value="option3"/> + <option name="Option 4" value="option4"/> + </option> + </option> + <option name="Option 5" value="option5"/> + </options> + </param>''' ) + parameter = DrillDownSelectToolParameter( tool, xml ) + return parameter + + +def _setup_blast_tool(tool, multiple=False): + tool.app.write_test_tool_data("blastdb.loc", "val1\tname1\tpath1\nval2\tname2\tpath2\n") + xml = XML( '''<param name="database" type="select" label="Nucleotide BLAST database" multiple="%s"> + <options from_file="blastdb.loc"> + <column name="value" index="0"/> + <column name="name" index="1"/> + <column name="path" index="2"/> + </options> + </param>''' % multiple ) + parameter = SelectToolParameter( tool, xml ) + return parameter + + +MOCK_DATASET_PATH = "/galaxy/database/files/001/dataset_123.dat" +MOCK_DATASET_EXT = "bam" + + +class MockDataset(object): + + def __init__(self): + self.metadata = MetadataSpecCollection({}) + self.file_name = MOCK_DATASET_PATH + self.ext = MOCK_DATASET_EXT + + +class MockTool(object): + + def __init__(self, app): + self.app = app + self.options = Bunch(sanitize=False) + + +class MockApp(object): + + def __init__(self, test_directory): + self.config = Bunch(tool_data_path=test_directory) + + def write_test_tool_data(self, name, contents): + path = os.path.join(self.config.tool_data_path, name) + open(path, "w").write(contents) https://bitbucket.org/galaxy/galaxy-central/commits/e4cea1f8ef6e/ Changeset: e4cea1f8ef6e User: jmchilton Date: 2014-02-11 04:34:42 Summary: Rework interface 
between jobs and tools. Pull code out of job wrapper and tool for building and evaluating against template environments and move them into a new ToolEvaluator class (in galaxy/tools/evalution.py). Introduce an abstraction (ComputeEnvironment) for various paths that get evaluated that may be different on a remote server (inputs, outputs, working directory, tools and config directory) and evaluate the template against an instance of this class. Created a default instance of this class (SharedComputeEnvironment). The idea will be that the LWR should be able to an LwrComputeEnvironment and send this to the JobWrapper when building up job inputs - nothing in this commit is LWR specific though so other runners should be able to remotely stage jobs using other mechanisms as well. This commit adds extensive unit tests of this tool evaluation - testing many different branches through the code, with and without path rewriting, testing job hooks, config files, testing the cheetah evaluation of simple parameters, conditionals, repeats, and non-job stuff like $__app__ and $__root_dir__. As well as a new test case class for JobWrapper and TaskWrapper - though this just tests the relevant portions of that class - namely prepare and version handling. Affected #: 6 files diff -r e6f90d86a8eb3f3e48d7bdb6ab801aee22027b33 -r e4cea1f8ef6e1bb02ee2746dddc9da5dc2531165 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1,6 +1,8 @@ """ Support for running a tool in Galaxy via an internal job management system """ +from abc import ABCMeta +from abc import abstractmethod import time import copy @@ -25,6 +27,7 @@ from galaxy.util.expressions import ExpressionContext from galaxy.util.json import from_json_string from galaxy.util import unicodify + from .output_checker import check_output from .datasets import TaskPathRewriter from .datasets import OutputsToWorkingDirectoryPathRewriter @@ -649,7 +652,7 @@ def get_version_string_path( self ): return os.path.abspath(os.path.join(self.app.config.new_file_path, "GALAXY_VERSION_STRING_%s" % self.job_id)) - def prepare( self ): + def prepare( self, compute_environment=None ): """ Prepare the job to run by creating the working directory and the config files. 
@@ -661,15 +664,19 @@ job = self._load_job() - param_dict = self._build_param_dict( job, populate_special_output_file=True ) + def get_special( ): + special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first() + if not special: + special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first() + return special + + tool_evaluator = self._get_tool_evaluator( job ) + compute_environment = compute_environment or self.default_compute_environment( job ) + tool_evaluator.set_compute_environment( compute_environment, get_special=get_special ) self.sa_session.flush() - # Build any required config files - config_filenames = self.tool.build_config_files( param_dict, self.working_directory ) - # FIXME: Build the param file (might return None, DEPRECATED) - param_filename = self.tool.build_param_file( param_dict, self.working_directory ) - # Build the job's command line - self.command_line = self.tool.build_command_line( param_dict ) + + self.command_line, self.extra_filenames = tool_evaluator.build() # FIXME: for now, tools get Galaxy's lib dir in their path if self.command_line and self.command_line.startswith( 'python' ): self.galaxy_lib_dir = os.path.abspath( "lib" ) # cwd = galaxy root @@ -681,13 +688,14 @@ self.sa_session.add( job ) self.sa_session.flush() # Return list of all extra files - extra_filenames = config_filenames - if param_filename is not None: - extra_filenames.append( param_filename ) - self.param_dict = param_dict - self.extra_filenames = extra_filenames + self.param_dict = tool_evaluator.param_dict self.version_string_cmd = self.tool.version_string_cmd - return extra_filenames + return self.extra_filenames + + def default_compute_environment( self, job=None ): + if not job: + job = self.get_job() + return SharedComputeEnvironment( self, job ) def _load_job( self ): # Load job from database and verify it has user or session. @@ -697,52 +705,19 @@ raise Exception( 'Job %s has no user and no session.' % job.id ) return job - def _build_param_dict( self, job, populate_special_output_file=True ): - incoming = dict( [ ( p.name, p.value ) for p in job.parameters ] ) - incoming = self.tool.params_from_strings( incoming, self.app ) - # Do any validation that could not be done at job creation - self.tool.handle_unvalidated_param_values( incoming, self.app ) - # Restore input / output data lists - inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] ) - out_data = dict( [ ( da.name, da.dataset ) for da in job.output_datasets ] ) - inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] ) - out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] ) + def _get_tool_evaluator( self, job ): + # Hacky way to avoid cirular import for now. + # Placing ToolEvaluator in either jobs or tools + # result in ciruclar dependency. + from galaxy.tools.evaluation import ToolEvaluator - if populate_special_output_file: - # Set up output dataset association for export history jobs. Because job - # uses a Dataset rather than an HDA or LDA, it's necessary to set up a - # fake dataset association that provides the needed attributes for - # preparing a job. 
- class FakeDatasetAssociation ( object ): - def __init__( self, dataset=None ): - self.dataset = dataset - self.file_name = dataset.file_name - self.metadata = dict() - self.children = [] - special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first() - if not special: - special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first() - if special: - out_data[ "output_file" ] = FakeDatasetAssociation( dataset=special.dataset ) - - # These can be passed on the command line if wanted as $__user_*__ - incoming.update( model.User.user_template_environment( job.history and job.history.user ) ) - - # Build params, done before hook so hook can use - param_dict = self.tool.build_param_dict( incoming, - inp_data, out_data, - output_paths=self.get_output_fnames(), - job_working_directory=self.working_directory, - input_paths=self.get_input_paths( job ) ) - - # Certain tools require tasks to be completed prior to job execution - # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ). - self.tool.exec_before_job( self.queue.app, inp_data, out_data, param_dict ) - # Run the before queue ("exec_before_job") hook - self.tool.call_hook( 'exec_before_job', self.queue.app, inp_data=inp_data, - out_data=out_data, tool=self.tool, param_dict=incoming) - - return param_dict + tool_evaluator = ToolEvaluator( + app=self.app, + job=job, + tool=self.tool, + local_working_directory=self.working_directory, + ) + return tool_evaluator def fail( self, message, exception=False, stdout="", stderr="", exit_code=None ): """ @@ -1420,7 +1395,7 @@ param_dict = self.tool.params_from_strings( param_dict, self.app ) return param_dict - def prepare( self ): + def prepare( self, compute_environment=None ): """ Prepare the job to run by creating the working directory and the config files. @@ -1431,17 +1406,14 @@ # DBTODO New method for generating command line for a task? 
- param_dict = self._build_param_dict( job, populate_special_output_file=False ) + tool_evaluator = self._get_tool_evaluator( job ) + compute_environment = compute_environment or self.default_compute_environment( job ) + tool_evaluator.set_compute_environment( compute_environment ) self.sa_session.flush() - # Build any required config files - config_filenames = self.tool.build_config_files( param_dict, self.working_directory ) + self.command_line, self.extra_filenames = tool_evaluator.build() - # FIXME: Build the param file (might return None, DEPRECATED) - param_filename = self.tool.build_param_file( param_dict, self.working_directory ) - # Build the job's command line - self.command_line = self.tool.build_command_line( param_dict ) # FIXME: for now, tools get Galaxy's lib dir in their path if self.command_line and self.command_line.startswith( 'python' ): self.galaxy_lib_dir = os.path.abspath( "lib" ) # cwd = galaxy root @@ -1452,14 +1424,10 @@ task.command_line = self.command_line self.sa_session.add( task ) self.sa_session.flush() - # # Return list of all extra files - extra_filenames = config_filenames - if param_filename is not None: - extra_filenames.append( param_filename ) - self.param_dict = param_dict - self.extra_filenames = extra_filenames + + self.param_dict = tool_evaluator.param_dict self.status = 'prepared' - return extra_filenames + return self.extra_filenames def fail( self, message, exception=False ): log.error("TaskWrapper Failure %s" % message) @@ -1576,6 +1544,71 @@ return os.path.join( self.working_directory, os.path.basename( output_path ) ) +class ComputeEnvironment( object ): + """ Definition of the job as it will be run on the (potentially) remote + compute server. + """ + __metaclass__ = ABCMeta + + @abstractmethod + def output_paths( self ): + """ Output DatasetPaths defined by job. """ + + @abstractmethod + def input_paths( self ): + """ Input DatasetPaths defined by job. """ + + @abstractmethod + def working_directory( self ): + """ Job working directory (potentially remote) """ + + @abstractmethod + def config_directory( self ): + """ Directory containing config files (potentially remote) """ + + @abstractmethod + def sep( self ): + """ os.path.sep for the platform this job will execute in. + """ + + @abstractmethod + def new_file_path( self ): + """ Location to dump new files for this job on remote server. """ + + +class SimpleComputeEnvironment( object ): + + def config_directory( self ): + return self.working_directory( ) + + def sep( self ): + return os.path.sep + + +class SharedComputeEnvironment( SimpleComputeEnvironment ): + """ Default ComputeEnviornment for job and task wrapper to pass + to ToolEvaluator - valid when Galaxy and compute share all the relevant + file systems. 
+ """ + + def __init__( self, job_wrapper, job ): + self.app = job_wrapper.app + self.job_wrapper = job_wrapper + self.job = job + + def output_paths( self ): + return self.job_wrapper.get_output_fnames() + + def input_paths( self ): + return self.job_wrapper.get_input_paths( self.job ) + + def working_directory( self ): + return self.job_wrapper.working_directory + + def new_file_path( self ): + return os.path.abspath( self.app.config.new_file_path ) + + class NoopQueue( object ): """ Implements the JobQueue / JobStopQueue interface but does nothing diff -r e6f90d86a8eb3f3e48d7bdb6ab801aee22027b33 -r e4cea1f8ef6e1bb02ee2746dddc9da5dc2531165 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -2560,254 +2560,6 @@ message = e.message return message - def build_param_dict( self, incoming, input_datasets, output_datasets, output_paths, job_working_directory, input_paths=[] ): - """ - Build the dictionary of parameters for substituting into the command - line. Each value is wrapped in a `InputValueWrapper`, which allows - all the attributes of the value to be used in the template, *but* - when the __str__ method is called it actually calls the - `to_param_dict_string` method of the associated input. - """ - param_dict = dict() - param_dict.update(self.template_macro_params) - # All parameters go into the param_dict - param_dict.update( incoming ) - - input_false_paths = dict( [ ( dp.real_path, dp.false_path ) for dp in input_paths if getattr( dp, "false_path", None ) ] ) - - def wrap_values( inputs, input_values ): - """ - Wraps parameters as neccesary. - """ - for input in inputs.itervalues(): - if isinstance( input, Repeat ): - for d in input_values[ input.name ]: - wrap_values( input.inputs, d ) - elif isinstance( input, Conditional ): - values = input_values[ input.name ] - current = values["__current_case__"] - wrap_values( input.cases[current].inputs, values ) - elif isinstance( input, DataToolParameter ) and input.multiple: - input_values[ input.name ] = \ - DatasetListWrapper( input_values[ input.name ], - false_paths=input_false_paths, - datatypes_registry = self.app.datatypes_registry, - tool = self, - name = input.name ) - elif isinstance( input, DataToolParameter ): - ## FIXME: We're populating param_dict with conversions when - ## wrapping values, this should happen as a separate - ## step before wrapping (or call this wrapping step - ## something more generic) (but iterating this same - ## list twice would be wasteful) - # Add explicit conversions by name to current parent - for conversion_name, conversion_extensions, conversion_datatypes in input.conversions: - # If we are at building cmdline step, then converters - # have already executed - conv_ext, converted_dataset = input_values[ input.name ].find_conversion_destination( conversion_datatypes ) - # When dealing with optional inputs, we'll provide a - # valid extension to be used for None converted dataset - if not conv_ext: - conv_ext = conversion_extensions[0] - # input_values[ input.name ] is None when optional - # dataset, 'conversion' of optional dataset should - # create wrapper around NoneDataset for converter output - if input_values[ input.name ] and not converted_dataset: - # Input that converter is based from has a value, - # but converted dataset does not exist - raise Exception( 'A path for explicit datatype conversion has not been found: %s --/--> %s' - % ( input_values[ input.name ].extension, conversion_extensions ) ) - else: - # Trick wrapper into using target 
conv ext (when - # None) without actually being a tool parameter - input_values[ conversion_name ] = \ - DatasetFilenameWrapper( converted_dataset, - datatypes_registry = self.app.datatypes_registry, - tool = Bunch( conversion_name = Bunch( extensions = conv_ext ) ), - name = conversion_name ) - # Wrap actual input dataset - dataset = input_values[ input.name ] - wrapper_kwds = dict( - datatypes_registry=self.app.datatypes_registry, - tool=self, - name=input.name - ) - real_path = dataset.file_name - if real_path in input_false_paths: - wrapper_kwds[ "false_path" ] = input_false_paths[ real_path ] - input_values[ input.name ] = \ - DatasetFilenameWrapper( dataset, **wrapper_kwds ) - elif isinstance( input, SelectToolParameter ): - input_values[ input.name ] = SelectToolParameterWrapper( - input, input_values[ input.name ], self.app, other_values = param_dict ) - - elif isinstance( input, LibraryDatasetToolParameter ): - # TODO: Handle input rewrites in here? How to test LibraryDatasetToolParameters? - input_values[ input.name ] = LibraryDatasetValueWrapper( - input, input_values[ input.name ], param_dict ) - - else: - input_values[ input.name ] = InputValueWrapper( - input, input_values[ input.name ], param_dict ) - - # HACK: only wrap if check_values is not false, this deals with external - # tools where the inputs don't even get passed through. These - # tools (e.g. UCSC) should really be handled in a special way. - if self.check_values: - wrap_values( self.inputs, param_dict ) - - ## FIXME: when self.check_values==True, input datasets are being wrapped - ## twice (above and below, creating 2 separate - ## DatasetFilenameWrapper objects - first is overwritten by - ## second), is this necessary? - if we get rid of this way to - ## access children, can we stop this redundancy, or is there - ## another reason for this? - ## - Only necessary when self.check_values is False (==external dataset - ## tool?: can this be abstracted out as part of being a datasouce tool?) - ## - But we still want (ALWAYS) to wrap input datasets (this should be - ## checked to prevent overhead of creating a new object?) - # Additionally, datasets go in the param dict. We wrap them such that - # if the bare variable name is used it returns the filename (for - # backwards compatibility). We also add any child datasets to the - # the param dict encoded as: - # "_CHILD___{dataset_name}___{child_designation}", - # but this should be considered DEPRECATED, instead use: - # $dataset.get_child( 'name' ).filename - for name, data in input_datasets.items(): - param_dict_value = param_dict.get(name, None) - if not isinstance(param_dict_value, (DatasetFilenameWrapper, DatasetListWrapper)): - wrapper_kwds = dict( - datatypes_registry=self.app.datatypes_registry, - tool=self, - name=name, - ) - real_path = data.file_name - if real_path in input_false_paths: - false_path = input_false_paths[ real_path ] - wrapper_kwds[ 'false_path' ] = false_path - param_dict[name] = DatasetFilenameWrapper( data, **wrapper_kwds ) - if data: - for child in data.children: - param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child ) - output_false_paths = dict( [ ( dp.real_path, dp.false_path ) for dp in output_paths if getattr( dp, "false_path", None ) ] ) - for name, hda in output_datasets.items(): - # Write outputs to the working directory (for security purposes) - # if desired. 
- real_path = hda.file_name - if real_path in output_false_paths: - false_path = output_false_paths[ real_path ] - param_dict[name] = DatasetFilenameWrapper( hda, false_path = false_path ) - open( false_path, 'w' ).close() - else: - param_dict[name] = DatasetFilenameWrapper( hda ) - # Provide access to a path to store additional files - # TODO: path munging for cluster/dataset server relocatability - param_dict[name].files_path = os.path.abspath(os.path.join( job_working_directory, "dataset_%s_files" % (hda.dataset.id) )) - for child in hda.children: - param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child ) - for out_name, output in self.outputs.iteritems(): - if out_name not in param_dict and output.filters: - # Assume the reason we lack this output is because a filter - # failed to pass; for tool writing convienence, provide a - # NoneDataset - param_dict[ out_name ] = NoneDataset( datatypes_registry = self.app.datatypes_registry, ext = output.format ) - - # -- Add useful attributes/functions for use in creating command line. - - # Function for querying a data table. - def get_data_table_entry(table_name, query_attr, query_val, return_attr): - """ - Queries and returns an entry in a data table. - """ - - if table_name in self.app.tool_data_tables: - return self.app.tool_data_tables[ table_name ].get_entry( query_attr, query_val, return_attr ) - - param_dict['__get_data_table_entry__'] = get_data_table_entry - - # We add access to app here, this allows access to app.config, etc - param_dict['__app__'] = RawObjectWrapper( self.app ) - # More convienent access to app.config.new_file_path; we don't need to - # wrap a string, but this method of generating additional datasets - # should be considered DEPRECATED - # TODO: path munging for cluster/dataset server relocatability - param_dict['__new_file_path__'] = os.path.abspath(self.app.config.new_file_path) - # The following points to location (xxx.loc) files which are pointers - # to locally cached data - param_dict['__tool_data_path__'] = param_dict['GALAXY_DATA_INDEX_DIR'] = self.app.config.tool_data_path - # For the upload tool, we need to know the root directory and the - # datatypes conf path, so we can load the datatypes registry - param_dict['__root_dir__'] = param_dict['GALAXY_ROOT_DIR'] = os.path.abspath( self.app.config.root ) - param_dict['__datatypes_config__'] = param_dict['GALAXY_DATATYPES_CONF_FILE'] = self.app.datatypes_registry.integrated_datatypes_configs - param_dict['__admin_users__'] = self.app.config.admin_users - param_dict['__user__'] = RawObjectWrapper( param_dict.get( '__user__', None ) ) - # Return the dictionary of parameters - return param_dict - def build_param_file( self, param_dict, directory=None ): - """ - Build temporary file for file based parameter transfer if needed - """ - if self.command and "$param_file" in self.command: - fd, param_filename = tempfile.mkstemp( dir=directory ) - os.close( fd ) - f = open( param_filename, "wt" ) - for key, value in param_dict.items(): - # parameters can be strings or lists of strings, coerce to list - if type(value) != type([]): - value = [ value ] - for elem in value: - f.write( '%s=%s\n' % (key, elem) ) - f.close() - param_dict['param_file'] = param_filename - return param_filename - else: - return None - def build_config_files( self, param_dict, directory=None ): - """ - Build temporary file for file based parameter transfer if needed - """ - config_filenames = [] - for name, filename, template_text in self.config_files: - # If 
a particular filename was forced by the config use it - if filename is not None: - if directory is None: - raise Exception( "Config files with fixed filenames require a working directory" ) - config_filename = os.path.join( directory, filename ) - else: - fd, config_filename = tempfile.mkstemp( dir=directory ) - os.close( fd ) - f = open( config_filename, "wt" ) - f.write( fill_template( template_text, context=param_dict ) ) - f.close() - # For running jobs as the actual user, ensure the config file is globally readable - os.chmod( config_filename, 0644 ) - param_dict[name] = config_filename - config_filenames.append( config_filename ) - return config_filenames - def build_command_line( self, param_dict ): - """ - Build command line to invoke this tool given a populated param_dict - """ - command_line = None - if not self.command: - return - try: - # Substituting parameters into the command - command_line = fill_template( self.command, context=param_dict ) - # Remove newlines from command line, and any leading/trailing white space - command_line = command_line.replace( "\n", " " ).replace( "\r", " " ).strip() - except Exception: - # Modify exception message to be more clear - #e.args = ( 'Error substituting into command line. Params: %r, Command: %s' % ( param_dict, self.command ), ) - raise - if self.interpreter: - # TODO: path munging for cluster/dataset server relocatability - executable = command_line.split()[0] - abs_executable = os.path.abspath(os.path.join(self.tool_dir, executable)) - command_line = command_line.replace(executable, abs_executable, 1) - command_line = self.interpreter + " " + command_line - return command_line - def build_dependency_shell_commands( self ): """Return a list of commands to be run to populate the current environment to include this tools requirements.""" if self.tool_shed_repository: diff -r e6f90d86a8eb3f3e48d7bdb6ab801aee22027b33 -r e4cea1f8ef6e1bb02ee2746dddc9da5dc2531165 lib/galaxy/tools/evaluation.py --- /dev/null +++ b/lib/galaxy/tools/evaluation.py @@ -0,0 +1,380 @@ +import os +import tempfile + +from galaxy import model +from galaxy.util.bunch import Bunch +from galaxy.util.none_like import NoneDataset +from galaxy.util.template import fill_template +from galaxy.tools.wrappers import ( + DatasetFilenameWrapper, + DatasetListWrapper, + LibraryDatasetValueWrapper, + SelectToolParameterWrapper, + InputValueWrapper, + RawObjectWrapper +) +from galaxy.tools.parameters.basic import ( + DataToolParameter, + LibraryDatasetToolParameter, + SelectToolParameter, +) +from galaxy.tools.parameters.grouping import Conditional, Repeat + + +class ToolEvaluator( object ): + """ An abstraction linking together a tool and a job runtime to evaluate + tool inputs in an isolated, testable manner. + """ + + def __init__( self, app, tool, job, local_working_directory ): + self.app = app + self.job = job + self.tool = tool + self.local_working_directory = local_working_directory + + def set_compute_environment( self, compute_environment, get_special=None ): + """ + Setup the compute environment and established the outline of the param_dict + for evaluating command and config cheetah templates. 
+ """ + self.compute_environment = compute_environment + + job = self.job + incoming = dict( [ ( p.name, p.value ) for p in job.parameters ] ) + incoming = self.tool.params_from_strings( incoming, self.app ) + # Do any validation that could not be done at job creation + self.tool.handle_unvalidated_param_values( incoming, self.app ) + # Restore input / output data lists + inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] ) + out_data = dict( [ ( da.name, da.dataset ) for da in job.output_datasets ] ) + inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] ) + out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] ) + + if get_special: + + # Set up output dataset association for export history jobs. Because job + # uses a Dataset rather than an HDA or LDA, it's necessary to set up a + # fake dataset association that provides the needed attributes for + # preparing a job. + class FakeDatasetAssociation ( object ): + def __init__( self, dataset=None ): + self.dataset = dataset + self.file_name = dataset.file_name + self.metadata = dict() + self.children = [] + + special = get_special() + if special: + out_data[ "output_file" ] = FakeDatasetAssociation( dataset=special.dataset ) + + # These can be passed on the command line if wanted as $__user_*__ + incoming.update( model.User.user_template_environment( job.history and job.history.user ) ) + + # Build params, done before hook so hook can use + param_dict = self.build_param_dict( + incoming, + inp_data, + out_data, + output_paths=compute_environment.output_paths(), + job_working_directory=compute_environment.working_directory(), + input_paths=compute_environment.input_paths() + ) + + # Certain tools require tasks to be completed prior to job execution + # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ). + self.tool.exec_before_job( self.app, inp_data, out_data, param_dict ) + # Run the before queue ("exec_before_job") hook + self.tool.call_hook( 'exec_before_job', self.app, inp_data=inp_data, + out_data=out_data, tool=self.tool, param_dict=incoming) + + self.param_dict = param_dict + + def build_param_dict( self, incoming, input_datasets, output_datasets, output_paths, job_working_directory, input_paths=[] ): + """ + Build the dictionary of parameters for substituting into the command + line. Each value is wrapped in a `InputValueWrapper`, which allows + all the attributes of the value to be used in the template, *but* + when the __str__ method is called it actually calls the + `to_param_dict_string` method of the associated input. + """ + param_dict = dict() + param_dict.update(self.tool.template_macro_params) + # All parameters go into the param_dict + param_dict.update( incoming ) + + input_false_paths = dict( [ ( dp.real_path, dp.false_path ) for dp in input_paths if getattr( dp, "false_path", None ) ] ) + + def wrap_values( inputs, input_values ): + """ + Wraps parameters as neccesary. 
+ """ + for input in inputs.itervalues(): + if isinstance( input, Repeat ): + for d in input_values[ input.name ]: + wrap_values( input.inputs, d ) + elif isinstance( input, Conditional ): + values = input_values[ input.name ] + current = values["__current_case__"] + wrap_values( input.cases[current].inputs, values ) + elif isinstance( input, DataToolParameter ) and input.multiple: + input_values[ input.name ] = \ + DatasetListWrapper( input_values[ input.name ], + false_paths=input_false_paths, + datatypes_registry=self.app.datatypes_registry, + tool=self.tool, + name=input.name ) + elif isinstance( input, DataToolParameter ): + ## FIXME: We're populating param_dict with conversions when + ## wrapping values, this should happen as a separate + ## step before wrapping (or call this wrapping step + ## something more generic) (but iterating this same + ## list twice would be wasteful) + # Add explicit conversions by name to current parent + for conversion_name, conversion_extensions, conversion_datatypes in input.conversions: + # If we are at building cmdline step, then converters + # have already executed + conv_ext, converted_dataset = input_values[ input.name ].find_conversion_destination( conversion_datatypes ) + # When dealing with optional inputs, we'll provide a + # valid extension to be used for None converted dataset + if not conv_ext: + conv_ext = conversion_extensions[0] + # input_values[ input.name ] is None when optional + # dataset, 'conversion' of optional dataset should + # create wrapper around NoneDataset for converter output + if input_values[ input.name ] and not converted_dataset: + # Input that converter is based from has a value, + # but converted dataset does not exist + raise Exception( 'A path for explicit datatype conversion has not been found: %s --/--> %s' + % ( input_values[ input.name ].extension, conversion_extensions ) ) + else: + # Trick wrapper into using target conv ext (when + # None) without actually being a tool parameter + input_values[ conversion_name ] = \ + DatasetFilenameWrapper( converted_dataset, + datatypes_registry=self.app.datatypes_registry, + tool=Bunch( conversion_name=Bunch( extensions=conv_ext ) ), + name=conversion_name ) + # Wrap actual input dataset + dataset = input_values[ input.name ] + wrapper_kwds = dict( + datatypes_registry=self.app.datatypes_registry, + tool=self, + name=input.name + ) + real_path = dataset.file_name + if real_path in input_false_paths: + wrapper_kwds[ "false_path" ] = input_false_paths[ real_path ] + input_values[ input.name ] = \ + DatasetFilenameWrapper( dataset, **wrapper_kwds ) + elif isinstance( input, SelectToolParameter ): + input_values[ input.name ] = SelectToolParameterWrapper( + input, input_values[ input.name ], self.app, other_values=param_dict ) + + elif isinstance( input, LibraryDatasetToolParameter ): + # TODO: Handle input rewrites in here? How to test LibraryDatasetToolParameters? + input_values[ input.name ] = LibraryDatasetValueWrapper( + input, input_values[ input.name ], param_dict ) + + else: + input_values[ input.name ] = InputValueWrapper( + input, input_values[ input.name ], param_dict ) + + # HACK: only wrap if check_values is not false, this deals with external + # tools where the inputs don't even get passed through. These + # tools (e.g. UCSC) should really be handled in a special way. 
+ if self.tool.check_values: + wrap_values( self.tool.inputs, param_dict ) + + ## FIXME: when self.check_values==True, input datasets are being wrapped + ## twice (above and below, creating 2 separate + ## DatasetFilenameWrapper objects - first is overwritten by + ## second), is this necessary? - if we get rid of this way to + ## access children, can we stop this redundancy, or is there + ## another reason for this? + ## - Only necessary when self.check_values is False (==external dataset + ## tool?: can this be abstracted out as part of being a datasouce tool?) + ## - But we still want (ALWAYS) to wrap input datasets (this should be + ## checked to prevent overhead of creating a new object?) + # Additionally, datasets go in the param dict. We wrap them such that + # if the bare variable name is used it returns the filename (for + # backwards compatibility). We also add any child datasets to the + # the param dict encoded as: + # "_CHILD___{dataset_name}___{child_designation}", + # but this should be considered DEPRECATED, instead use: + # $dataset.get_child( 'name' ).filename + for name, data in input_datasets.items(): + param_dict_value = param_dict.get(name, None) + if not isinstance(param_dict_value, (DatasetFilenameWrapper, DatasetListWrapper)): + wrapper_kwds = dict( + datatypes_registry=self.app.datatypes_registry, + tool=self, + name=name, + ) + real_path = data.file_name + if real_path in input_false_paths: + false_path = input_false_paths[ real_path ] + wrapper_kwds[ 'false_path' ] = false_path + param_dict[name] = DatasetFilenameWrapper( data, **wrapper_kwds ) + if data: + for child in data.children: + param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child ) + output_false_paths = dict( [ ( dp.real_path, dp.false_path ) for dp in output_paths if getattr( dp, "false_path", None ) ] ) + for name, hda in output_datasets.items(): + # Write outputs to the working directory (for security purposes) + # if desired. + real_path = hda.file_name + if real_path in output_false_paths: + false_path = output_false_paths[ real_path ] + param_dict[name] = DatasetFilenameWrapper( hda, false_path=false_path ) + open( false_path, 'w' ).close() + else: + param_dict[name] = DatasetFilenameWrapper( hda ) + # Provide access to a path to store additional files + # TODO: path munging for cluster/dataset server relocatability + param_dict[name].files_path = os.path.abspath(os.path.join( job_working_directory, "dataset_%s_files" % (hda.dataset.id) )) + for child in hda.children: + param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child ) + for out_name, output in self.tool.outputs.iteritems(): + if out_name not in param_dict and output.filters: + # Assume the reason we lack this output is because a filter + # failed to pass; for tool writing convienence, provide a + # NoneDataset + param_dict[ out_name ] = NoneDataset( datatypes_registry=self.app.datatypes_registry, ext=output.format ) + + # -- Add useful attributes/functions for use in creating command line. + + # Function for querying a data table. + def get_data_table_entry(table_name, query_attr, query_val, return_attr): + """ + Queries and returns an entry in a data table. 
+ """ + + if table_name in self.app.tool_data_tables: + return self.app.tool_data_tables[ table_name ].get_entry( query_attr, query_val, return_attr ) + + param_dict['__get_data_table_entry__'] = get_data_table_entry + + # We add access to app here, this allows access to app.config, etc + param_dict['__app__'] = RawObjectWrapper( self.app ) + # More convienent access to app.config.new_file_path; we don't need to + # wrap a string, but this method of generating additional datasets + # should be considered DEPRECATED + # TODO: path munging for cluster/dataset server relocatability + param_dict['__new_file_path__'] = os.path.abspath(self.compute_environment.new_file_path()) + # The following points to location (xxx.loc) files which are pointers + # to locally cached data + param_dict['__tool_data_path__'] = param_dict['GALAXY_DATA_INDEX_DIR'] = self.app.config.tool_data_path + # For the upload tool, we need to know the root directory and the + # datatypes conf path, so we can load the datatypes registry + param_dict['__root_dir__'] = param_dict['GALAXY_ROOT_DIR'] = os.path.abspath( self.app.config.root ) + param_dict['__datatypes_config__'] = param_dict['GALAXY_DATATYPES_CONF_FILE'] = self.app.datatypes_registry.integrated_datatypes_configs + param_dict['__admin_users__'] = self.app.config.admin_users + param_dict['__user__'] = RawObjectWrapper( param_dict.get( '__user__', None ) ) + # Return the dictionary of parameters + return param_dict + + def build( self ): + """ + Build runtime description of job to execute, evaluate command and + config templates corresponding to this tool with these inputs on this + compute environment. + """ + self.extra_filenames = [] + self.command_line = None + + self.__build_config_files( ) + self.__build_param_file( ) + self.__build_command_line( ) + + return self.command_line, self.extra_filenames + + def __build_command_line( self ): + """ + Build command line to invoke this tool given a populated param_dict + """ + command = self.tool.command + param_dict = self.param_dict + interpreter = self.tool.interpreter + command_line = None + if not command: + return + try: + # Substituting parameters into the command + command_line = fill_template( command, context=param_dict ) + # Remove newlines from command line, and any leading/trailing white space + command_line = command_line.replace( "\n", " " ).replace( "\r", " " ).strip() + except Exception: + # Modify exception message to be more clear + #e.args = ( 'Error substituting into command line. 
Params: %r, Command: %s' % ( param_dict, self.command ), ) + raise + if interpreter: + # TODO: path munging for cluster/dataset server relocatability + executable = command_line.split()[0] + tool_dir = os.path.abspath( self.tool.tool_dir ) + abs_executable = os.path.join( tool_dir, executable ) + command_line = command_line.replace(executable, abs_executable, 1) + command_line = interpreter + " " + command_line + self.command_line = command_line + + def __build_config_files( self ): + """ + Build temporary file for file based parameter transfer if needed + """ + param_dict = self.param_dict + config_filenames = [] + for name, filename, template_text in self.tool.config_files: + # If a particular filename was forced by the config use it + directory = self.local_working_directory + if filename is not None: + config_filename = os.path.join( directory, filename ) + else: + fd, config_filename = tempfile.mkstemp( dir=directory ) + os.close( fd ) + f = open( config_filename, "wt" ) + f.write( fill_template( template_text, context=param_dict ) ) + f.close() + # For running jobs as the actual user, ensure the config file is globally readable + os.chmod( config_filename, 0644 ) + self.__register_extra_file( name, config_filename ) + config_filenames.append( config_filename ) + return config_filenames + + def __build_param_file( self ): + """ + Build temporary file for file based parameter transfer if needed + """ + param_dict = self.param_dict + directory = self.local_working_directory + command = self.tool.command + if command and "$param_file" in command: + fd, param_filename = tempfile.mkstemp( dir=directory ) + os.close( fd ) + f = open( param_filename, "wt" ) + for key, value in param_dict.items(): + # parameters can be strings or lists of strings, coerce to list + if type(value) != type([]): + value = [ value ] + for elem in value: + f.write( '%s=%s\n' % (key, elem) ) + f.close() + self.__register_extra_file( 'param_file', param_filename ) + return param_filename + else: + return None + + def __register_extra_file( self, name, local_config_path ): + """ + Takes in the local path to a config file and registers the (potentially + remote) ultimate path of the config file with the parameter dict. + """ + self.extra_filenames.append( local_config_path ) + config_basename = os.path.basename( local_config_path ) + compute_config_path = self.__join_for_compute(self.compute_environment.config_directory(), config_basename) + self.param_dict[ name ] = compute_config_path + + def __join_for_compute( self, *args ): + """ + os.path.join but with compute_environment.sep for cross-platform + compat. + """ + return self.compute_environment.sep().join( args ) diff -r e6f90d86a8eb3f3e48d7bdb6ab801aee22027b33 -r e4cea1f8ef6e1bb02ee2746dddc9da5dc2531165 test/unit/jobs/test_job_wrapper.py --- /dev/null +++ b/test/unit/jobs/test_job_wrapper.py @@ -0,0 +1,225 @@ +import os +from contextlib import contextmanager + +from unittest import TestCase +from galaxy.model import Job +from galaxy.model import Task +from galaxy.model import User +from galaxy.jobs import JobWrapper +from galaxy.jobs import TaskWrapper +from galaxy.util.bunch import Bunch + +from galaxy.tools import evaluation + +from tools_support import UsesApp +#from tools_support import MockTool + +#from ..tools_and_jobs_helpers import MockApp + +TEST_TOOL_ID = "cufftest" +TEST_VERSION_COMMAND = "bwa --version" +TEST_DEPENDENCIES_COMMANDS = ". 
/galaxy/modules/bwa/0.5.9/env.sh" +TEST_COMMAND = "" + + +class BaseWrapperTestCase(UsesApp): + + def setUp(self): + self.setup_app() + job = Job() + job.id = 345 + job.tool_id = TEST_TOOL_ID + job.user = User() + self.model_objects = {Job: {345: job}} + self.app.model.context = MockContext(self.model_objects) + + self.app.toolbox = MockToolbox(MockTool(self)) + self.working_directory = os.path.join(self.test_directory, "working") + self.app.object_store = MockObjectStore(self.working_directory) + + self.queue = MockJobQueue(self.app) + self.job = job + + def tearDown(self): + self.tear_down_app() + + @contextmanager + def _prepared_wrapper(self): + wrapper = self._wrapper() + with _mock_tool_evaluator(MockEvaluator): + wrapper.prepare() + yield wrapper + + def test_version_path(self): + wrapper = self._wrapper() + version_path = wrapper.get_version_string_path() + expected_path = os.path.join(self.test_directory, "new_files", "GALAXY_VERSION_STRING_345") + self.assertEquals(version_path, expected_path) + + def test_prepare_sets_command_line(self): + with self._prepared_wrapper() as wrapper: + assert TEST_COMMAND in wrapper.command_line + + def test_prepare_sets_dependency_shell_commands(self): + with self._prepared_wrapper() as wrapper: + assert TEST_DEPENDENCIES_COMMANDS == wrapper.dependency_shell_commands + + +class JobWrapperTestCase(BaseWrapperTestCase, TestCase): + + def _wrapper(self): + return JobWrapper(self.job, self.queue) + + def test_prepare_sets_version_command(self): + with self._prepared_wrapper() as wrapper: + assert TEST_VERSION_COMMAND == wrapper.version_string_cmd, wrapper.version_string_cmd + + +class TaskWrapperTestCase(BaseWrapperTestCase, TestCase): + + def setUp(self): + super(TaskWrapperTestCase, self).setUp() + self.task = Task(self.job, self.working_directory, "prepare_bwa_job.sh") + self.task.id = 4 + self.model_objects[Task] = {4: self.task} + + def _wrapper(self): + return TaskWrapper(self.task, self.queue) + + def test_prepare_sets_no_version_command(self): + with self._prepared_wrapper() as wrapper: + assert wrapper.version_string_cmd is None + + +class MockEvaluator(object): + + def __init__(self, app, tool, job, local_working_directory): + self.app = app + self.tool = tool + self.job = job + self.local_working_directory = local_working_directory + self.param_dict = {} + + def set_compute_environment(self, *args, **kwds): + pass + + def build(self): + return TEST_COMMAND, [] + + +class MockJobQueue(object): + + def __init__(self, app): + self.app = app + self.dispatcher = MockJobDispatcher(app) + + +class MockJobDispatcher(object): + + def __init__(self, app): + pass + + def url_to_destination(self): + pass + + +class MockApp(object): + + def __init__(self, object_store, test_directory, model_objects): + self.object_store = object_store + self.toolbox = MockToolbox(MockTool(self)) + self.config = Bunch( + outputs_to_working_directory=False, + new_file_path=os.path.join(test_directory, "new_files"), + tool_data_path=os.path.join(test_directory, "tools"), + root=os.path.join(test_directory, "galaxy"), + datatypes_registry=Bunch( + integrated_datatypes_configs=os.path.join(test_directory, "datatypes_conf.xml"), + ), + ) + self.job_config = Bunch() + self.model = Bunch(context=MockContext(model_objects)) + + +class MockContext(object): + + def __init__(self, model_objects): + self.expunged_all = False + self.flushed = False + self.model_objects = model_objects + self.created_objects = [] + + def expunge_all(self): + self.expunged_all = True + + def 
query(self, clazz): + return MockQuery(self.model_objects.get(clazz)) + + def flush(self): + self.flushed = True + + def add(self, object): + self.created_objects.append(object) + + +class MockQuery(object): + + def __init__(self, class_objects): + self.class_objects = class_objects + + def filter_by(self, **kwds): + return Bunch(first=lambda: None) + + def get(self, id): + return self.class_objects.get(id, None) + + +class MockTool(object): + + def __init__(self, app): + self.version_string_cmd = TEST_VERSION_COMMAND + + def build_dependency_shell_commands(self): + return TEST_DEPENDENCIES_COMMANDS + + +class MockToolbox(object): + + def __init__(self, test_tool): + self.test_tool = test_tool + + @property + def tools_by_id(self): + return self + + def get(self, tool_id, default=None): + assert tool_id == TEST_TOOL_ID + return self.test_tool + + +class MockObjectStore(object): + + def __init__(self, working_directory): + self.working_directory = working_directory + os.makedirs(working_directory) + + def create(self, *args, **kwds): + pass + + def get_filename(self, *args, **kwds): + if kwds.get("base_dir", "") == "job_work": + return self.working_directory + return None + + +## Poor man's mocking. Need to get a real mocking library as real Galaxy development +## dependnecy. +@contextmanager +def _mock_tool_evaluator(mock_constructor): + name = evaluation.ToolEvaluator.__name__ + real_classs = getattr(evaluation, name) + try: + setattr(evaluation, name, mock_constructor) + yield + finally: + setattr(evaluation, name, real_classs) diff -r e6f90d86a8eb3f3e48d7bdb6ab801aee22027b33 -r e4cea1f8ef6e1bb02ee2746dddc9da5dc2531165 test/unit/tools/test_evaluation.py --- /dev/null +++ b/test/unit/tools/test_evaluation.py @@ -0,0 +1,252 @@ +import os + +from unittest import TestCase +from galaxy.model import Job +from galaxy.model import History +from galaxy.model import Dataset +from galaxy.model import JobParameter +from galaxy.model import HistoryDatasetAssociation +from galaxy.model import JobToInputDatasetAssociation +from galaxy.tools.evaluation import ToolEvaluator +from galaxy.jobs import SimpleComputeEnvironment +from galaxy.jobs.datasets import DatasetPath +from galaxy.util.bunch import Bunch + +# For MockTool +from galaxy.tools.parameters import params_from_strings +from galaxy.tools import ToolOutput +from galaxy.tools.parameters.grouping import Repeat +from galaxy.tools.parameters.grouping import Conditional +from galaxy.tools.parameters.grouping import ConditionalWhen +from galaxy.tools.parameters.basic import IntegerToolParameter +from galaxy.tools.parameters.basic import SelectToolParameter +from elementtree.ElementTree import XML # Import after model, to ensure elementtree + + +# Test fixtures for Galaxy infrastructure. +from tools_support import UsesApp + +# To Test: +# - param_file handling. 
+ + +class ToolEvaluatorTestCase(TestCase, UsesApp): + + def setUp(self): + self.setup_app() + self.tool = MockTool(self.app) + self.job = Job() + self.job.history = History() + self.job.parameters = [ JobParameter( name="thresh", value="4" ) ] + self.evaluator = ToolEvaluator( self.app, self.tool, self.job, self.test_directory ) + + def tearDown(self): + self.tear_down_app() + + def test_simple_evaluation( self ): + self._setup_test_bwa_job() + self._set_compute_environment() + command_line, extra_filenames = self.evaluator.build( ) + self.assertEquals( command_line, "bwa --thresh=4 --in=/galaxy/files/dataset_1.dat --out=/galaxy/files/dataset_2.dat" ) + + def test_repeat_evaluation( self ): + repeat = Repeat() + repeat.name = "r" + repeat.inputs = { "thresh": self.tool.test_thresh_param() } + self.tool.set_params( { "r": repeat } ) + self.job.parameters = [ JobParameter( name="r", value='''[{"thresh": 4, "__index__": 0},{"thresh": 5, "__index__": 1}]''' ) ] + self.tool._command_line = "prog1 #for $r_i in $r # $r_i.thresh#end for#" + self._set_compute_environment() + command_line, extra_filenames = self.evaluator.build( ) + self.assertEquals( command_line, "prog1 4 5" ) + + def test_conditional_evaluation( self ): + select_xml = XML('''<param name="always_true" type="select"><option value="true">True</option></param>''') + parameter = SelectToolParameter( self.tool, select_xml ) + + conditional = Conditional() + conditional.name = "c" + conditional.test_param = parameter + when = ConditionalWhen() + when.inputs = { "thresh": self.tool.test_thresh_param() } + when.value = "true" + conditional.cases = [ when ] + self.tool.set_params( { "c": conditional } ) + self.job.parameters = [ JobParameter( name="c", value='''{"thresh": 4, "always_true": "true", "__current_case__": 0}''' ) ] + self.tool._command_line = "prog1 --thresh=${c.thresh} --test_param=${c.always_true}" + self._set_compute_environment() + command_line, extra_filenames = self.evaluator.build( ) + self.assertEquals( command_line, "prog1 --thresh=4 --test_param=true" ) + + def test_evaluation_with_path_rewrites_wrapped( self ): + self.tool.check_values = True + self.__test_evaluation_with_path_rewrites() + + def test_evaluation_with_path_rewrites_unwrapped( self ): + self.tool.check_values = False + self.__test_evaluation_with_path_rewrites() + + def __test_evaluation_with_path_rewrites( self ): + # Various things can cause dataset paths to be rewritten (Task + # splitting, config.outputs_to_working_directory). This tests that + #functionality. + self._setup_test_bwa_job() + job_path_1 = "%s/dataset_1.dat" % self.test_directory + job_path_2 = "%s/dataset_2.dat" % self.test_directory + self._set_compute_environment( + input_paths=[DatasetPath(1, '/galaxy/files/dataset_1.dat', false_path=job_path_1)], + output_paths=[DatasetPath(2, '/galaxy/files/dataset_2.dat', false_path=job_path_2)], + ) + command_line, extra_filenames = self.evaluator.build( ) + self.assertEquals( command_line, "bwa --thresh=4 --in=%s --out=%s" % (job_path_1, job_path_2) ) + + def test_configfiles_evaluation( self ): + self.tool.config_files.append( ( "conf1", None, "$thresh" ) ) + self.tool._command_line = "prog1 $conf1" + self._set_compute_environment() + command_line, extra_filenames = self.evaluator.build( ) + self.assertEquals( len( extra_filenames ), 1) + config_filename = extra_filenames[ 0 ] + config_basename = os.path.basename( config_filename ) + # Verify config file written into working directory. 
+ self.assertEquals( os.path.join( self.test_directory, config_basename ), config_filename ) + # Verify config file contents are evaluated against parameters. + assert open( config_filename, "r").read() == "4" + self.assertEquals(command_line, "prog1 %s" % config_filename) + + def test_template_property_app( self ): + self._assert_template_property_is("$__app__.config.new_file_path", self.app.config.new_file_path) + + def test_template_property_new_file_path( self ): + self._assert_template_property_is("$__new_file_path__", self.app.config.new_file_path) + + def test_template_property_root_dir( self ): + self._assert_template_property_is("$__root_dir__", self.app.config.root) + + def test_template_property_admin_users( self ): + self._assert_template_property_is("$__admin_users__", "mary@example.com") + + def _assert_template_property_is(self, expression, value): + self.tool._command_line = "test.exe" + self.tool.config_files.append( ( "conf1", None, """%s""" % expression) ) + self._set_compute_environment() + _, extra_filenames = self.evaluator.build( ) + config_filename = extra_filenames[ 0 ] + self.assertEquals(open( config_filename, "r").read(), value) + + def _set_compute_environment(self, **kwds): + if "working_directory" not in kwds: + kwds[ "working_directory" ] = self.test_directory + if "new_file_path" not in kwds: + kwds[ "new_file_path" ] = self.app.config.new_file_path + self.evaluator.set_compute_environment( TestComputeEnviornment( **kwds ) ) + self.assertIn( "exec_before_job", self.tool.hooks_called ) + + def _setup_test_bwa_job( self ): + self.job.input_datasets = [ self._job_dataset( 'input1', '/galaxy/files/dataset_1.dat' ) ] + self.job.output_datasets = [ self._job_dataset( 'output1', '/galaxy/files/dataset_2.dat' ) ] + + def _job_dataset( self, name, path ): + metadata = dict( ) + hda = HistoryDatasetAssociation( name=name, metadata=metadata ) + hda.dataset = Dataset( id=123, external_filename=path ) + hda.dataset.metadata = dict() + hda.children = [] + jida = JobToInputDatasetAssociation( name=name, dataset=hda ) + return jida + + +class MockHistoryDatasetAssociation( HistoryDatasetAssociation ): + + def __init__( self, **kwds ): + self._metadata = dict() + super( MockHistoryDatasetAssociation, self ).__init__( **kwds ) + + +class TestComputeEnviornment( SimpleComputeEnvironment ): + + def __init__( + self, + new_file_path, + working_directory, + input_paths=[ '/galaxy/files/dataset_1.dat' ], + output_paths=[ '/galaxy/files/dataset_2.dat' ], + ): + self._new_file_path = new_file_path + self._working_directory = working_directory + self._input_paths = input_paths + self._output_paths = output_paths + + def input_paths( self ): + return self._input_paths + + def output_paths( self ): + return self._output_paths + + def working_directory( self ): + return self._working_directory + + def new_file_path(self): + return self._new_file_path + + +class MockTool( object ): + + def __init__( self, app ): + self.app = app + self.hooks_called = [] + self._config_files = [] + self._command_line = "bwa --thresh=$thresh --in=$input1 --out=$output1" + self._params = { "thresh": self.test_thresh_param() } + self.options = Bunch(sanitize=False) + self.check_values = True + + def test_thresh_param( self ): + elem = XML( '<param name="thresh" type="integer" value="5" />' ) + return IntegerToolParameter( self, elem ) + + def params_from_strings( self, params, app, ignore_errors=False ): + return params_from_strings( self.inputs, params, app, ignore_errors ) + + @property + def 
template_macro_params( self ): + return {} + + @property + def inputs( self ): + return self._params + + def set_params( self, params ): + self._params = params + + @property + def outputs( self ): + #elem_output1 = XML( '<param name="output1" type="data" format="txt"/>' ) + # DataToolParameter( self, elem_output1 ), + return dict( + output1=ToolOutput( "output1" ), + ) + + @property + def config_files( self ): + return self._config_files + + @property + def command( self ): + return self._command_line + + @property + def interpreter( self ): + return None + + def handle_unvalidated_param_values( self, input_values, app ): + pass + + def build_param_dict( self, incoming, *args, **kwds ): + return incoming + + def call_hook( self, hook_name, *args, **kwargs ): + self.hooks_called.append( hook_name ) + + def exec_before_job( self, *args, **kwargs ): + pass https://bitbucket.org/galaxy/galaxy-central/commits/bbe023fc7654/ Changeset: bbe023fc7654 User: jmchilton Date: 2014-02-11 04:34:42 Summary: Introduce dataset_path_rewrites to slightly simplify tools/evaluation.py. Affected #: 2 files diff -r e4cea1f8ef6e1bb02ee2746dddc9da5dc2531165 -r bbe023fc76547187210c8f99e07446ffbf493b4f lib/galaxy/jobs/datasets.py --- a/lib/galaxy/jobs/datasets.py +++ b/lib/galaxy/jobs/datasets.py @@ -7,6 +7,11 @@ from abc import abstractmethod +def dataset_path_rewrites( dataset_paths ): + dataset_paths_with_rewrites = filter( lambda path: getattr( path, "false_path", None ), dataset_paths ) + return dict( [ ( dp.real_path, dp.false_path ) for dp in dataset_paths_with_rewrites ] ) + + class DatasetPath( object ): def __init__( self, dataset_id, real_path, false_path=None, mutable=True ): self.dataset_id = dataset_id diff -r e4cea1f8ef6e1bb02ee2746dddc9da5dc2531165 -r bbe023fc76547187210c8f99e07446ffbf493b4f lib/galaxy/tools/evaluation.py --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -19,6 +19,7 @@ SelectToolParameter, ) from galaxy.tools.parameters.grouping import Conditional, Repeat +from galaxy.jobs.datasets import dataset_path_rewrites class ToolEvaluator( object ): @@ -102,7 +103,7 @@ # All parameters go into the param_dict param_dict.update( incoming ) - input_false_paths = dict( [ ( dp.real_path, dp.false_path ) for dp in input_paths if getattr( dp, "false_path", None ) ] ) + input_false_paths = dataset_path_rewrites( input_paths ) def wrap_values( inputs, input_values ): """ @@ -218,7 +219,7 @@ if data: for child in data.children: param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child ) - output_false_paths = dict( [ ( dp.real_path, dp.false_path ) for dp in output_paths if getattr( dp, "false_path", None ) ] ) + output_false_paths = dataset_path_rewrites( output_paths ) for name, hda in output_datasets.items(): # Write outputs to the working directory (for security purposes) # if desired. https://bitbucket.org/galaxy/galaxy-central/commits/55276af33bf8/ Changeset: 55276af33bf8 User: jmchilton Date: 2014-02-11 04:34:42 Summary: Add rewrite_parameters (defaults to False) option to LWR runner. When false the current behavior of the LWR is maintained and cheetah templates are evaluated as if the command will be submitted locally and then the LWR will rewrite the command and config files and transfer files as needed. 
If this option is set to True, an instance of the new class LwrComputeEnvironment will be passed along to job_wrapper.prepare, allowing the LWR runner to instruct the tool evaluation process to use the remote paths for input, output, and config files during evaluation. This option feels less hacky - I am skeptical there are tools that would work this way but not the other way, and it does have downsides, such as forcing all input files to be transferred regardless of whether they are actually used in the command or config files. It will hopefully provide an entry point for modifying the behavior of tool data tables, however. tool_dir is still rewritten either way - hopefully a small point. Affected #: 2 files diff -r bbe023fc76547187210c8f99e07446ffbf493b4f -r 55276af33bf85f0ec5b773a8e6bb96dbe9d8b613 lib/galaxy/jobs/datasets.py --- a/lib/galaxy/jobs/datasets.py +++ b/lib/galaxy/jobs/datasets.py @@ -25,6 +25,20 @@ else: return self.false_path + def with_path_for_job( self, false_path ): + """ + Clone the dataset path but with a new false_path. + """ + dataset_path = self + if false_path is not None: + dataset_path = DatasetPath( + dataset_id=self.dataset_id, + real_path=self.real_path, + false_path=false_path, + mutable=self.mutable, + ) + return dataset_path + class DatasetPathRewriter( object ): """ Used by runner to rewrite paths. """ diff -r bbe023fc76547187210c8f99e07446ffbf493b4f -r 55276af33bf85f0ec5b773a8e6bb96dbe9d8b613 lib/galaxy/jobs/runners/lwr.py --- a/lib/galaxy/jobs/runners/lwr.py +++ b/lib/galaxy/jobs/runners/lwr.py @@ -2,9 +2,11 @@ from galaxy import model from galaxy.jobs.runners import AsynchronousJobState, AsynchronousJobRunner +from galaxy.jobs import ComputeEnvironment from galaxy.jobs import JobDestination from galaxy.jobs.command_factory import build_command from galaxy.util import string_as_bool_or_none +from galaxy.util import in_directory from galaxy.util.bunch import Bunch import errno @@ -17,6 +19,7 @@ from .lwr_client import ClientJobDescription from .lwr_client import LwrOutputs from .lwr_client import GalaxyOutputs +from .lwr_client import PathMapper log = logging.getLogger( __name__ ) @@ -73,15 +76,17 @@ dependency_resolution = LwrJobRunner.__dependency_resolution( client ) remote_dependency_resolution = dependency_resolution == "remote" requirements = job_wrapper.tool.requirements if remote_dependency_resolution else [] + rewrite_paths = not LwrJobRunner.__rewrite_parameters( client ) client_job_description = ClientJobDescription( command_line=command_line, output_files=self.get_output_files(job_wrapper), - input_files=job_wrapper.get_input_fnames(), + input_files=self.get_input_files(job_wrapper), working_directory=job_wrapper.working_directory, tool=job_wrapper.tool, config_files=job_wrapper.extra_filenames, requirements=requirements, version_file=job_wrapper.get_version_string_path(), + rewrite_paths=rewrite_paths, ) job_id = lwr_submit_job(client, client_job_description, remote_job_config) log.info("lwr job submitted with job_id %s" % job_id) @@ -106,11 +111,16 @@ client = None remote_job_config = None try: - job_wrapper.prepare() - self.__prepare_input_files_locally(job_wrapper) client = self.get_client_from_wrapper(job_wrapper) tool = job_wrapper.tool remote_job_config = client.setup(tool.id, tool.version) + rewrite_parameters = LwrJobRunner.__rewrite_parameters( client ) + prepare_kwds = {} + if rewrite_parameters: + compute_environment = LwrComputeEnvironment( client, job_wrapper, remote_job_config ) + prepare_kwds[ 
'compute_environment' ] = compute_environment + job_wrapper.prepare( **prepare_kwds ) + self.__prepare_input_files_locally(job_wrapper) remote_metadata = LwrJobRunner.__remote_metadata( client ) remote_work_dir_copy = LwrJobRunner.__remote_work_dir_copy( client ) dependency_resolution = LwrJobRunner.__dependency_resolution( client ) @@ -147,8 +157,12 @@ job_wrapper.prepare_input_files_cmds = None # prevent them from being used in-line def get_output_files(self, job_wrapper): - output_fnames = job_wrapper.get_output_fnames() - return [ str( o ) for o in output_fnames ] + output_paths = job_wrapper.get_output_fnames() + return [ str( o ) for o in output_paths ] # Force job_path from DatasetPath objects. + + def get_input_files(self, job_wrapper): + input_paths = job_wrapper.get_input_paths() + return [ str( i ) for i in input_paths ] # Force job_path from DatasetPath objects. def get_client_from_wrapper(self, job_wrapper): job_id = job_wrapper.job_id @@ -323,6 +337,10 @@ use_remote_datatypes = string_as_bool_or_none( lwr_client.destination_params.get( "use_remote_datatypes", False ) ) return use_remote_datatypes + @staticmethod + def __rewrite_parameters( lwr_client ): + return string_as_bool_or_none( lwr_client.destination_params.get( "rewrite_parameters", False ) ) or False + def __build_metadata_configuration(self, client, job_wrapper, remote_metadata, remote_job_config): metadata_kwds = {} if remote_metadata: @@ -352,3 +370,55 @@ metadata_kwds['datatypes_config'] = os.path.join(configs_directory, os.path.basename(integrates_datatypes_config)) return metadata_kwds + + +class LwrComputeEnvironment( ComputeEnvironment ): + + def __init__( self, lwr_client, job_wrapper, remote_job_config ): + self.lwr_client = lwr_client + self.job_wrapper = job_wrapper + self.local_path_config = job_wrapper.default_compute_environment() + # job_wrapper.prepare is going to expunge the job backing the following + # computations, so precalculate these paths. + self._wrapper_input_paths = self.local_path_config.input_paths() + self._wrapper_output_paths = self.local_path_config.output_paths() + self.path_mapper = PathMapper(lwr_client, remote_job_config, self.local_path_config.working_directory()) + self._config_directory = remote_job_config[ "configs_directory" ] + self._working_directory = remote_job_config[ "working_directory" ] + self._sep = remote_job_config[ "system_properties" ][ "separator" ] + self._tool_dir = remote_job_config[ "tools_directory" ] + + def output_paths( self ): + local_output_paths = self._wrapper_output_paths + + results = [] + for local_output_path in local_output_paths: + wrapper_path = str( local_output_path ) + remote_path = self.path_mapper.remote_output_path_rewrite( wrapper_path ) + results.append( local_output_path.with_path_for_job( remote_path ) ) + return results + + def input_paths( self ): + local_input_paths = self._wrapper_input_paths + + results = [] + for local_input_path in local_input_paths: + wrapper_path = str( local_input_path ) + # This will over-copy in some cases. For instance in the case of task + # splitting, this input will be copied even though only the work dir + # input will actually be used. 
+ remote_path = self.path_mapper.remote_input_path_rewrite( wrapper_path ) + results.append( local_input_path.with_path_for_job( remote_path ) ) + return results + + def working_directory( self ): + return self._working_directory + + def config_directory( self ): + return self._config_directory + + def new_file_path( self ): + return self.working_directory() # Problems with doing this? + + def sep( self ): + return self._sep https://bitbucket.org/galaxy/galaxy-central/commits/79557437160a/ Changeset: 79557437160a User: jmchilton Date: 2014-02-11 04:34:42 Summary: Move false_path logic out of tool evaluation code and into cheetah wrapper. This should ease replacing extra_files_path in subsequent commits. Affected #: 4 files diff -r 55276af33bf85f0ec5b773a8e6bb96dbe9d8b613 -r 79557437160ae40b410dc1c7d627bf722387b324 lib/galaxy/jobs/datasets.py --- a/lib/galaxy/jobs/datasets.py +++ b/lib/galaxy/jobs/datasets.py @@ -9,10 +9,11 @@ def dataset_path_rewrites( dataset_paths ): dataset_paths_with_rewrites = filter( lambda path: getattr( path, "false_path", None ), dataset_paths ) - return dict( [ ( dp.real_path, dp.false_path ) for dp in dataset_paths_with_rewrites ] ) + return dict( [ ( dp.real_path, dp ) for dp in dataset_paths_with_rewrites ] ) class DatasetPath( object ): + def __init__( self, dataset_id, real_path, false_path=None, mutable=True ): self.dataset_id = dataset_id self.real_path = real_path diff -r 55276af33bf85f0ec5b773a8e6bb96dbe9d8b613 -r 79557437160ae40b410dc1c7d627bf722387b324 lib/galaxy/tools/evaluation.py --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -103,7 +103,7 @@ # All parameters go into the param_dict param_dict.update( incoming ) - input_false_paths = dataset_path_rewrites( input_paths ) + input_dataset_paths = dataset_path_rewrites( input_paths ) def wrap_values( inputs, input_values ): """ @@ -120,7 +120,7 @@ elif isinstance( input, DataToolParameter ) and input.multiple: input_values[ input.name ] = \ DatasetListWrapper( input_values[ input.name ], - false_paths=input_false_paths, + dataset_paths=input_dataset_paths, datatypes_registry=self.app.datatypes_registry, tool=self.tool, name=input.name ) @@ -163,8 +163,8 @@ name=input.name ) real_path = dataset.file_name - if real_path in input_false_paths: - wrapper_kwds[ "false_path" ] = input_false_paths[ real_path ] + if real_path in input_dataset_paths: + wrapper_kwds[ "dataset_path" ] = input_dataset_paths[ real_path ] input_values[ input.name ] = \ DatasetFilenameWrapper( dataset, **wrapper_kwds ) elif isinstance( input, SelectToolParameter ): @@ -212,22 +212,22 @@ name=name, ) real_path = data.file_name - if real_path in input_false_paths: - false_path = input_false_paths[ real_path ] - wrapper_kwds[ 'false_path' ] = false_path + if real_path in input_dataset_paths: + dataset_path = input_dataset_paths[ real_path ] + wrapper_kwds[ 'dataset_path' ] = dataset_path param_dict[name] = DatasetFilenameWrapper( data, **wrapper_kwds ) if data: for child in data.children: param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child ) - output_false_paths = dataset_path_rewrites( output_paths ) + output_dataset_paths = dataset_path_rewrites( output_paths ) for name, hda in output_datasets.items(): # Write outputs to the working directory (for security purposes) # if desired. 
real_path = hda.file_name - if real_path in output_false_paths: - false_path = output_false_paths[ real_path ] - param_dict[name] = DatasetFilenameWrapper( hda, false_path=false_path ) - open( false_path, 'w' ).close() + if real_path in output_dataset_paths: + dataset_path = output_dataset_paths[ real_path ] + param_dict[name] = DatasetFilenameWrapper( hda, dataset_path=dataset_path ) + open( dataset_path.false_path, 'w' ).close() else: param_dict[name] = DatasetFilenameWrapper( hda ) # Provide access to a path to store additional files diff -r 55276af33bf85f0ec5b773a8e6bb96dbe9d8b613 -r 79557437160ae40b410dc1c7d627bf722387b324 lib/galaxy/tools/wrappers.py --- a/lib/galaxy/tools/wrappers.py +++ b/lib/galaxy/tools/wrappers.py @@ -161,7 +161,7 @@ def items( self ): return iter( [ ( k, self.get( k ) ) for k, v in self.metadata.items() ] ) - def __init__( self, dataset, datatypes_registry=None, tool=None, name=None, false_path=None ): + def __init__( self, dataset, datatypes_registry=None, tool=None, name=None, dataset_path=None ): if not dataset: try: # TODO: allow this to work when working with grouping @@ -172,7 +172,7 @@ else: self.dataset = dataset self.metadata = self.MetadataWrapper( dataset.metadata ) - self.false_path = false_path + self.false_path = getattr( dataset_path, "false_path", None ) def __str__( self ): if self.false_path is not None: @@ -193,15 +193,15 @@ class DatasetListWrapper( list ): """ """ - def __init__( self, datasets, false_paths=[], **kwargs ): + def __init__( self, datasets, dataset_paths=[], **kwargs ): if not isinstance(datasets, list): datasets = [datasets] def to_wrapper( dataset ): real_path = dataset.file_name wrapper_kwds = kwargs.copy() - if real_path in false_paths: - wrapper_kwds[ "false_path" ] = false_paths[ real_path ] + if real_path in dataset_paths: + wrapper_kwds[ "dataset_path" ] = dataset_paths[ real_path ] return DatasetFilenameWrapper( dataset, **wrapper_kwds ) list.__init__( self, map( to_wrapper, datasets ) ) diff -r 55276af33bf85f0ec5b773a8e6bb96dbe9d8b613 -r 79557437160ae40b410dc1c7d627bf722387b324 test/unit/tools/test_wrappers.py --- a/test/unit/tools/test_wrappers.py +++ b/test/unit/tools/test_wrappers.py @@ -90,7 +90,7 @@ def test_dataset_wrapper_false_path(): dataset = MockDataset() new_path = "/new/path/dataset_123.dat" - wrapper = DatasetFilenameWrapper(dataset, false_path=new_path) + wrapper = DatasetFilenameWrapper(dataset, dataset_path=Bunch(false_path=new_path)) assert str( wrapper ) == new_path assert wrapper.file_name == new_path https://bitbucket.org/galaxy/galaxy-central/commits/9b5215a5db72/ Changeset: 9b5215a5db72 User: jmchilton Date: 2014-02-11 04:34:42 Summary: Allow 'false_path' style replacing of extra files paths. Updates DatasetFilenameWrapper and DatasetPath to allow this. 
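For readers following along, a minimal self-contained sketch of the override pattern adopted here may help: explicitly supplied false (compute-side) paths win, and every other attribute falls through to the wrapped dataset. The class names and paths below are illustrative stand-ins, not the actual DatasetFilenameWrapper/DatasetPath classes shown in the diff.

# Illustrative sketch of the false-path override pattern (assumed names, not Galaxy's classes).
class FakeDataset(object):
    def __init__(self):
        self.file_name = "/galaxy/database/files/001/dataset_123.dat"
        self.extra_files_path = "/galaxy/database/files/001/dataset_123_files"
        self.ext = "bam"


class PathOverridingWrapper(object):
    """Return false (compute-side) paths when provided, otherwise delegate to the dataset."""

    def __init__(self, dataset, false_path=None, false_extra_files_path=None):
        self.dataset = dataset
        self.false_path = false_path
        self.false_extra_files_path = false_extra_files_path

    def __getattr__(self, key):
        # Only called for attributes not set on the wrapper itself.
        if key == "file_name" and self.false_path is not None:
            return self.false_path
        if key == "extra_files_path" and self.false_extra_files_path is not None:
            return self.false_extra_files_path
        return getattr(self.dataset, key)


wrapper = PathOverridingWrapper(
    FakeDataset(),
    false_path="/remote/staging/dataset_123.dat",
    false_extra_files_path="/remote/staging/dataset_123_files",
)
assert wrapper.file_name == "/remote/staging/dataset_123.dat"
assert wrapper.extra_files_path == "/remote/staging/dataset_123_files"
assert wrapper.ext == "bam"  # unrelated attributes still come from the dataset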
Affected #: 3 files diff -r 79557437160ae40b410dc1c7d627bf722387b324 -r 9b5215a5db7215881d8cc7b04bb853195196a60e lib/galaxy/jobs/datasets.py --- a/lib/galaxy/jobs/datasets.py +++ b/lib/galaxy/jobs/datasets.py @@ -14,10 +14,18 @@ class DatasetPath( object ): - def __init__( self, dataset_id, real_path, false_path=None, mutable=True ): + def __init__( + self, + dataset_id, + real_path, + false_path=None, + false_extra_files_path=None, + mutable=True + ): self.dataset_id = dataset_id self.real_path = real_path self.false_path = false_path + self.false_extra_files_path = false_extra_files_path self.mutable = mutable def __str__( self ): diff -r 79557437160ae40b410dc1c7d627bf722387b324 -r 9b5215a5db7215881d8cc7b04bb853195196a60e lib/galaxy/tools/wrappers.py --- a/lib/galaxy/tools/wrappers.py +++ b/lib/galaxy/tools/wrappers.py @@ -173,6 +173,7 @@ self.dataset = dataset self.metadata = self.MetadataWrapper( dataset.metadata ) self.false_path = getattr( dataset_path, "false_path", None ) + self.false_extra_files_path = getattr( dataset_path, "false_extra_files_path", None ) def __str__( self ): if self.false_path is not None: @@ -183,6 +184,8 @@ def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': return self.false_path + elif self.false_extra_files_path is not None and key == 'extra_files_path': + return self.false_extra_files_path else: return getattr( self.dataset, key ) diff -r 79557437160ae40b410dc1c7d627bf722387b324 -r 9b5215a5db7215881d8cc7b04bb853195196a60e test/unit/tools/test_wrappers.py --- a/test/unit/tools/test_wrappers.py +++ b/test/unit/tools/test_wrappers.py @@ -8,6 +8,7 @@ from galaxy.tools.wrappers import SelectToolParameterWrapper from galaxy.tools.wrappers import InputValueWrapper from galaxy.tools.wrappers import DatasetFilenameWrapper +from galaxy.jobs.datasets import DatasetPath from galaxy.util.bunch import Bunch from elementtree.ElementTree import XML from galaxy.datatypes.metadata import MetadataSpecCollection @@ -95,6 +96,24 @@ assert wrapper.file_name == new_path +def test_dataset_false_extra_files_path(): + dataset = MockDataset() + + wrapper = DatasetFilenameWrapper(dataset) + assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH + + new_path = "/new/path/dataset_123.dat" + dataset_path = DatasetPath(123, MOCK_DATASET_PATH, false_path=new_path) + wrapper = DatasetFilenameWrapper(dataset, dataset_path=dataset_path) + # Setting false_path is not enough to override + assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH + + new_files_path = "/new/path/dataset_123_files" + dataset_path = DatasetPath(123, MOCK_DATASET_PATH, false_path=new_path, false_extra_files_path=new_files_path) + wrapper = DatasetFilenameWrapper(dataset, dataset_path=dataset_path) + assert wrapper.extra_files_path == new_files_path + + def _drilldown_parameter(tool): xml = XML( '''<param name="some_name" type="drill_down" display="checkbox" hierarchy="recurse" multiple="true"><options> @@ -127,6 +146,7 @@ MOCK_DATASET_PATH = "/galaxy/database/files/001/dataset_123.dat" +MOCK_DATASET_EXTRA_FILES_PATH = "/galaxy/database/files/001/dataset_123.dat" MOCK_DATASET_EXT = "bam" @@ -135,6 +155,7 @@ def __init__(self): self.metadata = MetadataSpecCollection({}) self.file_name = MOCK_DATASET_PATH + self.extra_files_path = MOCK_DATASET_EXTRA_FILES_PATH self.ext = MOCK_DATASET_EXT https://bitbucket.org/galaxy/galaxy-central/commits/88b86067a11d/ Changeset: 88b86067a11d User: jmchilton Date: 2014-02-11 04:34:42 Summary: Extend LwrComputeEnvironmet to handle 
rewriting extra_files_path. Affected #: 2 files diff -r 9b5215a5db7215881d8cc7b04bb853195196a60e -r 88b86067a11d355159446bb98d75928149d08326 lib/galaxy/jobs/datasets.py --- a/lib/galaxy/jobs/datasets.py +++ b/lib/galaxy/jobs/datasets.py @@ -34,7 +34,7 @@ else: return self.false_path - def with_path_for_job( self, false_path ): + def with_path_for_job( self, false_path, false_extra_files_path=None ): """ Clone the dataset path but with a new false_path. """ @@ -44,6 +44,7 @@ dataset_id=self.dataset_id, real_path=self.real_path, false_path=false_path, + false_extra_files_path=false_extra_files_path, mutable=self.mutable, ) return dataset_path diff -r 9b5215a5db7215881d8cc7b04bb853195196a60e -r 88b86067a11d355159446bb98d75928149d08326 lib/galaxy/jobs/runners/lwr.py --- a/lib/galaxy/jobs/runners/lwr.py +++ b/lib/galaxy/jobs/runners/lwr.py @@ -395,7 +395,7 @@ for local_output_path in local_output_paths: wrapper_path = str( local_output_path ) remote_path = self.path_mapper.remote_output_path_rewrite( wrapper_path ) - results.append( local_output_path.with_path_for_job( remote_path ) ) + results.append( self._dataset_path( local_output_path, remote_path ) ) return results def input_paths( self ): @@ -408,9 +408,15 @@ # splitting, this input will be copied even though only the work dir # input will actually be used. remote_path = self.path_mapper.remote_input_path_rewrite( wrapper_path ) - results.append( local_input_path.with_path_for_job( remote_path ) ) + results.append( self._dataset_path( local_input_path, remote_path ) ) return results + def _dataset_path( self, local_dataset_path, remote_path ): + remote_extra_files_path = None + if remote_path: + remote_extra_files_path = "%s_files" % remote_path[ 0:-len( ".dat" ) ] + return local_dataset_path.with_path_for_job( remote_path, remote_extra_files_path ) + def working_directory( self ): return self._working_directory https://bitbucket.org/galaxy/galaxy-central/commits/eb71ed51be40/ Changeset: eb71ed51be40 User: jmchilton Date: 2014-02-11 04:34:42 Summary: Rework version string handling for ComputeEnvironment abstraction. Add version_path method to ComputeEnviornment interface - provide default implementation delegating to existing JobWrapper method as well as LwrComputeEnviornment piggy backing on recently added version command support. Build full command to do this in JobWrapper.prepare so that ComputeEnviornment is available and potentially remote path can be used. 
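The shape of the command assembled in JobWrapper.prepare is easier to see outside the diff. The sketch below is illustrative only (the environment classes, directories, and version file name are assumptions), but it shows the key point: the redirect target comes from the compute environment's version_path(), so a remote environment can hand back a path that is valid on the compute host.

# Hedged sketch of building the version-write command against a compute environment.
import os


class LocalEnvironment(object):
    def __init__(self, job_directory):
        self.job_directory = job_directory

    def version_path(self):
        return os.path.join(self.job_directory, "GALAXY_VERSION_STRING")


class RemoteEnvironment(object):
    def __init__(self, remote_working_directory):
        self.remote_working_directory = remote_working_directory

    def version_path(self):
        # A remote environment returns a path meaningful on the compute host.
        return os.path.join(self.remote_working_directory, "GALAXY_VERSION_STRING")


def build_write_version_cmd(version_string_cmd, compute_environment):
    # Mirrors the "%s > %s 2>&1" construction done during job preparation.
    if not version_string_cmd:
        return None
    return "%s > %s 2>&1" % (version_string_cmd, compute_environment.version_path())


print(build_write_version_cmd("samtools --version", LocalEnvironment("/galaxy/jobs/42")))
# samtools --version > /galaxy/jobs/42/GALAXY_VERSION_STRING 2>&1
print(build_write_version_cmd("samtools --version", RemoteEnvironment("/lwr/staging/42/working")))
# samtools --version > /lwr/staging/42/working/GALAXY_VERSION_STRING 2>&1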
Affected #: 5 files diff -r 88b86067a11d355159446bb98d75928149d08326 -r eb71ed51be40f3bfc523d278b67640bb280cd2e8 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -571,7 +571,7 @@ self.extra_filenames = [] self.command_line = None # Tool versioning variables - self.version_string_cmd = None + self.write_version_cmd = None self.version_string = "" self.galaxy_lib_dir = None # With job outputs in the working directory, we need the working @@ -689,7 +689,11 @@ self.sa_session.flush() # Return list of all extra files self.param_dict = tool_evaluator.param_dict - self.version_string_cmd = self.tool.version_string_cmd + version_string_cmd = self.tool.version_string_cmd + if version_string_cmd: + self.write_version_cmd = "%s > %s 2>&1" % ( version_string_cmd, compute_environment.version_path() ) + else: + self.write_version_cmd = None return self.extra_filenames def default_compute_environment( self, job=None ): @@ -889,7 +893,7 @@ else: final_job_state = job.states.ERROR - if self.version_string_cmd: + if self.write_version_cmd: version_filename = self.get_version_string_path() if os.path.exists(version_filename): self.version_string = open(version_filename).read() @@ -1575,6 +1579,10 @@ def new_file_path( self ): """ Location to dump new files for this job on remote server. """ + @abstractmethod + def version_path( self ): + """ Location of the version file for the underlying tool. """ + class SimpleComputeEnvironment( object ): @@ -1608,6 +1616,9 @@ def new_file_path( self ): return os.path.abspath( self.app.config.new_file_path ) + def version_path( self ): + return self.job_wrapper.get_version_string_path() + class NoopQueue( object ): """ diff -r 88b86067a11d355159446bb98d75928149d08326 -r eb71ed51be40f3bfc523d278b67640bb280cd2e8 lib/galaxy/jobs/command_factory.py --- a/lib/galaxy/jobs/command_factory.py +++ b/lib/galaxy/jobs/command_factory.py @@ -37,9 +37,9 @@ def __handle_version_command(commands_builder, job_wrapper): # Prepend version string - if job_wrapper.version_string_cmd: - version_command = "%s > %s 2>&1" % ( job_wrapper.version_string_cmd, job_wrapper.get_version_string_path() ) - commands_builder.prepend_command(version_command) + write_version_cmd = job_wrapper.write_version_cmd + if write_version_cmd: + commands_builder.prepend_command(write_version_cmd) def __handle_task_splitting(commands_builder, job_wrapper): diff -r 88b86067a11d355159446bb98d75928149d08326 -r eb71ed51be40f3bfc523d278b67640bb280cd2e8 lib/galaxy/jobs/runners/lwr.py --- a/lib/galaxy/jobs/runners/lwr.py +++ b/lib/galaxy/jobs/runners/lwr.py @@ -387,6 +387,11 @@ self._working_directory = remote_job_config[ "working_directory" ] self._sep = remote_job_config[ "system_properties" ][ "separator" ] self._tool_dir = remote_job_config[ "tools_directory" ] + version_path = self.local_path_config.version_path() + new_version_path = self.path_mapper.remote_version_path_rewrite(version_path) + if new_version_path: + version_path = new_version_path + self._version_path = version_path def output_paths( self ): local_output_paths = self._wrapper_output_paths @@ -428,3 +433,6 @@ def sep( self ): return self._sep + + def version_path( self ): + return self._version_path diff -r 88b86067a11d355159446bb98d75928149d08326 -r eb71ed51be40f3bfc523d278b67640bb280cd2e8 test/unit/jobs/test_job_wrapper.py --- a/test/unit/jobs/test_job_wrapper.py +++ b/test/unit/jobs/test_job_wrapper.py @@ -72,7 +72,7 @@ def test_prepare_sets_version_command(self): with self._prepared_wrapper() as 
wrapper: - assert TEST_VERSION_COMMAND == wrapper.version_string_cmd, wrapper.version_string_cmd + assert TEST_VERSION_COMMAND in wrapper.write_version_cmd, wrapper.write_version_cmd class TaskWrapperTestCase(BaseWrapperTestCase, TestCase): @@ -88,7 +88,7 @@ def test_prepare_sets_no_version_command(self): with self._prepared_wrapper() as wrapper: - assert wrapper.version_string_cmd is None + assert wrapper.write_version_cmd is None class MockEvaluator(object): diff -r 88b86067a11d355159446bb98d75928149d08326 -r eb71ed51be40f3bfc523d278b67640bb280cd2e8 test/unit/test_command_factory.py --- a/test/unit/test_command_factory.py +++ b/test/unit/test_command_factory.py @@ -133,7 +133,7 @@ class MockJobWrapper(object): def __init__(self): - self.version_string_cmd = None + self.write_version_cmd = None self.command_line = MOCK_COMMAND_LINE self.dependency_shell_commands = [] self.metadata_line = None https://bitbucket.org/galaxy/galaxy-central/commits/df387456caef/ Changeset: df387456caef User: jmchilton Date: 2014-02-11 04:34:42 Summary: Touch-up formatting of datatypes/metadata.py for clarity of subsequent changeset. Affected #: 1 file diff -r eb71ed51be40f3bfc523d278b67640bb280cd2e8 -r df387456caefdac23c073194fa5ed589409b7e9d lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -629,6 +629,9 @@ def setup_external_metadata( self, datasets, sa_session, exec_dir=None, tmp_dir=None, dataset_files_path=None, output_fnames=None, config_root=None, config_file=None, datatypes_config=None, job_metadata=None, kwds=None ): kwds = kwds or {} + if tmp_dir is None: + tmp_dir = MetadataTempFile.tmp_dir + #fill in metadata_files_dict and return the command with args required to set metadata def __metadata_files_list_to_cmd_line( metadata_files ): def __get_filename_override(): @@ -637,13 +640,19 @@ if dataset_path.false_path and dataset_path.real_path == metadata_files.dataset.file_name: return dataset_path.false_path return "" - return "%s,%s,%s,%s,%s,%s" % ( metadata_files.filename_in, metadata_files.filename_kwds, metadata_files.filename_out, metadata_files.filename_results_code, __get_filename_override(), metadata_files.filename_override_metadata ) + line = "%s,%s,%s,%s,%s,%s" % ( + metadata_files.filename_in, + metadata_files.filename_kwds, + metadata_files.filename_out, + metadata_files.filename_results_code, + __get_filename_override(), + metadata_files.filename_override_metadata + ) + return line if not isinstance( datasets, list ): datasets = [ datasets ] if exec_dir is None: exec_dir = os.path.abspath( os.getcwd() ) - if tmp_dir is None: - tmp_dir = MetadataTempFile.tmp_dir if dataset_files_path is None: dataset_files_path = galaxy.model.Dataset.file_path if config_root is None: https://bitbucket.org/galaxy/galaxy-central/commits/ff4b5fea89bf/ Changeset: ff4b5fea89bf User: jmchilton Date: 2014-02-11 04:34:42 Summary: Modify metadata command generation to allow different working directory path remotely. Utilize in LWR to fixup remote metadata generation when command rewriting is disabled. 
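The rewrite itself is simple path arithmetic: anything under the Galaxy-side tmp_dir is remapped to the equivalent location under the compute host's directory. The sketch below illustrates that logic; the helper and the paths are assumptions for demonstration, and Galaxy ships its own galaxy.util.in_directory.

# Sketch of remapping a metadata file path from the Galaxy host to the compute host.
import os


def in_directory(path, directory):
    # Simplified containment check (illustrative stand-in for galaxy.util.in_directory).
    directory = os.path.join(os.path.realpath(directory), "")
    return os.path.realpath(path).startswith(directory)


def metadata_path_on_compute(path, tmp_dir, compute_tmp_dir):
    # Paths are calculated for Galaxy; rewrite anything under tmp_dir for the compute server.
    if compute_tmp_dir and tmp_dir and in_directory(path, tmp_dir):
        return os.path.join(compute_tmp_dir, os.path.relpath(path, tmp_dir))
    return path


local = "/galaxy/database/job_working_directory/42/metadata_in_HistoryDatasetAssociation_7"
print(metadata_path_on_compute(local,
                               tmp_dir="/galaxy/database/job_working_directory/42",
                               compute_tmp_dir="/lwr/staging/42/working"))
# /lwr/staging/42/working/metadata_in_HistoryDatasetAssociation_7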
Affected #: 3 files diff -r df387456caefdac23c073194fa5ed589409b7e9d -r ff4b5fea89bfa44958bfe76eab0ae9532896a0d7 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -18,6 +18,7 @@ import galaxy.model from galaxy.util import listify, stringify_dictionary_keys, string_as_bool from galaxy.util.odict import odict +from galaxy.util import in_directory from galaxy.web import form_builder from sqlalchemy.orm import object_session @@ -627,11 +628,21 @@ return "%s_%d" % ( dataset.__class__.__name__, dataset.id ) def setup_external_metadata( self, datasets, sa_session, exec_dir=None, tmp_dir=None, dataset_files_path=None, - output_fnames=None, config_root=None, config_file=None, datatypes_config=None, job_metadata=None, kwds=None ): + output_fnames=None, config_root=None, config_file=None, datatypes_config=None, job_metadata=None, compute_tmp_dir=None, kwds=None ): kwds = kwds or {} if tmp_dir is None: tmp_dir = MetadataTempFile.tmp_dir + # path is calculated for Galaxy, may be different on compute - rewrite + # for the compute server. + def metadata_path_on_compute(path): + compute_path = path + log.info(compute_tmp_dir) + if compute_tmp_dir and tmp_dir and in_directory(path, tmp_dir): + path_relative = os.path.relpath(path, tmp_dir) + compute_path = os.path.join(compute_tmp_dir, path_relative) + return compute_path + #fill in metadata_files_dict and return the command with args required to set metadata def __metadata_files_list_to_cmd_line( metadata_files ): def __get_filename_override(): @@ -641,13 +652,14 @@ return dataset_path.false_path return "" line = "%s,%s,%s,%s,%s,%s" % ( - metadata_files.filename_in, - metadata_files.filename_kwds, - metadata_files.filename_out, - metadata_files.filename_results_code, + metadata_path_on_compute(metadata_files.filename_in), + metadata_path_on_compute(metadata_files.filename_kwds), + metadata_path_on_compute(metadata_files.filename_out), + metadata_path_on_compute(metadata_files.filename_results_code), __get_filename_override(), - metadata_files.filename_override_metadata + metadata_path_on_compute(metadata_files.filename_override_metadata), ) + log.info(line) return line if not isinstance( datasets, list ): datasets = [ datasets ] diff -r df387456caefdac23c073194fa5ed589409b7e9d -r ff4b5fea89bfa44958bfe76eab0ae9532896a0d7 lib/galaxy/jobs/command_factory.py --- a/lib/galaxy/jobs/command_factory.py +++ b/lib/galaxy/jobs/command_factory.py @@ -79,6 +79,7 @@ config_root = metadata_kwds.get( 'config_root', None ) config_file = metadata_kwds.get( 'config_file', None ) datatypes_config = metadata_kwds.get( 'datatypes_config', None ) + compute_tmp_dir = metadata_kwds.get( 'compute_tmp_dir', None ) metadata_command = job_wrapper.setup_external_metadata( exec_dir=exec_dir, tmp_dir=tmp_dir, @@ -88,6 +89,7 @@ config_root=config_root, config_file=config_file, datatypes_config=datatypes_config, + compute_tmp_dir=compute_tmp_dir, kwds={ 'overwrite' : False } ) or '' metadata_command = metadata_command.strip() diff -r df387456caefdac23c073194fa5ed589409b7e9d -r ff4b5fea89bfa44958bfe76eab0ae9532896a0d7 lib/galaxy/jobs/runners/lwr.py --- a/lib/galaxy/jobs/runners/lwr.py +++ b/lib/galaxy/jobs/runners/lwr.py @@ -351,8 +351,10 @@ metadata_kwds['exec_dir'] = remote_galaxy_home outputs_directory = remote_job_config['outputs_directory'] configs_directory = remote_job_config['configs_directory'] + working_directory = remote_job_config['working_directory'] outputs = [Bunch(false_path=os.path.join(outputs_directory, 
os.path.basename(path)), real_path=path) for path in self.get_output_files(job_wrapper)] metadata_kwds['output_fnames'] = outputs + metadata_kwds['compute_tmp_dir'] = working_directory metadata_kwds['config_root'] = remote_galaxy_home default_config_file = os.path.join(remote_galaxy_home, 'universe_wsgi.ini') metadata_kwds['config_file'] = remote_system_properties.get('galaxy_config_file', default_config_file) https://bitbucket.org/galaxy/galaxy-central/commits/4e09c9e6e0d3/ Changeset: 4e09c9e6e0d3 User: jmchilton Date: 2014-02-11 04:34:42 Summary: Refactor build_param_dict into smaller methods. Affected #: 1 file diff -r ff4b5fea89bfa44958bfe76eab0ae9532896a0d7 -r 4e09c9e6e0d3c47e4c05a5cfe77e07f39753bde0 lib/galaxy/tools/evaluation.py --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -104,6 +104,15 @@ param_dict.update( incoming ) input_dataset_paths = dataset_path_rewrites( input_paths ) + self.__populate_wrappers(param_dict, input_dataset_paths) + self.__populate_input_dataset_wrappers(param_dict, input_datasets, input_dataset_paths) + self.__populate_output_dataset_wrappers(param_dict, output_datasets, output_paths, job_working_directory) + self.__populate_non_job_params(param_dict) + + # Return the dictionary of parameters + return param_dict + + def __populate_wrappers(self, param_dict, input_dataset_paths): def wrap_values( inputs, input_values ): """ @@ -186,6 +195,7 @@ if self.tool.check_values: wrap_values( self.tool.inputs, param_dict ) + def __populate_input_dataset_wrappers(self, param_dict, input_datasets, input_dataset_paths): ## FIXME: when self.check_values==True, input datasets are being wrapped ## twice (above and below, creating 2 separate ## DatasetFilenameWrapper objects - first is overwritten by @@ -219,6 +229,8 @@ if data: for child in data.children: param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child ) + + def __populate_output_dataset_wrappers(self, param_dict, output_datasets, output_paths, job_working_directory): output_dataset_paths = dataset_path_rewrites( output_paths ) for name, hda in output_datasets.items(): # Write outputs to the working directory (for security purposes) @@ -242,6 +254,7 @@ # NoneDataset param_dict[ out_name ] = NoneDataset( datatypes_registry=self.app.datatypes_registry, ext=output.format ) + def __populate_non_job_params(self, param_dict): # -- Add useful attributes/functions for use in creating command line. # Function for querying a data table. @@ -271,8 +284,6 @@ param_dict['__datatypes_config__'] = param_dict['GALAXY_DATATYPES_CONF_FILE'] = self.app.datatypes_registry.integrated_datatypes_configs param_dict['__admin_users__'] = self.app.config.admin_users param_dict['__user__'] = RawObjectWrapper( param_dict.get( '__user__', None ) ) - # Return the dictionary of parameters - return param_dict def build( self ): """ https://bitbucket.org/galaxy/galaxy-central/commits/5804d9d32de8/ Changeset: 5804d9d32de8 User: jmchilton Date: 2014-02-11 04:34:42 Summary: Refactor wrapping values in build_param_dict. Split wrap_values into two methods - a reusable method for walking the tool param tree that takes in the second method which actually wraps the values at a given "leaf". 
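Abstracted away from the Galaxy parameter classes, the split looks roughly like the sketch below: a generic recursive walk over repeats and conditionals, with a caller-supplied callback applied at each leaf. The types, structures, and callback here are simplified stand-ins for illustration, not the real Galaxy code.

# Hedged sketch of the walk-plus-leaf-callback refactoring.
class Repeat(object):
    def __init__(self, name, inputs):
        self.name, self.inputs = name, inputs


class Conditional(object):
    def __init__(self, name, cases):
        self.name, self.cases = name, cases  # cases: list of dicts of inputs


class Param(object):
    def __init__(self, name):
        self.name = name


def walk_inputs(inputs, input_values, func):
    # Recurse through containers; apply func only at leaf parameters.
    for input in inputs.values():
        if isinstance(input, Repeat):
            for d in input_values[input.name]:
                walk_inputs(input.inputs, d, func)
        elif isinstance(input, Conditional):
            values = input_values[input.name]
            current = values["__current_case__"]
            walk_inputs(input.cases[current], values, func)
        else:
            func(input_values, input)


inputs = {
    "threads": Param("threads"),
    "queries": Repeat("queries", {"query": Param("query")}),
    "ref": Conditional("ref", [{"index": Param("index")}, {"fasta": Param("fasta")}]),
}
values = {
    "threads": 4,
    "queries": [{"query": "/old/data/a.fa"}, {"query": "/old/data/b.fa"}],
    "ref": {"__current_case__": 0, "index": "/old/index/human"},
}


def rewrite_leaf(input_values, input):
    # Example leaf callback: rewrite /old/... paths to /new/... paths.
    value = input_values[input.name]
    if isinstance(value, str) and value.startswith("/old/"):
        input_values[input.name] = value.replace("/old/", "/new/", 1)


walk_inputs(inputs, values, rewrite_leaf)
print(values["ref"]["index"])            # /new/index/human
print(values["queries"][0]["query"])     # /new/data/a.fa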
Affected #: 1 file diff -r 4e09c9e6e0d3c47e4c05a5cfe77e07f39753bde0 -r 5804d9d32de843205970ae5783071eb8ef38ade5 lib/galaxy/tools/evaluation.py --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -112,88 +112,95 @@ # Return the dictionary of parameters return param_dict - def __populate_wrappers(self, param_dict, input_dataset_paths): + def __walk_inputs(self, inputs, input_values, func): - def wrap_values( inputs, input_values ): + def do_walk( inputs, input_values ): """ Wraps parameters as neccesary. """ for input in inputs.itervalues(): if isinstance( input, Repeat ): for d in input_values[ input.name ]: - wrap_values( input.inputs, d ) + do_walk( input.inputs, d ) elif isinstance( input, Conditional ): values = input_values[ input.name ] current = values["__current_case__"] - wrap_values( input.cases[current].inputs, values ) - elif isinstance( input, DataToolParameter ) and input.multiple: - input_values[ input.name ] = \ - DatasetListWrapper( input_values[ input.name ], - dataset_paths=input_dataset_paths, - datatypes_registry=self.app.datatypes_registry, - tool=self.tool, - name=input.name ) - elif isinstance( input, DataToolParameter ): - ## FIXME: We're populating param_dict with conversions when - ## wrapping values, this should happen as a separate - ## step before wrapping (or call this wrapping step - ## something more generic) (but iterating this same - ## list twice would be wasteful) - # Add explicit conversions by name to current parent - for conversion_name, conversion_extensions, conversion_datatypes in input.conversions: - # If we are at building cmdline step, then converters - # have already executed - conv_ext, converted_dataset = input_values[ input.name ].find_conversion_destination( conversion_datatypes ) - # When dealing with optional inputs, we'll provide a - # valid extension to be used for None converted dataset - if not conv_ext: - conv_ext = conversion_extensions[0] - # input_values[ input.name ] is None when optional - # dataset, 'conversion' of optional dataset should - # create wrapper around NoneDataset for converter output - if input_values[ input.name ] and not converted_dataset: - # Input that converter is based from has a value, - # but converted dataset does not exist - raise Exception( 'A path for explicit datatype conversion has not been found: %s --/--> %s' - % ( input_values[ input.name ].extension, conversion_extensions ) ) - else: - # Trick wrapper into using target conv ext (when - # None) without actually being a tool parameter - input_values[ conversion_name ] = \ - DatasetFilenameWrapper( converted_dataset, - datatypes_registry=self.app.datatypes_registry, - tool=Bunch( conversion_name=Bunch( extensions=conv_ext ) ), - name=conversion_name ) - # Wrap actual input dataset - dataset = input_values[ input.name ] - wrapper_kwds = dict( - datatypes_registry=self.app.datatypes_registry, - tool=self, - name=input.name - ) - real_path = dataset.file_name - if real_path in input_dataset_paths: - wrapper_kwds[ "dataset_path" ] = input_dataset_paths[ real_path ] - input_values[ input.name ] = \ - DatasetFilenameWrapper( dataset, **wrapper_kwds ) - elif isinstance( input, SelectToolParameter ): - input_values[ input.name ] = SelectToolParameterWrapper( - input, input_values[ input.name ], self.app, other_values=param_dict ) + do_walk( input.cases[current].inputs, values ) + else: + func( input_values, input ) - elif isinstance( input, LibraryDatasetToolParameter ): - # TODO: Handle input rewrites in here? 
How to test LibraryDatasetToolParameters? - input_values[ input.name ] = LibraryDatasetValueWrapper( - input, input_values[ input.name ], param_dict ) + do_walk( inputs, input_values ) - else: - input_values[ input.name ] = InputValueWrapper( - input, input_values[ input.name ], param_dict ) + def __populate_wrappers(self, param_dict, input_dataset_paths): + + def wrap_input( input_values, input ): + if isinstance( input, DataToolParameter ) and input.multiple: + input_values[ input.name ] = \ + DatasetListWrapper( input_values[ input.name ], + dataset_paths=input_dataset_paths, + datatypes_registry=self.app.datatypes_registry, + tool=self.tool, + name=input.name ) + elif isinstance( input, DataToolParameter ): + ## FIXME: We're populating param_dict with conversions when + ## wrapping values, this should happen as a separate + ## step before wrapping (or call this wrapping step + ## something more generic) (but iterating this same + ## list twice would be wasteful) + # Add explicit conversions by name to current parent + for conversion_name, conversion_extensions, conversion_datatypes in input.conversions: + # If we are at building cmdline step, then converters + # have already executed + conv_ext, converted_dataset = input_values[ input.name ].find_conversion_destination( conversion_datatypes ) + # When dealing with optional inputs, we'll provide a + # valid extension to be used for None converted dataset + if not conv_ext: + conv_ext = conversion_extensions[0] + # input_values[ input.name ] is None when optional + # dataset, 'conversion' of optional dataset should + # create wrapper around NoneDataset for converter output + if input_values[ input.name ] and not converted_dataset: + # Input that converter is based from has a value, + # but converted dataset does not exist + raise Exception( 'A path for explicit datatype conversion has not been found: %s --/--> %s' + % ( input_values[ input.name ].extension, conversion_extensions ) ) + else: + # Trick wrapper into using target conv ext (when + # None) without actually being a tool parameter + input_values[ conversion_name ] = \ + DatasetFilenameWrapper( converted_dataset, + datatypes_registry=self.app.datatypes_registry, + tool=Bunch( conversion_name=Bunch( extensions=conv_ext ) ), + name=conversion_name ) + # Wrap actual input dataset + dataset = input_values[ input.name ] + wrapper_kwds = dict( + datatypes_registry=self.app.datatypes_registry, + tool=self, + name=input.name + ) + real_path = dataset.file_name + if real_path in input_dataset_paths: + wrapper_kwds[ "dataset_path" ] = input_dataset_paths[ real_path ] + input_values[ input.name ] = \ + DatasetFilenameWrapper( dataset, **wrapper_kwds ) + elif isinstance( input, SelectToolParameter ): + input_values[ input.name ] = SelectToolParameterWrapper( + input, input_values[ input.name ], self.app, other_values=param_dict ) + + elif isinstance( input, LibraryDatasetToolParameter ): + # TODO: Handle input rewrites in here? How to test LibraryDatasetToolParameters? + input_values[ input.name ] = LibraryDatasetValueWrapper( + input, input_values[ input.name ], param_dict ) + else: + input_values[ input.name ] = InputValueWrapper( + input, input_values[ input.name ], param_dict ) # HACK: only wrap if check_values is not false, this deals with external # tools where the inputs don't even get passed through. These # tools (e.g. UCSC) should really be handled in a special way. 
if self.tool.check_values: - wrap_values( self.tool.inputs, param_dict ) + self.__walk_inputs( self.tool.inputs, param_dict, wrap_input ) def __populate_input_dataset_wrappers(self, param_dict, input_datasets, input_dataset_paths): ## FIXME: when self.check_values==True, input datasets are being wrapped https://bitbucket.org/galaxy/galaxy-central/commits/a0bf1d3b27af/ Changeset: a0bf1d3b27af User: jmchilton Date: 2014-02-11 04:34:42 Summary: Allow ComputeEnvironment to rewrite 'arbitrary' paths. Previous changes enabled targetted rewriting of specific kinds of paths - working directory, inputs, outputs, extra files, version path, etc.... This change allows rewriting remaining 'unstructured' paths - namely data indices. Right now tool evaluation framework uses this capability only for SelectParameter values and fields - which is where these paths will be for data indices. Changeset lays out the recipe for doing this and the functionality could easily be extended for arbitrary parameters or other specific kinds of inputs. The default ComputeEnvironment does not rewrite any paths obviously, but the abstract base class docstring lays out how to extend a ComputeEnvironment to do this: def unstructured_path_rewriter( self ): """ Return a function that takes in a value, determines if it is path to be rewritten (will be passed non-path values as well - onus is on this function to determine both if its input is a path and if it should be rewritten.) """ The LwrComputeEnviroment has been updated to provide such a rewriter - it will rewrite such paths, and create a dict of paths that need to be transferred, etc.... The LWR server and client side infrastructure that enables this can be found in this changeset - https://bitbucket.org/jmchilton/lwr/commits/63981e79696337399edb42be5614bc72.... This changeset includes tests for changes to wrappers and the tool evaluation module to enable this. Affected #: 7 files diff -r 5804d9d32de843205970ae5783071eb8ef38ade5 -r a0bf1d3b27af23cc6429681b25220bd5560b1545 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1583,6 +1583,14 @@ def version_path( self ): """ Location of the version file for the underlying tool. """ + @abstractmethod + def unstructured_path_rewriter( self ): + """ Return a function that takes in a value, determines if it is path + to be rewritten (will be passed non-path values as well - onus is on + this function to determine both if its input is a path and if it should + be rewritten.) 
+ """ + class SimpleComputeEnvironment( object ): @@ -1592,6 +1600,9 @@ def sep( self ): return os.path.sep + def unstructured_path_rewriter( self ): + return lambda v: v + class SharedComputeEnvironment( SimpleComputeEnvironment ): """ Default ComputeEnviornment for job and task wrapper to pass diff -r 5804d9d32de843205970ae5783071eb8ef38ade5 -r a0bf1d3b27af23cc6429681b25220bd5560b1545 lib/galaxy/jobs/runners/lwr.py --- a/lib/galaxy/jobs/runners/lwr.py +++ b/lib/galaxy/jobs/runners/lwr.py @@ -67,7 +67,7 @@ def queue_job(self, job_wrapper): job_destination = job_wrapper.job_destination - command_line, client, remote_job_config = self.__prepare_job( job_wrapper, job_destination ) + command_line, client, remote_job_config, compute_environment = self.__prepare_job( job_wrapper, job_destination ) if not command_line: return @@ -77,6 +77,10 @@ remote_dependency_resolution = dependency_resolution == "remote" requirements = job_wrapper.tool.requirements if remote_dependency_resolution else [] rewrite_paths = not LwrJobRunner.__rewrite_parameters( client ) + unstructured_path_rewrites = {} + if compute_environment: + unstructured_path_rewrites = compute_environment.unstructured_path_rewrites + client_job_description = ClientJobDescription( command_line=command_line, output_files=self.get_output_files(job_wrapper), @@ -87,6 +91,7 @@ requirements=requirements, version_file=job_wrapper.get_version_string_path(), rewrite_paths=rewrite_paths, + arbitrary_files=unstructured_path_rewrites, ) job_id = lwr_submit_job(client, client_job_description, remote_job_config) log.info("lwr job submitted with job_id %s" % job_id) @@ -110,6 +115,7 @@ command_line = None client = None remote_job_config = None + compute_environment = None try: client = self.get_client_from_wrapper(job_wrapper) tool = job_wrapper.tool @@ -145,7 +151,7 @@ if not command_line: job_wrapper.finish( '', '' ) - return command_line, client, remote_job_config + return command_line, client, remote_job_config, compute_environment def __prepare_input_files_locally(self, job_wrapper): """Run task splitting commands locally.""" @@ -380,6 +386,7 @@ self.lwr_client = lwr_client self.job_wrapper = job_wrapper self.local_path_config = job_wrapper.default_compute_environment() + self.unstructured_path_rewrites = {} # job_wrapper.prepare is going to expunge the job backing the following # computations, so precalculate these paths. self._wrapper_input_paths = self.local_path_config.input_paths() @@ -438,3 +445,24 @@ def version_path( self ): return self._version_path + + def rewriter( self, parameter_value ): + unstructured_path_rewrites = self.unstructured_path_rewrites + if parameter_value in unstructured_path_rewrites: + # Path previously mapped, use previous mapping. + return unstructured_path_rewrites[ parameter_value ] + if parameter_value in unstructured_path_rewrites.itervalues(): + # Path is a rewritten remote path (this might never occur, + # consider dropping check...) + return parameter_value + + rewrite, new_unstructured_path_rewrites = self.path_mapper.check_for_arbitrary_rewrite( parameter_value ) + if rewrite: + unstructured_path_rewrites.update(new_unstructured_path_rewrites) + return rewrite + else: + # Did need to rewrite, use original path or value. 
+ return parameter_value + + def unstructured_path_rewriter( self ): + return self.rewriter diff -r 5804d9d32de843205970ae5783071eb8ef38ade5 -r a0bf1d3b27af23cc6429681b25220bd5560b1545 lib/galaxy/tools/evaluation.py --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -39,6 +39,7 @@ for evaluating command and config cheetah templates. """ self.compute_environment = compute_environment + self.unstructured_path_rewriter = compute_environment.unstructured_path_rewriter() job = self.job incoming = dict( [ ( p.name, p.value ) for p in job.parameters ] ) @@ -107,6 +108,7 @@ self.__populate_wrappers(param_dict, input_dataset_paths) self.__populate_input_dataset_wrappers(param_dict, input_datasets, input_dataset_paths) self.__populate_output_dataset_wrappers(param_dict, output_datasets, output_paths, job_working_directory) + self.__populate_unstructured_path_rewrites(param_dict) self.__populate_non_job_params(param_dict) # Return the dictionary of parameters @@ -186,7 +188,7 @@ DatasetFilenameWrapper( dataset, **wrapper_kwds ) elif isinstance( input, SelectToolParameter ): input_values[ input.name ] = SelectToolParameterWrapper( - input, input_values[ input.name ], self.app, other_values=param_dict ) + input, input_values[ input.name ], self.app, other_values=param_dict, path_rewriter=self.unstructured_path_rewriter ) elif isinstance( input, LibraryDatasetToolParameter ): # TODO: Handle input rewrites in here? How to test LibraryDatasetToolParameters? @@ -292,6 +294,18 @@ param_dict['__admin_users__'] = self.app.config.admin_users param_dict['__user__'] = RawObjectWrapper( param_dict.get( '__user__', None ) ) + def __populate_unstructured_path_rewrites(self, param_dict): + + def rewrite_unstructured_paths( input_values, input ): + if isinstance( input, SelectToolParameter ): + input_values[ input.name ] = SelectToolParameterWrapper( + input, input_values[ input.name ], self.app, other_values=param_dict, path_rewriter=self.unstructured_path_rewriter ) + + if not self.tool.check_values and self.unstructured_path_rewriter: + # The tools weren't "wrapped" yet, but need to be in order to get + #the paths rewritten. + self.__walk_inputs( self.tool.inputs, param_dict, rewrite_unstructured_paths ) + def build( self ): """ Build runtime description of job to execute, evaluate command and diff -r 5804d9d32de843205970ae5783071eb8ef38ade5 -r a0bf1d3b27af23cc6429681b25220bd5560b1545 lib/galaxy/tools/parameters/basic.py --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -631,6 +631,9 @@ return None +DEFAULT_VALUE_MAP = lambda x: x + + class SelectToolParameter( ToolParameter ): """ Parameter that takes on one (or many) or a specific set of values. 
@@ -827,7 +830,7 @@ else: return str( value ) - def to_param_dict_string( self, value, other_values={} ): + def to_param_dict_string( self, value, other_values={}, value_map=DEFAULT_VALUE_MAP ): if value is None: return "None" if isinstance( value, list ): @@ -842,7 +845,9 @@ else: value = sanitize_param( value ) if isinstance( value, list ): - value = self.separator.join( value ) + value = self.separator.join( map( value_map, value ) ) + else: + value = value_map( value ) return value def value_to_basic( self, value, app ): @@ -1425,7 +1430,7 @@ rval.append( val ) return rval - def to_param_dict_string( self, value, other_values={} ): + def to_param_dict_string( self, value, other_values={}, value_map=DEFAULT_VALUE_MAP ): def get_options_list( value ): def get_base_option( value, options ): for option in options: @@ -1456,7 +1461,7 @@ if len( rval ) > 1: if not( self.repeat ): assert self.multiple, "Multiple values provided but parameter is not expecting multiple values" - rval = self.separator.join( rval ) + rval = self.separator.join( map( value_map, rval ) ) if self.tool is None or self.tool.options.sanitize: if self.sanitizer: rval = self.sanitizer.sanitize_param( rval ) diff -r 5804d9d32de843205970ae5783071eb8ef38ade5 -r a0bf1d3b27af23cc6429681b25220bd5560b1545 lib/galaxy/tools/wrappers.py --- a/lib/galaxy/tools/wrappers.py +++ b/lib/galaxy/tools/wrappers.py @@ -84,6 +84,9 @@ return getattr( self.value, key ) +DEFAULT_PATH_REWRITER = lambda x: x + + class SelectToolParameterWrapper( ToolParameterValueWrapper ): """ Wraps a SelectTooParameter so that __str__ returns the selected value, but all other @@ -95,26 +98,28 @@ Provide access to any field by name or index for this particular value. Only applicable for dynamic_options selects, which have more than simple 'options' defined (name, value, selected). 
""" - def __init__( self, input, value, other_values ): + def __init__( self, input, value, other_values, path_rewriter ): self._input = input self._value = value self._other_values = other_values self._fields = {} + self._path_rewriter = path_rewriter def __getattr__( self, name ): if name not in self._fields: self._fields[ name ] = self._input.options.get_field_by_name_for_value( name, self._value, None, self._other_values ) - return self._input.separator.join( map( str, self._fields[ name ] ) ) + return self._input.separator.join( map( self._path_rewriter, map( str, self._fields[ name ] ) ) ) - def __init__( self, input, value, app, other_values={} ): + def __init__( self, input, value, app, other_values={}, path_rewriter=None ): self.input = input self.value = value self.input.value_label = input.value_to_display_text( value, app ) self._other_values = other_values - self.fields = self.SelectToolParameterFieldWrapper( input, value, other_values ) + self._path_rewriter = path_rewriter or DEFAULT_PATH_REWRITER + self.fields = self.SelectToolParameterFieldWrapper( input, value, other_values, self._path_rewriter ) def __str__( self ): - return self.input.to_param_dict_string( self.value, other_values=self._other_values ) + return self.input.to_param_dict_string( self.value, other_values=self._other_values, value_map=self._path_rewriter ) def __getattr__( self, key ): return getattr( self.input, key ) diff -r 5804d9d32de843205970ae5783071eb8ef38ade5 -r a0bf1d3b27af23cc6429681b25220bd5560b1545 test/unit/tools/test_evaluation.py --- a/test/unit/tools/test_evaluation.py +++ b/test/unit/tools/test_evaluation.py @@ -114,6 +114,34 @@ assert open( config_filename, "r").read() == "4" self.assertEquals(command_line, "prog1 %s" % config_filename) + def test_arbitrary_path_rewriting_wrapped( self ): + self.tool.check_values = True + self.__test_arbitrary_path_rewriting() + + def test_arbitrary_path_rewriting_unwrapped( self ): + self.tool.check_values = False + self.__test_arbitrary_path_rewriting() + + def __test_arbitrary_path_rewriting( self ): + self.job.parameters = [ JobParameter( name="index_path", value="\"/old/path/human\"" ) ] + xml = XML('''<param name="index_path" type="select"> + <option value="/old/path/human">Human</option> + <option value="/old/path/mouse">Mouse</option> + </param>''') + parameter = SelectToolParameter( self.tool, xml ) + self.tool.set_params( { + "index_path": parameter + } ) + self.tool._command_line = "prog1 $index_path" + + def test_path_rewriter(v): + if v: + v = v.replace("/old", "/new") + return v + self._set_compute_environment(path_rewriter=test_path_rewriter) + command_line, extra_filenames = self.evaluator.build( ) + self.assertEquals(command_line, "prog1 /new/path/human") + def test_template_property_app( self ): self._assert_template_property_is("$__app__.config.new_file_path", self.app.config.new_file_path) @@ -171,11 +199,13 @@ working_directory, input_paths=[ '/galaxy/files/dataset_1.dat' ], output_paths=[ '/galaxy/files/dataset_2.dat' ], + path_rewriter=None ): self._new_file_path = new_file_path self._working_directory = working_directory self._input_paths = input_paths self._output_paths = output_paths + self._path_rewriter = path_rewriter def input_paths( self ): return self._input_paths @@ -189,6 +219,12 @@ def new_file_path(self): return self._new_file_path + def unstructured_path_rewriter(self): + if self._path_rewriter: + return self._path_rewriter + else: + return super(TestComputeEnviornment, self).unstructured_path_rewriter() + class 
MockTool( object ): diff -r 5804d9d32de843205970ae5783071eb8ef38ade5 -r a0bf1d3b27af23cc6429681b25220bd5560b1545 test/unit/tools/test_wrappers.py --- a/test/unit/tools/test_wrappers.py +++ b/test/unit/tools/test_wrappers.py @@ -62,6 +62,21 @@ assert wrapper.fields.name == "name1,name2" +@with_mock_tool +def test_select_wrapper_with_path_rewritting(tool): + parameter = _setup_blast_tool(tool, multiple=True) + wrapper = SelectToolParameterWrapper( parameter, ["val1", "val2"], tool.app, other_values={}, path_rewriter=lambda v: "Rewrite<%s>" % v ) + assert str(wrapper) == "Rewrite<val1>,Rewrite<val2>" + assert wrapper.fields.path == "Rewrite<path1>,Rewrite<path2>" + + +@with_mock_tool +def test_select_wrapper_drilldown_path_rewritting(tool): + parameter = _drilldown_parameter(tool) + wrapper = SelectToolParameterWrapper( parameter, ["option3"], tool.app, other_values={}, path_rewriter=lambda v: "Rewrite<%s>" % v ) + assert str(wrapper) == "Rewrite<option3>" + + def test_raw_object_wrapper(): obj = Bunch(x=4) wrapper = RawObjectWrapper(obj) https://bitbucket.org/galaxy/galaxy-central/commits/be47cdc22244/ Changeset: be47cdc22244 User: jmchilton Date: 2014-02-11 04:34:42 Summary: Job related PEP-8/Galaxy style adjustments. Affected #: 2 files diff -r a0bf1d3b27af23cc6429681b25220bd5560b1545 -r be47cdc22244f5499814ad60b3b073a2c6415e19 lib/galaxy/jobs/handler.py --- a/lib/galaxy/jobs/handler.py +++ b/lib/galaxy/jobs/handler.py @@ -20,6 +20,7 @@ JOB_WAIT, JOB_ERROR, JOB_INPUT_ERROR, JOB_INPUT_DELETED, JOB_READY, JOB_DELETED, JOB_ADMIN_DELETED, JOB_USER_OVER_QUOTA = 'wait', 'error', 'input_error', 'input_deleted', 'ready', 'deleted', 'admin_deleted', 'user_over_quota' DEFAULT_JOB_PUT_FAILURE_MESSAGE = 'Unable to run job due to a misconfiguration of the Galaxy job running system. Please contact a site administrator.' + class JobHandler( object ): """ Handle the preparation, running, tracking, and finishing of jobs @@ -31,12 +32,15 @@ # Queues for starting and stopping jobs self.job_queue = JobHandlerQueue( app, self.dispatcher ) self.job_stop_queue = JobHandlerStopQueue( app, self.dispatcher ) + def start( self ): self.job_queue.start() + def shutdown( self ): self.job_queue.shutdown() self.job_stop_queue.shutdown() + class JobHandlerQueue( object ): """ Job manager, waits for jobs to be runnable and then dispatches to @@ -95,14 +99,14 @@ | ( model.Job.state == model.Job.states.RUNNING ) \ | ( model.Job.state == model.Job.states.QUEUED ) ) \ & ( model.Job.handler == self.app.config.server_name ) \ - & or_( ( model.Job.user_id == None ),( model.User.active == True ) ) ).all() + & or_( ( model.Job.user_id == None ), ( model.User.active == True ) ) ).all() else: jobs_at_startup = self.sa_session.query( model.Job ).enable_eagerloads( False ) \ .filter( ( ( model.Job.state == model.Job.states.NEW ) \ | ( model.Job.state == model.Job.states.RUNNING ) \ | ( model.Job.state == model.Job.states.QUEUED ) ) \ & ( model.Job.handler == self.app.config.server_name ) ).all() - + for job in jobs_at_startup: if job.tool_id not in self.app.toolbox.tools_by_id: log.warning( "(%s) Tool '%s' removed from tool config, unable to recover job" % ( job.id, job.tool_id ) ) @@ -160,7 +164,7 @@ over all new and waiting jobs to check the state of the jobs each depends on. If the job has dependencies that have not finished, it it goes to the waiting queue. If the job has dependencies with errors, - it is marked as having errors and removed from the queue. If the job + it is marked as having errors and removed from the queue. 
If the job belongs to an inactive user it is ignored. Otherwise, the job is dispatched. """ @@ -174,11 +178,11 @@ .join(model.JobToInputDatasetAssociation) \ .join(model.HistoryDatasetAssociation) \ .join(model.Dataset) \ - .filter(and_((model.Job.state == model.Job.states.NEW), - or_((model.HistoryDatasetAssociation._state == model.HistoryDatasetAssociation.states.FAILED_METADATA), - (model.HistoryDatasetAssociation.deleted == True ), - (model.Dataset.state != model.Dataset.states.OK ), - (model.Dataset.deleted == True)))).subquery() + .filter(and_( (model.Job.state == model.Job.states.NEW ), + or_( ( model.HistoryDatasetAssociation._state == model.HistoryDatasetAssociation.states.FAILED_METADATA ), + ( model.HistoryDatasetAssociation.deleted == True ), + ( model.Dataset.state != model.Dataset.states.OK ), + ( model.Dataset.deleted == True) ) ) ).subquery() ldda_not_ready = self.sa_session.query(model.Job.id).enable_eagerloads(False) \ .join(model.JobToInputLibraryDatasetAssociation) \ .join(model.LibraryDatasetDatasetAssociation) \ @@ -192,7 +196,7 @@ jobs_to_check = self.sa_session.query(model.Job).enable_eagerloads(False) \ .outerjoin( model.User ) \ .filter(and_((model.Job.state == model.Job.states.NEW), - or_((model.Job.user_id == None),(model.User.active == True)), + or_((model.Job.user_id == None), (model.User.active == True)), (model.Job.handler == self.app.config.server_name), ~model.Job.table.c.id.in_(hda_not_ready), ~model.Job.table.c.id.in_(ldda_not_ready))) \ @@ -311,7 +315,7 @@ try: self.job_wrappers[job.id].job_destination except Exception, e: - failure_message = getattr(e, 'failure_message', DEFAULT_JOB_PUT_FAILURE_MESSAGE ) + failure_message = getattr( e, 'failure_message', DEFAULT_JOB_PUT_FAILURE_MESSAGE ) if failure_message == DEFAULT_JOB_PUT_FAILURE_MESSAGE: log.exception( 'Failed to generate job destination' ) else: @@ -328,7 +332,7 @@ if usage > quota: return JOB_USER_OVER_QUOTA except AssertionError, e: - pass # No history, should not happen with an anon user + pass # No history, should not happen with an anon user return state def __clear_user_job_count( self ): @@ -465,11 +469,13 @@ log.info( "job handler queue stopped" ) self.dispatcher.shutdown() + class JobHandlerStopQueue( object ): """ A queue for jobs which need to be terminated prematurely. """ STOP_SIGNAL = object() + def __init__( self, app, dispatcher ): self.app = app self.dispatcher = dispatcher @@ -562,7 +568,9 @@ self.sleeper.wake() log.info( "job handler stop queue stopped" ) + class DefaultJobDispatcher( object ): + def __init__( self, app ): self.app = app self.job_runners = self.app.job_config.get_job_runner_plugins( self.app.config.server_name ) @@ -597,9 +605,9 @@ try: if isinstance(job_wrapper, TaskWrapper): #DBTODO Refactor - log.debug( "(%s) Dispatching task %s to %s runner" %( job_wrapper.job_id, job_wrapper.task_id, runner_name ) ) + log.debug( "(%s) Dispatching task %s to %s runner" % ( job_wrapper.job_id, job_wrapper.task_id, runner_name ) ) else: - log.debug( "(%s) Dispatching to %s runner" %( job_wrapper.job_id, runner_name ) ) + log.debug( "(%s) Dispatching to %s runner" % ( job_wrapper.job_id, runner_name ) ) self.job_runners[runner_name].put( job_wrapper ) except KeyError: log.error( 'put(): (%s) Invalid job runner: %s' % ( job_wrapper.job_id, runner_name ) ) @@ -626,9 +634,9 @@ # If we're stopping a task, then the runner_name may be # None, in which case it hasn't been scheduled. 
if ( None != job.get_job_runner_name() ): - runner_name = (job.get_job_runner_name().split(":",1))[0] + runner_name = ( job.get_job_runner_name().split( ":", 1 ) )[ 0 ] if ( isinstance( job, model.Job ) ): - log.debug( "stopping job %d in %s runner" %( job.get_id(), runner_name ) ) + log.debug( "stopping job %d in %s runner" % ( job.get_id(), runner_name ) ) elif ( isinstance( job, model.Task ) ): log.debug( "Stopping job %d, task %d in %s runner" % ( job.get_job().get_id(), job.get_id(), runner_name ) ) @@ -640,7 +648,7 @@ def recover( self, job, job_wrapper ): runner_name = ( job.job_runner_name.split(":", 1) )[0] - log.debug( "recovering job %d in %s runner" %( job.get_id(), runner_name ) ) + log.debug( "recovering job %d in %s runner" % ( job.get_id(), runner_name ) ) try: self.job_runners[runner_name].recover( job, job_wrapper ) except KeyError: diff -r a0bf1d3b27af23cc6429681b25220bd5560b1545 -r be47cdc22244f5499814ad60b3b073a2c6415e19 lib/galaxy/jobs/runners/__init__.py --- a/lib/galaxy/jobs/runners/__init__.py +++ b/lib/galaxy/jobs/runners/__init__.py @@ -25,7 +25,7 @@ class RunnerParams( object ): - def __init__( self, specs = None, params = None ): + def __init__( self, specs=None, params=None ): self.specs = specs or dict() self.params = params or dict() for name, value in self.params.items(): @@ -51,12 +51,12 @@ self.app = app self.sa_session = app.model.context self.nworkers = nworkers - runner_param_specs = dict( recheck_missing_job_retries = dict( map = int, valid = lambda x: x >= 0, default = 0 ) ) + runner_param_specs = dict( recheck_missing_job_retries=dict( map=int, valid=lambda x: x >= 0, default=0 ) ) if 'runner_param_specs' in kwargs: runner_param_specs.update( kwargs.pop( 'runner_param_specs' ) ) if kwargs: log.debug( 'Loading %s with params: %s', self.runner_name, kwargs ) - self.runner_params = RunnerParams( specs = runner_param_specs, params = kwargs ) + self.runner_params = RunnerParams( specs=runner_param_specs, params=kwargs ) def _init_worker_threads(self): """Start ``nworkers`` worker threads. @@ -138,12 +138,12 @@ # Make sure the job hasn't been deleted if job_state == model.Job.states.DELETED: - log.debug( "(%s) Job deleted by user before it entered the %s queue" % ( job_id, self.runner_name ) ) + log.debug( "(%s) Job deleted by user before it entered the %s queue" % ( job_id, self.runner_name ) ) if self.app.config.cleanup_job in ( "always", "onsuccess" ): job_wrapper.cleanup() return False elif job_state != model.Job.states.QUEUED: - log.info( "(%s) Job is in state %s, skipping execution" % ( job_id, job_state ) ) + log.info( "(%s) Job is in state %s, skipping execution" % ( job_id, job_state ) ) # cleanup may not be safe in all states return False @@ -318,6 +318,7 @@ if attribute not in self.cleanup_file_attributes: self.cleanup_file_attributes.append( attribute ) + class AsynchronousJobRunner( BaseJobRunner ): """Parent class for any job runner that runs jobs asynchronously (e.g. via a distributed resource manager). Provides general methods for having a @@ -364,7 +365,7 @@ # Iterate over the list of watched jobs and check state try: self.check_watched_items() - except Exception, e: + except Exception: log.exception('Unhandled exception checking active jobs') # Sleep a bit before the next state check time.sleep( 1 ) https://bitbucket.org/galaxy/galaxy-central/commits/28d43f4b2eb9/ Changeset: 28d43f4b2eb9 User: jmchilton Date: 2014-02-11 04:34:42 Summary: Introduce DatasetParamContext to simplify DatasetToolParameter... 
This reduces code duplication related dataset_collectors now and abstracts out important functionality I reuse to collect dataset collections downstream. Affected #: 2 files diff -r be47cdc22244f5499814ad60b3b073a2c6415e19 -r 28d43f4b2eb915e7caf56d34e79556b56772b652 lib/galaxy/tools/parameters/basic.py --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -16,6 +16,7 @@ from sanitize import ToolParameterSanitizer import validation import dynamic_options +from .dataset_util import DatasetParamContext # For BaseURLToolParameter from galaxy.web import url_for from galaxy.model.item_attrs import Dictifiable @@ -1610,52 +1611,36 @@ self.conversions.append( ( name, conv_extensions, conv_types ) ) def get_html_field( self, trans=None, value=None, other_values={} ): - filter_value = None - if self.options: - try: - filter_value = self.options.get_options( trans, other_values )[0][0] - except IndexError: - pass # no valid options history = self._get_history( trans ) if value is not None: if type( value ) != list: value = [ value ] + dataset_param_context = DatasetParamContext( trans, history, self, value, other_values ) field = form_builder.SelectField( self.name, self.multiple, None, self.refresh_on_change, refresh_on_change_values=self.refresh_on_change_values ) # CRUCIAL: the dataset_collector function needs to be local to DataToolParameter.get_html_field() def dataset_collector( hdas, parent_hid ): - current_user_roles = trans.get_current_user_roles() for i, hda in enumerate( hdas ): hda_name = hda.name if parent_hid is not None: hid = "%s.%d" % ( parent_hid, i + 1 ) else: hid = str( hda.hid ) - if not hda.dataset.state in [galaxy.model.Dataset.states.ERROR, galaxy.model.Dataset.states.DISCARDED] and \ - ( hda.visible or ( value and hda in value and not hda.implicitly_converted_parent_datasets ) ) and \ - trans.app.security_agent.can_access_dataset( current_user_roles, hda.dataset ): - # If we are sending data to an external application, then we need to make sure there are no roles - # associated with the dataset that restrict it's access from "public". 
- if self.tool and self.tool.tool_type == 'data_destination' and not trans.app.security_agent.dataset_is_public( hda.dataset ): - continue - if self.options and self._options_filter_attribute( hda ) != filter_value: - continue - if hda.datatype.matches_any( self.formats ): - selected = ( value and ( hda in value ) ) - if hda.visible: - hidden_text = "" - else: - hidden_text = " (hidden)" - field.add_option( "%s:%s %s" % ( hid, hidden_text, hda_name ), hda.id, selected ) + valid_hda = dataset_param_context.valid_hda( hda ) + if not valid_hda: + continue + if not valid_hda.implicit_conversion: + selected = dataset_param_context.selected( hda ) + if hda.visible: + hidden_text = "" else: - target_ext, converted_dataset = hda.find_conversion_destination( self.formats ) - if target_ext: - if converted_dataset: - hda = converted_dataset - if not trans.app.security_agent.can_access_dataset( current_user_roles, hda.dataset ): - continue - selected = ( value and ( hda in value ) ) - field.add_option( "%s: (as %s) %s" % ( hid, target_ext, hda_name ), hda.id, selected ) + hidden_text = " (hidden)" + field.add_option( "%s:%s %s" % ( hid, hidden_text, hda_name ), hda.id, selected ) + else: + hda = valid_hda.hda # Get converted dataset + target_ext = valid_hda.target_ext + selected = dataset_param_context.selected( hda ) + field.add_option( "%s: (as %s) %s" % ( hid, target_ext, hda_name ), hda.id, selected ) # Also collect children via association object dataset_collector( hda.children, hid ) dataset_collector( history.active_datasets_children_and_roles, None ) @@ -1687,30 +1672,18 @@ if trans is None or trans.workflow_building_mode or trans.webapp.name == 'tool_shed': return DummyDataset() history = self._get_history( trans, history ) + dataset_param_context = DatasetParamContext( trans, history, self, None, context ) if self.optional: return None most_recent_dataset = [] - filter_value = None - if self.options: - try: - filter_value = self.options.get_options( trans, context )[0][0] - except IndexError: - pass # no valid options def dataset_collector( datasets ): for i, data in enumerate( datasets ): - if data.visible and not data.deleted and data.state not in [data.states.ERROR, data.states.DISCARDED]: - is_valid = False - if data.datatype.matches_any( self.formats ): - is_valid = True - else: - target_ext, converted_dataset = data.find_conversion_destination( self.formats ) - if target_ext: - is_valid = True - if converted_dataset: - data = converted_dataset - if not is_valid or ( self.options and self._options_filter_attribute( data ) != filter_value ): + if data.visible and dataset_param_context.hda_accessible( data, check_security=False ): + match = dataset_param_context.valid_hda_matches_format( data, check_security=False ) + if not match or dataset_param_context.filter( match.hda ): continue + data = match.hda most_recent_dataset.append(data) # Also collect children via association object dataset_collector( data.children ) diff -r be47cdc22244f5499814ad60b3b073a2c6415e19 -r 28d43f4b2eb915e7caf56d34e79556b56772b652 lib/galaxy/tools/parameters/dataset_util.py --- /dev/null +++ b/lib/galaxy/tools/parameters/dataset_util.py @@ -0,0 +1,96 @@ +import galaxy.model + +from logging import getLogger +log = getLogger( __name__ ) + +ROLES_UNSET = object() +INVALID_STATES = [ galaxy.model.Dataset.states.ERROR, galaxy.model.Dataset.states.DISCARDED ] + + +class DatasetParamContext( object ): + + def __init__( self, trans, history, param, value, other_values ): + self.trans = trans + self.history = history + 
self.param = param + self.tool = param.tool + self.value = value + self.current_user_roles = ROLES_UNSET + filter_value = None + if param.options: + try: + filter_value = param.options.get_options( trans, other_values )[0][0] + except IndexError: + pass # no valid options + self.filter_value = filter_value + + def hda_accessible( self, hda, check_security=True ): + dataset = hda.dataset + state_valid = not dataset.state in INVALID_STATES + return state_valid and (not check_security or self.__can_access_dataset( dataset ) ) + + def valid_hda_matches_format( self, hda, check_implicit_conversions=True, check_security=False ): + if self.filter( hda ): + return False + formats = self.param.formats + if hda.datatype.matches_any( formats ): + return ValidParamHdaDirect( hda ) + if not check_implicit_conversions: + return False + target_ext, converted_dataset = hda.find_conversion_destination( formats ) + if target_ext: + if converted_dataset: + hda = converted_dataset + if check_security and not self.__can_access_dataset( hda.dataset ): + return False + return ValidParamHdaImplicit(converted_dataset, target_ext) + return False + + def valid_hda( self, hda, check_implicit_conversions=True ): + accessible = self.hda_accessible( hda ) + if accessible and ( hda.visible or ( self.selected( hda ) and not hda.implicitly_converted_parent_datasets ) ): + # If we are sending data to an external application, then we need to make sure there are no roles + # associated with the dataset that restrict it's access from "public". + require_public = self.tool and self.tool.tool_type == 'data_destination' + if require_public and not self.trans.app.security_agent.dataset_is_public( hda.dataset ): + return False + if self.filter( hda ): + return False + return self.valid_hda_matches_format(hda) + + def selected( self, hda ): + value = self.value + return value and hda in value + + def filter( self, hda ): + param = self.param + return param.options and param._options_filter_attribute( hda ) != self.filter_value + + def __can_access_dataset( self, dataset ): + if self.current_user_roles is ROLES_UNSET: + self.current_user_roles = self.trans.get_current_user_roles() + return self.trans.app.security_agent.can_access_dataset( self.current_user_roles, dataset ) + + +class ValidParamHdaDirect( object ): + + def __init__( self, hda ): + self.hda = hda + + @property + def implicit_conversion( self ): + return False + + +class ValidParamHdaImplicit( object ): + + def __init__( self, converted_dataset, target_ext ): + self.hda = converted_dataset + self.target_ext = target_ext + + @property + def implicit_conversion( self ): + return True + + +__all__ = [ DatasetParamContext ] https://bitbucket.org/galaxy/galaxy-central/commits/924ceb1c1e8d/ Changeset: 924ceb1c1e8d User: jmchilton Date: 2014-02-11 04:34:42 Summary: Add get_output_name to DefaultToolAction. Slightly simplifies default execute but more importantly will allow reuse of logic when naming dataset collections.
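For reference, the naming flow this changeset consolidates can be pictured with the sketch below. It is illustrative only: the real get_output_name() lives on DefaultToolAction with a larger signature (trans, incoming, history, job_params) and fills labels with Galaxy's Cheetah-based fill_template(); here string.Template and the CollectionNamingAction subclass are stand-ins, just to show why a single overridable hook is useful.

from string import Template


class SketchToolAction(object):
    # Stand-in for DefaultToolAction: all output naming goes through one method.

    def get_output_name(self, output, dataset, tool, on_text, params):
        if output.label:
            # The tool XML supplied an explicit label, e.g. label="Assembly on ${on_string}"
            params['tool'] = tool
            params['on_string'] = on_text
            return Template(output.label).safe_substitute(params)
        # No label: fall back to the default "<tool name> on <input names>" form.
        return self._get_default_data_name(dataset, tool, on_text=on_text)

    def _get_default_data_name(self, dataset, tool, on_text=None, **kwd):
        name = tool.name
        if on_text:
            name += " on " + on_text
        return name


class CollectionNamingAction(SketchToolAction):
    # Hypothetical downstream subclass: an action that produces dataset
    # collections can name them through the same hook instead of copying
    # the label/default-name branches.

    def name_output_collection(self, output, collection, tool, on_text, params):
        return self.get_output_name(output, collection, tool, on_text, params)

The extraction changes no behaviour; it just turns output naming into a single call that downstream collection code can reuse.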
Affected #: 1 file diff -r 28d43f4b2eb915e7caf56d34e79556b56772b652 -r 924ceb1c1e8d0d7bdb6fa8fb00e0d4a9991f50eb lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -352,23 +352,10 @@ data.state = data.states.QUEUED data.blurb = "queued" # Set output label - if output.label: - if params is None: - params = make_dict_copy( incoming ) - # wrapping the params allows the tool config to contain things like - # <outputs> - # <data format="input" name="output" label="Blat on ${<input_param>.name}" /> - # </outputs> - wrap_values( tool.inputs, params, skip_missing_values=not tool.check_values ) - #tool (only needing to be set once) and on_string (set differently for each label) are overwritten for each output dataset label being determined - params['tool'] = tool - params['on_string'] = on_text - data.name = fill_template( output.label, context=params ) - else: - if params is None: - params = make_dict_copy( incoming ) - wrap_values( tool.inputs, params, skip_missing_values=not tool.check_values ) - data.name = self._get_default_data_name( data, tool, on_text=on_text, trans=trans, incoming=incoming, history=history, params=params, job_params=job_params ) + if params is None: + params = make_dict_copy( incoming ) + wrap_values( tool.inputs, params, skip_missing_values=not tool.check_values ) + data.name = self.get_output_name( output, data, tool, on_text, trans, incoming, history, params, job_params ) # Store output out_data[ name ] = data if output.actions: @@ -489,6 +476,14 @@ trans.log_event( "Added job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id ) return job, out_data + def get_output_name( self, output, dataset, tool, on_text, trans, incoming, history, params, job_params ): + if output.label: + params['tool'] = tool + params['on_string'] = on_text + return fill_template( output.label, context=params ) + else: + return self._get_default_data_name( dataset, tool, on_text=on_text, trans=trans, incoming=incoming, history=history, params=params, job_params=job_params ) + def _get_default_data_name( self, dataset, tool, on_text=None, trans=None, incoming=None, history=None, params=None, job_params=None, **kwd ): name = tool.name if on_text: https://bitbucket.org/galaxy/galaxy-central/commits/3d075d5c7eff/ Changeset: 3d075d5c7eff User: jmchilton Date: 2014-02-11 04:34:42 Summary: Refactor on_text generation for reuse when naming dataset collections. 
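The helper factored out here is small enough to restate in full. The condensed copy below mirrors the four branches visible in the diff that follows, with a few worked examples of the "on ..." suffix it produces (the "data N" names are arbitrary):

def on_text_for_names(input_names):
    # Mirrors the branches in the changeset below: 1, 2, 3 or many input names.
    if len(input_names) == 1:
        return input_names[0]
    elif len(input_names) == 2:
        return '%s and %s' % tuple(input_names[0:2])
    elif len(input_names) == 3:
        return '%s, %s, and %s' % tuple(input_names[0:3])
    elif len(input_names) > 3:
        return '%s, %s, and others' % tuple(input_names[0:2])
    return ""

assert on_text_for_names(["data 1"]) == "data 1"
assert on_text_for_names(["data 1", "data 2"]) == "data 1 and data 2"
assert on_text_for_names(["data 1", "data 2", "data 3"]) == "data 1, data 2, and data 3"
assert on_text_for_names(["data 1", "data 2", "data 3", "data 4"]) == "data 1, data 2, and others"
assert on_text_for_names([]) == ""

_get_default_data_name() then prepends the tool name, giving output names of the form "<tool name> on data 1 and data 2".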
Affected #: 1 file diff -r 924ceb1c1e8d0d7bdb6fa8fb00e0d4a9991f50eb -r 3d075d5c7effacbff1cf61c1ab0e2686230c18a1 lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -246,17 +246,10 @@ else: # No valid inputs, we will use history defaults output_permissions = trans.app.security_agent.history_get_default_permissions( history ) + # Build name for output datasets based on tool name and input names - if len( input_names ) == 1: - on_text = input_names[0] - elif len( input_names ) == 2: - on_text = '%s and %s' % tuple(input_names[0:2]) - elif len( input_names ) == 3: - on_text = '%s, %s, and %s' % tuple(input_names[0:3]) - elif len( input_names ) > 3: - on_text = '%s, %s, and others' % tuple(input_names[0:2]) - else: - on_text = "" + on_text = on_text_for_names( input_names ) + # Add the dbkey to the incoming parameters incoming[ "dbkey" ] = input_dbkey params = None # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed @@ -489,3 +482,18 @@ if on_text: name += ( " on " + on_text ) return name + + +def on_text_for_names( input_names ): + # Build name for output datasets based on tool name and input names + if len( input_names ) == 1: + on_text = input_names[0] + elif len( input_names ) == 2: + on_text = '%s and %s' % tuple(input_names[0:2]) + elif len( input_names ) == 3: + on_text = '%s, %s, and %s' % tuple(input_names[0:3]) + elif len( input_names ) > 3: + on_text = '%s, %s, and others' % tuple(input_names[0:2]) + else: + on_text = "" + return on_text https://bitbucket.org/galaxy/galaxy-central/commits/88799789ebd4/ Changeset: 88799789ebd4 User: jmchilton Date: 2014-02-11 04:34:42 Summary: Refactor stateful parameter wrapping logic out of DefaultToolAction... ... into new galaxy.tools.parameters.wrapped module. This slightly simplifies this execute method and reduces code duplication, but more importantly I reuse this method downstream for naming dataset collections. Affected #: 2 files diff -r 3d075d5c7effacbff1cf61c1ab0e2686230c18a1 -r 88799789ebd43337c3de71ed56cbdaf225b8210c lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -1,11 +1,9 @@ import os -import galaxy.tools from galaxy.exceptions import ObjectInvalid from galaxy.model import LibraryDatasetDatasetAssociation from galaxy.tools.parameters import DataToolParameter -from galaxy.tools.parameters import SelectToolParameter -from galaxy.tools.parameters.grouping import Conditional, Repeat +from galaxy.tools.parameters.wrapped import WrappedParameters from galaxy.util.json import from_json_string from galaxy.util.json import to_json_string from galaxy.util.none_like import NoneDataset @@ -119,63 +117,6 @@ submitting the job to the job queue. If history is not specified, use trans.history as destination for tool's output datasets. """ - def make_dict_copy( from_dict ): - """ - Makes a copy of input dictionary from_dict such that all values that are dictionaries - result in creation of a new dictionary ( a sort of deepcopy ). We may need to handle - other complex types ( e.g., lists, etc ), but not sure... - Yes, we need to handle lists (and now are)... 
- """ - copy_from_dict = {} - for key, value in from_dict.items(): - if type( value ).__name__ == 'dict': - copy_from_dict[ key ] = make_dict_copy( value ) - elif isinstance( value, list ): - copy_from_dict[ key ] = make_list_copy( value ) - else: - copy_from_dict[ key ] = value - return copy_from_dict - - def make_list_copy( from_list ): - new_list = [] - for value in from_list: - if isinstance( value, dict ): - new_list.append( make_dict_copy( value ) ) - elif isinstance( value, list ): - new_list.append( make_list_copy( value ) ) - else: - new_list.append( value ) - return new_list - - def wrap_values( inputs, input_values, skip_missing_values=False ): - # Wrap tool inputs as necessary - for input in inputs.itervalues(): - if input.name not in input_values and skip_missing_values: - continue - if isinstance( input, Repeat ): - for d in input_values[ input.name ]: - wrap_values( input.inputs, d, skip_missing_values=skip_missing_values ) - elif isinstance( input, Conditional ): - values = input_values[ input.name ] - current = values[ "__current_case__" ] - wrap_values( input.cases[current].inputs, values, skip_missing_values=skip_missing_values ) - elif isinstance( input, DataToolParameter ) and input.multiple: - input_values[ input.name ] = \ - galaxy.tools.DatasetListWrapper( input_values[ input.name ], - datatypes_registry=trans.app.datatypes_registry, - tool=tool, - name=input.name ) - elif isinstance( input, DataToolParameter ): - input_values[ input.name ] = \ - galaxy.tools.DatasetFilenameWrapper( input_values[ input.name ], - datatypes_registry=trans.app.datatypes_registry, - tool=tool, - name=input.name ) - elif isinstance( input, SelectToolParameter ): - input_values[ input.name ] = galaxy.tools.SelectToolParameterWrapper( input, input_values[ input.name ], tool.app, other_values=incoming ) - else: - input_values[ input.name ] = galaxy.tools.InputValueWrapper( input, input_values[ input.name ], incoming ) - # Set history. if not history: history = tool.get_default_history_by_trans( trans, create=True ) @@ -232,7 +173,7 @@ elif 'len' in custom_build_dict: # Build is defined by len file, so use it. chrom_info = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( custom_build_dict[ 'len' ] ).file_name - + if not chrom_info: # Default to built-in build. 
chrom_info = os.path.join( trans.app.config.len_file_path, "%s.len" % input_dbkey ) @@ -252,7 +193,8 @@ # Add the dbkey to the incoming parameters incoming[ "dbkey" ] = input_dbkey - params = None # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed + # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed + wrapped_params = WrappedParameters( trans, tool, incoming ) # Keep track of parent / child relationships, we'll create all the # datasets first, then create the associations parent_to_child_pairs = [] @@ -293,9 +235,6 @@ #process change_format tags if output.change_format: - if params is None: - params = make_dict_copy( incoming ) - wrap_values( tool.inputs, params, skip_missing_values=not tool.check_values ) for change_elem in output.change_format: for when_elem in change_elem.findall( 'when' ): check = when_elem.get( 'input', None ) @@ -304,7 +243,7 @@ if '$' not in check: #allow a simple name or more complex specifications check = '${%s}' % check - if str( fill_template( check, context=params ) ) == when_elem.get( 'value', None ): + if str( fill_template( check, context=wrapped_params.params ) ) == when_elem.get( 'value', None ): ext = when_elem.get( 'format', ext ) except: # bad tag input value; possibly referencing a param within a different conditional when block or other nonexistent grouping construct continue @@ -345,10 +284,7 @@ data.state = data.states.QUEUED data.blurb = "queued" # Set output label - if params is None: - params = make_dict_copy( incoming ) - wrap_values( tool.inputs, params, skip_missing_values=not tool.check_values ) - data.name = self.get_output_name( output, data, tool, on_text, trans, incoming, history, params, job_params ) + data.name = self.get_output_name( output, data, tool, on_text, trans, incoming, history, wrapped_params.params, job_params ) # Store output out_data[ name ] = data if output.actions: diff -r 3d075d5c7effacbff1cf61c1ab0e2686230c18a1 -r 88799789ebd43337c3de71ed56cbdaf225b8210c lib/galaxy/tools/parameters/wrapped.py --- /dev/null +++ b/lib/galaxy/tools/parameters/wrapped.py @@ -0,0 +1,94 @@ +import galaxy.tools + +from galaxy.tools.parameters.basic import ( + DataToolParameter, + SelectToolParameter, +) +from galaxy.tools.parameters.grouping import ( + Repeat, + Conditional, +) +PARAMS_UNWRAPPED = object() + + +class WrappedParameters( object ): + + def __init__( self, trans, tool, incoming ): + self.trans = trans + self.tool = tool + self.incoming = incoming + self._params = PARAMS_UNWRAPPED + + @property + def params( self ): + if self._params is PARAMS_UNWRAPPED: + params = make_dict_copy( self.incoming ) + self.wrap_values( self.tool.inputs, params, skip_missing_values=not self.tool.check_values ) + self._params = params + return self._params + + def wrap_values( self, inputs, input_values, skip_missing_values=False ): + trans = self.trans + tool = self.tool + incoming = self.incoming + + # Wrap tool inputs as necessary + for input in inputs.itervalues(): + if input.name not in input_values and skip_missing_values: + continue + if isinstance( input, Repeat ): + for d in input_values[ input.name ]: + self.wrap_values( input.inputs, d, skip_missing_values=skip_missing_values ) + elif isinstance( input, Conditional ): + values = input_values[ input.name ] + current = values[ "__current_case__" ] + self.wrap_values( input.cases[current].inputs, values, skip_missing_values=skip_missing_values ) + elif isinstance( input, 
DataToolParameter ) and input.multiple: + input_values[ input.name ] = \ + galaxy.tools.DatasetListWrapper( input_values[ input.name ], + datatypes_registry=trans.app.datatypes_registry, + tool=tool, + name=input.name ) + elif isinstance( input, DataToolParameter ): + input_values[ input.name ] = \ + galaxy.tools.DatasetFilenameWrapper( input_values[ input.name ], + datatypes_registry=trans.app.datatypes_registry, + tool=tool, + name=input.name ) + elif isinstance( input, SelectToolParameter ): + input_values[ input.name ] = galaxy.tools.SelectToolParameterWrapper( input, input_values[ input.name ], tool.app, other_values=incoming ) + else: + input_values[ input.name ] = galaxy.tools.InputValueWrapper( input, input_values[ input.name ], incoming ) + + +def make_dict_copy( from_dict ): + """ + Makes a copy of input dictionary from_dict such that all values that are dictionaries + result in creation of a new dictionary ( a sort of deepcopy ). We may need to handle + other complex types ( e.g., lists, etc ), but not sure... + Yes, we need to handle lists (and now are)... + """ + copy_from_dict = {} + for key, value in from_dict.items(): + if type( value ).__name__ == 'dict': + copy_from_dict[ key ] = make_dict_copy( value ) + elif isinstance( value, list ): + copy_from_dict[ key ] = make_list_copy( value ) + else: + copy_from_dict[ key ] = value + return copy_from_dict + + +def make_list_copy( from_list ): + new_list = [] + for value in from_list: + if isinstance( value, dict ): + new_list.append( make_dict_copy( value ) ) + elif isinstance( value, list ): + new_list.append( make_list_copy( value ) ) + else: + new_list.append( value ) + return new_list + + +__all__ = [ WrappedParameters, make_dict_copy ] https://bitbucket.org/galaxy/galaxy-central/commits/b95d750d1eed/ Changeset: b95d750d1eed User: jmchilton Date: 2014-02-11 04:34:42 Summary: Outline galaxy.tools.js and load in tool_form.mako. Migrated the logic out of tool_form.mako that I could readily test, the rest of the custom javascript in tool_form.mako should likewise be migrated I assume. Holds a lot of logic downstream in my work related to running tools in parallel across many datasets and over dataset collections - https://github.com/jmchilton/galaxy-central/blob/collections_1/static/script.... 
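Before the two-file diff for this JavaScript changeset, one note on the WrappedParameters helper added in the previous changeset: its params property is lazy, so the make_dict_copy()-plus-wrap_values() work happens at most once per tool execution, and only if change_format handling or output labelling actually asks for it. A minimal, self-contained sketch of that caching pattern follows; LazyWrappedParams and _UNSET are illustrative names echoing PARAMS_UNWRAPPED, and copy.deepcopy stands in for the real wrapping step.

import copy

_UNSET = object()  # sentinel, like PARAMS_UNWRAPPED in wrapped.py


class LazyWrappedParams(object):
    # Sketch of the caching behaviour of WrappedParameters.params.

    def __init__(self, incoming):
        self.incoming = incoming
        self._params = _UNSET

    @property
    def params(self):
        if self._params is _UNSET:
            # In Galaxy this is make_dict_copy() followed by wrap_values();
            # a plain deepcopy stands in for that work here.
            self._params = copy.deepcopy(self.incoming)
        return self._params


wrapped = LazyWrappedParams({"dbkey": "hg19", "input": "dataset 1"})
assert wrapped.params is wrapped.params  # computed once, then cached

Both the change_format check and get_output_name() consult the same cached dict, which is what lets execute() drop its repeated "if params is None" re-wrapping blocks.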
Affected #: 2 files diff -r 88799789ebd43337c3de71ed56cbdaf225b8210c -r b95d750d1eed64c19c0e31d6683f0f8dca73aeb3 static/scripts/galaxy.tools.js --- /dev/null +++ b/static/scripts/galaxy.tools.js @@ -0,0 +1,20 @@ +// dependencies +define([ "mvc/tools" ], function( Tools ) { + + var checkUncheckAll = function( name, check ) { + $("input[name='" + name + "'][type='checkbox']").attr('checked', !!check); + } + + // Inserts the Select All / Unselect All buttons for checkboxes + $("div.checkUncheckAllPlaceholder").each( function() { + var check_name = $(this).attr("checkbox_name"); + select_link = $("<a class='action-button'></a>").text("Select All").click(function() { + checkUncheckAll(check_name, true); + }); + unselect_link = $("<a class='action-button'></a>").text("Unselect All").click(function() { + checkUncheckAll(check_name, false); + }); + $(this).append(select_link).append(" ").append(unselect_link); + }); + +}); diff -r 88799789ebd43337c3de71ed56cbdaf225b8210c -r b95d750d1eed64c19c0e31d6683f0f8dca73aeb3 templates/webapps/galaxy/tool_form.mako --- a/templates/webapps/galaxy/tool_form.mako +++ b/templates/webapps/galaxy/tool_form.mako @@ -15,6 +15,9 @@ ${parent.javascripts()} ${h.js( "galaxy.panels", "libs/jquery/jstorage" )} <script type="text/javascript"> + require( [ "galaxy.tools" ] ); + </script> + <script type="text/javascript"> $(function() { $(window).bind("refresh_on_change", function() { $(':file').each( function() { @@ -58,22 +61,6 @@ }); }); - function checkUncheckAll( name, check ) { - $("input[name='" + name + "'][type='checkbox']").attr('checked', !!check); - } - - // Inserts the Select All / Unselect All buttons for checkboxes - $("div.checkUncheckAllPlaceholder").each( function() { - var check_name = $(this).attr("checkbox_name"); - select_link = $("<a class='action-button'></a>").text("Select All").click(function() { - checkUncheckAll(check_name, true); - }); - unselect_link = $("<a class='action-button'></a>").text("Unselect All").click(function() { - checkUncheckAll(check_name, false); - }); - $(this).append(select_link).append(" ").append(unselect_link); - }); - $(".add-librarydataset").click(function() { var link = $(this); $.ajax({ Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.