[hg] galaxy 2414: Move files_path to the job working directory a...
details:   http://www.bx.psu.edu/hg/galaxy/rev/1a82088aac38
changeset: 2414:1a82088aac38
user:      Nate Coraor <nate@bx.psu.edu>
date:      Thu May 21 11:32:25 2009 -0400
description:
Move files_path to the job working directory and fix ugly bugs that broke files_path.

2 file(s) affected in this change:

lib/galaxy/jobs/__init__.py
lib/galaxy/tools/__init__.py

diffs (89 lines):

diff -r a54fd974365d -r 1a82088aac38 lib/galaxy/jobs/__init__.py
--- a/lib/galaxy/jobs/__init__.py	Thu May 21 10:17:56 2009 -0400
+++ b/lib/galaxy/jobs/__init__.py	Thu May 21 11:32:25 2009 -0400
@@ -351,7 +351,7 @@
         incoming['userId'] = userId
         incoming['userEmail'] = userEmail
         # Build params, done before hook so hook can use
-        param_dict = self.tool.build_param_dict( incoming, inp_data, out_data, self.get_output_fnames() )
+        param_dict = self.tool.build_param_dict( incoming, inp_data, out_data, self.get_output_fnames(), self.working_directory )
         # Certain tools require tasks to be completed prior to job execution
         # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ).
         if self.tool.tool_type is not None:
@@ -529,7 +529,7 @@
         param_dict = dict( [ ( p.name, p.value ) for p in job.parameters ] ) # why not re-use self.param_dict here? ##dunno...probably should, this causes tools.parameters.basic.UnvalidatedValue to be used in following methods instead of validated and transformed values during i.e. running workflows
         param_dict = self.tool.params_from_strings( param_dict, self.app )
         # Check for and move associated_files
-        self.tool.collect_associated_files(out_data)
+        self.tool.collect_associated_files(out_data, self.working_directory)
         # Create generated output children and primary datasets and add to param_dict
         collected_datasets = {'children':self.tool.collect_child_datasets(out_data),'primary':self.tool.collect_primary_datasets(out_data)}
         param_dict.update({'__collected_datasets__':collected_datasets})
@@ -596,7 +596,7 @@
         job = model.Job.get( self.job_id )
         if self.app.config.outputs_to_working_directory:
             self.output_paths = []
-            for name, data in [ ( da.name, da.dataset ) for da in job.output_datasets ]:
+            for name, data in [ ( da.name, da.dataset.dataset ) for da in job.output_datasets ]:
                 false_path = os.path.abspath( os.path.join( self.working_directory, "galaxy_dataset_%d.dat" % data.id ) )
                 self.output_paths.append( DatasetPath( data.file_name, false_path ) )
         else:
diff -r a54fd974365d -r 1a82088aac38 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py	Thu May 21 10:17:56 2009 -0400
+++ b/lib/galaxy/tools/__init__.py	Thu May 21 11:32:25 2009 -0400
@@ -1140,7 +1140,7 @@
             message = e.message
         return message
 
-    def build_param_dict( self, incoming, input_datasets, output_datasets, output_paths ):
+    def build_param_dict( self, incoming, input_datasets, output_datasets, output_paths, job_working_directory ):
         """
         Build the dictionary of parameters for substituting into the command
         line. Each value is wrapped in a `InputValueWrapper`, which allows
@@ -1191,22 +1191,22 @@
             if data:
                 for child in data.children:
                     param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child )
-        for name, data in output_datasets.items():
+        for name, hda in output_datasets.items():
             # Write outputs to the working directory (for security purposes) if desired.
             if self.app.config.outputs_to_working_directory:
                 try:
-                    false_path = [ dp.false_path for dp in output_paths if dp.real_path == data.file_name ][0]
-                    param_dict[name] = DatasetFilenameWrapper( data, false_path = false_path )
+                    false_path = [ dp.false_path for dp in output_paths if dp.real_path == hda.file_name ][0]
+                    param_dict[name] = DatasetFilenameWrapper( hda, false_path = false_path )
                     open( false_path, 'w' ).close()
                 except IndexError:
                     log.warning( "Unable to determine alternate path for writing job outputs, outputs will be written to their real paths" )
-                    param_dict[name] = DatasetFilenameWrapper( data )
+                    param_dict[name] = DatasetFilenameWrapper( hda )
             else:
-                param_dict[name] = DatasetFilenameWrapper( data )
+                param_dict[name] = DatasetFilenameWrapper( hda )
             # Provide access to a path to store additional files
             # TODO: path munging for cluster/dataset server relocatability
-            param_dict[name].files_path = os.path.abspath(os.path.join(self.app.config.new_file_path, "dataset_%s_files" % (data.id) ))
-            for child in data.children:
+            param_dict[name].files_path = os.path.abspath(os.path.join( job_working_directory, "dataset_%s_files" % (hda.dataset.id) ))
+            for child in hda.children:
                 param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child )
         # We add access to app here, this allows access to app.config, etc
         param_dict['__app__'] = RawObjectWrapper( self.app )
@@ -1394,12 +1394,12 @@
             data.set_peek()
             data.flush()
 
-    def collect_associated_files( self, output ):
-        for name, outdata in output.items():
-            temp_file_path = os.path.join( self.app.config.new_file_path, "dataset_%s_files" % ( outdata.id ) )
+    def collect_associated_files( self, output, job_working_directory ):
+        for name, hda in output.items():
+            temp_file_path = os.path.join( job_working_directory, "dataset_%s_files" % ( hda.dataset.id ) )
             try:
                 if len( os.listdir( temp_file_path ) ) > 0:
-                    store_file_path = os.path.join( os.path.join( self.app.config.file_path, *directory_hash_id( hda.dataset.id ) ), "dataset_%d_files" % hda.dataset.id )
+                    store_file_path = os.path.join( os.path.join( self.app.config.file_path, *directory_hash_id( hda.dataset.id ) ), "dataset_%d_files" % hda.dataset.id )
                     shutil.move( temp_file_path, store_file_path )
             except:
                 continue
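In short: build_param_dict now points each output's files_path at a per-dataset directory inside the job working directory (rather than app.config.new_file_path), and collect_associated_files moves anything the tool wrote there into permanent storage after the job finishes. The other fixes switch the id used to name those paths from the HistoryDatasetAssociation to the underlying Dataset (hda.dataset.id), which is what actually names files on disk. Below is a minimal standalone sketch of that flow, not Galaxy code: working_dir, store_root, and the helper names are illustrative stand-ins, and the directory_hash_id layout of the real store is omitted.

    import os
    import shutil
    import tempfile

    def files_path( working_dir, dataset_id ):
        # Per-dataset scratch directory inside the job working directory; this
        # is the path build_param_dict now exposes to the tool as
        # <output>.files_path.
        return os.path.abspath( os.path.join( working_dir, "dataset_%d_files" % dataset_id ) )

    def collect_associated_files( working_dir, store_root, dataset_id ):
        # Simplified analogue of Tool.collect_associated_files: if the tool
        # wrote anything into the scratch directory, move it into permanent
        # storage. (The real method builds store_root from
        # app.config.file_path plus directory_hash_id; omitted here.)
        temp_file_path = files_path( working_dir, dataset_id )
        if os.path.isdir( temp_file_path ) and os.listdir( temp_file_path ):
            store_file_path = os.path.join( store_root, "dataset_%d_files" % dataset_id )
            shutil.move( temp_file_path, store_file_path )

    if __name__ == '__main__':
        working_dir = tempfile.mkdtemp( prefix='job_' )    # stands in for the job working directory
        store_root = tempfile.mkdtemp( prefix='files_' )   # stands in for app.config.file_path
        # Simulate a tool writing an extra file alongside its output dataset (id 42 here).
        extra_dir = files_path( working_dir, 42 )
        os.makedirs( extra_dir )
        open( os.path.join( extra_dir, 'extra.txt' ), 'w' ).close()
        # After the job finishes, the extra files are collected into the store.
        collect_associated_files( working_dir, store_root, 42 )
        print( os.listdir( os.path.join( store_root, 'dataset_42_files' ) ) )   # ['extra.txt']

A likely motivation for the move, given the "for security purposes" comment in the diff, is that the job working directory is job-private and cleaned up with the job, so extra files no longer accumulate under the shared new_file_path.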