[hg] galaxy 2414: Move files_path to the job working directory a...
details:   http://www.bx.psu.edu/hg/galaxy/rev/1a82088aac38
changeset: 2414:1a82088aac38
user:      Nate Coraor <nate@bx.psu.edu>
date:      Thu May 21 11:32:25 2009 -0400
description:
Move files_path to the job working directory and fix ugly bugs that broke files_path.

2 file(s) affected in this change:

lib/galaxy/jobs/__init__.py
lib/galaxy/tools/__init__.py

diffs (89 lines):

diff -r a54fd974365d -r 1a82088aac38 lib/galaxy/jobs/__init__.py
--- a/lib/galaxy/jobs/__init__.py	Thu May 21 10:17:56 2009 -0400
+++ b/lib/galaxy/jobs/__init__.py	Thu May 21 11:32:25 2009 -0400
@@ -351,7 +351,7 @@
         incoming['userId'] = userId
         incoming['userEmail'] = userEmail
         # Build params, done before hook so hook can use
-        param_dict = self.tool.build_param_dict( incoming, inp_data, out_data, self.get_output_fnames() )
+        param_dict = self.tool.build_param_dict( incoming, inp_data, out_data, self.get_output_fnames(), self.working_directory )
         # Certain tools require tasks to be completed prior to job execution
         # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ).
         if self.tool.tool_type is not None:
@@ -529,7 +529,7 @@
         param_dict = dict( [ ( p.name, p.value ) for p in job.parameters ] ) # why not re-use self.param_dict here? ##dunno...probably should, this causes tools.parameters.basic.UnvalidatedValue to be used in following methods instead of validated and transformed values during i.e. running workflows
         param_dict = self.tool.params_from_strings( param_dict, self.app )
         # Check for and move associated_files
-        self.tool.collect_associated_files(out_data)
+        self.tool.collect_associated_files(out_data, self.working_directory)
         # Create generated output children and primary datasets and add to param_dict
         collected_datasets = {'children':self.tool.collect_child_datasets(out_data),'primary':self.tool.collect_primary_datasets(out_data)}
         param_dict.update({'__collected_datasets__':collected_datasets})
@@ -596,7 +596,7 @@
         job = model.Job.get( self.job_id )
         if self.app.config.outputs_to_working_directory:
             self.output_paths = []
-            for name, data in [ ( da.name, da.dataset ) for da in job.output_datasets ]:
+            for name, data in [ ( da.name, da.dataset.dataset ) for da in job.output_datasets ]:
                 false_path = os.path.abspath( os.path.join( self.working_directory, "galaxy_dataset_%d.dat" % data.id ) )
                 self.output_paths.append( DatasetPath( data.file_name, false_path ) )
         else:
diff -r a54fd974365d -r 1a82088aac38 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py	Thu May 21 10:17:56 2009 -0400
+++ b/lib/galaxy/tools/__init__.py	Thu May 21 11:32:25 2009 -0400
@@ -1140,7 +1140,7 @@
             message = e.message
         return message
 
-    def build_param_dict( self, incoming, input_datasets, output_datasets, output_paths ):
+    def build_param_dict( self, incoming, input_datasets, output_datasets, output_paths, job_working_directory ):
         """
         Build the dictionary of parameters for substituting into the command
         line. Each value is wrapped in a `InputValueWrapper`, which allows
@@ -1191,22 +1191,22 @@
             if data:
                 for child in data.children:
                     param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child )
-        for name, data in output_datasets.items():
+        for name, hda in output_datasets.items():
             # Write outputs to the working directory (for security purposes) if desired.
             if self.app.config.outputs_to_working_directory:
                 try:
-                    false_path = [ dp.false_path for dp in output_paths if dp.real_path == data.file_name ][0]
-                    param_dict[name] = DatasetFilenameWrapper( data, false_path = false_path )
+                    false_path = [ dp.false_path for dp in output_paths if dp.real_path == hda.file_name ][0]
+                    param_dict[name] = DatasetFilenameWrapper( hda, false_path = false_path )
                     open( false_path, 'w' ).close()
                 except IndexError:
                     log.warning( "Unable to determine alternate path for writing job outputs, outputs will be written to their real paths" )
-                    param_dict[name] = DatasetFilenameWrapper( data )
+                    param_dict[name] = DatasetFilenameWrapper( hda )
             else:
-                param_dict[name] = DatasetFilenameWrapper( data )
+                param_dict[name] = DatasetFilenameWrapper( hda )
             # Provide access to a path to store additional files
             # TODO: path munging for cluster/dataset server relocatability
-            param_dict[name].files_path = os.path.abspath(os.path.join(self.app.config.new_file_path, "dataset_%s_files" % (data.id) ))
-            for child in data.children:
+            param_dict[name].files_path = os.path.abspath(os.path.join( job_working_directory, "dataset_%s_files" % (hda.dataset.id) ))
+            for child in hda.children:
                 param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child )
         # We add access to app here, this allows access to app.config, etc
         param_dict['__app__'] = RawObjectWrapper( self.app )
@@ -1394,12 +1394,12 @@
             data.set_peek()
             data.flush()
 
-    def collect_associated_files( self, output ):
-        for name, outdata in output.items():
-            temp_file_path = os.path.join( self.app.config.new_file_path, "dataset_%s_files" % ( outdata.id ) )
+    def collect_associated_files( self, output, job_working_directory ):
+        for name, hda in output.items():
+            temp_file_path = os.path.join( job_working_directory, "dataset_%s_files" % ( hda.dataset.id ) )
             try:
                 if len( os.listdir( temp_file_path ) ) > 0:
-                    store_file_path = os.path.join( os.path.join( self.app.config.file_path, *directory_hash_id( hda.dataset.id ) ), "dataset_%d_files" % hda.dataset.id )
+                    store_file_path = os.path.join( os.path.join( self.app.config.file_path, *directory_hash_id( hda.dataset.id ) ), "dataset_%d_files" % hda.dataset.id )
                     shutil.move( temp_file_path, store_file_path )
             except:
                 continue
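In short: build_param_dict now points each output's files_path at a per-dataset directory inside the job working directory (rather than app.config.new_file_path), and collect_associated_files moves anything the tool wrote there into permanent storage after the job finishes. The other fixes switch the id used to name those paths from the HistoryDatasetAssociation to the underlying Dataset (hda.dataset.id), which is what actually names files on disk. Below is a minimal standalone sketch of that flow, not Galaxy code: working_dir, store_root, and the helper names are illustrative stand-ins, and the directory_hash_id layout of the real store is omitted.

    import os
    import shutil
    import tempfile

    def files_path( working_dir, dataset_id ):
        # Per-dataset scratch directory inside the job working directory; this
        # is the path build_param_dict now exposes to the tool as
        # <output>.files_path.
        return os.path.abspath( os.path.join( working_dir, "dataset_%d_files" % dataset_id ) )

    def collect_associated_files( working_dir, store_root, dataset_id ):
        # Simplified analogue of Tool.collect_associated_files: if the tool
        # wrote anything into the scratch directory, move it into permanent
        # storage. (The real method builds store_root from
        # app.config.file_path plus directory_hash_id; omitted here.)
        temp_file_path = files_path( working_dir, dataset_id )
        if os.path.isdir( temp_file_path ) and os.listdir( temp_file_path ):
            store_file_path = os.path.join( store_root, "dataset_%d_files" % dataset_id )
            shutil.move( temp_file_path, store_file_path )

    if __name__ == '__main__':
        working_dir = tempfile.mkdtemp( prefix='job_' )    # stands in for the job working directory
        store_root = tempfile.mkdtemp( prefix='files_' )   # stands in for app.config.file_path
        # Simulate a tool writing an extra file alongside its output dataset (id 42 here).
        extra_dir = files_path( working_dir, 42 )
        os.makedirs( extra_dir )
        open( os.path.join( extra_dir, 'extra.txt' ), 'w' ).close()
        # After the job finishes, the extra files are collected into the store.
        collect_associated_files( working_dir, store_root, 42 )
        print( os.listdir( os.path.join( store_root, 'dataset_42_files' ) ) )   # ['extra.txt']

A likely motivation for the move, given the "for security purposes" comment in the diff, is that the job working directory is job-private and cleaned up with the job, so extra files no longer accumulate under the shared new_file_path.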