details:   http://www.bx.psu.edu/hg/galaxy/rev/94df4d059b19
changeset: 1695:94df4d059b19
user:      Nate Coraor <nate@bx.psu.edu>
date:      Fri Jan 09 15:59:29 2009 -0500
description:
Add an option to override the output dataset(s) path, so they may be written
to the working directory. This allows administrators to mount datasets
read-only on cluster nodes.

3 file(s) affected in this change:

lib/galaxy/config.py
lib/galaxy/jobs/__init__.py
lib/galaxy/tools/__init__.py

diffs (100 lines):

diff -r 58744d4bda7b -r 94df4d059b19 lib/galaxy/config.py
--- a/lib/galaxy/config.py	Fri Jan 09 14:07:16 2009 -0500
+++ b/lib/galaxy/config.py	Fri Jan 09 15:59:29 2009 -0500
@@ -45,6 +45,7 @@
         self.job_scheduler_policy = kwargs.get("job_scheduler_policy", "FIFO")
         self.job_queue_cleanup_interval = int( kwargs.get("job_queue_cleanup_interval", "5") )
         self.job_working_directory = resolve_path( kwargs.get( "job_working_directory", "database/job_working_directory" ), self.root )
+        self.outputs_to_working_directory = string_as_bool( kwargs.get( 'outputs_to_working_directory', False ) )
         self.admin_pass = kwargs.get('admin_pass',"galaxy")
         self.sendmail_path = kwargs.get('sendmail_path',"/usr/sbin/sendmail")
         self.mailing_join_addr = kwargs.get('mailing_join_addr',"galaxy-user-join@bx.psu.edu")
diff -r 58744d4bda7b -r 94df4d059b19 lib/galaxy/jobs/__init__.py
--- a/lib/galaxy/jobs/__init__.py	Fri Jan 09 14:07:16 2009 -0500
+++ b/lib/galaxy/jobs/__init__.py	Fri Jan 09 15:59:29 2009 -0500
@@ -350,7 +350,7 @@
         incoming['userId'] = userId
         incoming['userEmail'] = userEmail
         # Build params, done before hook so hook can use
-        param_dict = self.tool.build_param_dict( incoming, inp_data, out_data )
+        param_dict = self.tool.build_param_dict( incoming, inp_data, out_data, self.working_directory )
         # Certain tools require tasks to be completed prior to job execution
         # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ).
         if self.tool.tool_type is not None:
@@ -471,6 +471,13 @@
         else:
             job.state = 'ok'
         for dataset_assoc in job.output_datasets:
+            if self.app.config.outputs_to_working_directory:
+                false_path = os.path.abspath( os.path.join( self.working_directory, "galaxy_dataset_%d.dat" % dataset_assoc.dataset.id ) )
+                if os.path.exists( false_path ):
+                    os.rename( false_path, dataset_assoc.dataset.file_name )
+                    log.debug( "Moved %s to %s" % ( false_path, dataset_assoc.dataset.file_name ) )
+                else:
+                    log.warning( "Missing output file in working directory: %s" % false_path )
             for dataset in dataset_assoc.dataset.dataset.history_associations: #need to update all associated output hdas, i.e. history was shared with job running
                 dataset.blurb = 'done'
                 dataset.peek = 'no peek'
diff -r 58744d4bda7b -r 94df4d059b19 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py	Fri Jan 09 14:07:16 2009 -0500
+++ b/lib/galaxy/tools/__init__.py	Fri Jan 09 15:59:29 2009 -0500
@@ -1045,7 +1045,7 @@
                 input.validate( value, None )
             input_values[ input.name ] = value
 
-    def build_param_dict( self, incoming, input_datasets, output_datasets ):
+    def build_param_dict( self, incoming, input_datasets, output_datasets, working_directory = None ):
         """
         Build the dictionary of parameters for substituting into the command line.
         Each value is wrapped in a `InputValueWrapper`, which allows
@@ -1090,7 +1090,13 @@
             for child in data.children:
                 param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child )
         for name, data in output_datasets.items():
-            param_dict[name] = DatasetFilenameWrapper( data )
+            # Write outputs to the working directory (for security purposes) if desired.
+            if self.app.config.outputs_to_working_directory and working_directory is not None:
+                false_path = os.path.abspath( os.path.join( working_directory, "galaxy_dataset_%d.dat" % data.id ) )
+                param_dict[name] = DatasetFilenameWrapper( data, false_path = false_path )
+                open( false_path, 'w' ).close()
+            else:
+                param_dict[name] = DatasetFilenameWrapper( data )
             # Provide access to a path to store additional files
             # TODO: path munging for cluster/dataset server relocatability
             param_dict[name].files_path = os.path.abspath(os.path.join(self.app.config.new_file_path, "dataset_%s_files" % (data.id) ))
@@ -1433,7 +1439,7 @@
         def items( self ):
             return iter( [ ( k, self.get( k ) ) for k, v in self.metadata.items() ] )
 
-    def __init__( self, dataset, datatypes_registry = None, tool = None, name = None ):
+    def __init__( self, dataset, datatypes_registry = None, tool = None, name = None, false_path = None ):
         if not dataset:
             try:
                 #TODO: allow this to work when working with grouping
@@ -1444,10 +1450,17 @@
         else:
             self.dataset = dataset
             self.metadata = self.MetadataWrapper( dataset.metadata )
+        self.false_path = false_path
     def __str__( self ):
-        return self.dataset.file_name
+        if self.false_path is not None:
+            return self.false_path
+        else:
+            return self.dataset.file_name
     def __getattr__( self, key ):
-        return getattr( self.dataset, key )
+        if self.false_path is not None and key == 'file_name':
+            return self.false_path
+        else:
+            return getattr( self.dataset, key )
 
 def json_fix( val ):
     if isinstance( val, list ):
@@ -1467,4 +1480,4 @@
             value[composite_key] = incoming[key + "_" + composite_key]
         return value
     else:
-        return incoming.get( key, default )
\ No newline at end of file
+        return incoming.get( key, default )
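
Background on enabling the new flag (editorial note, not part of the changeset):
the option is read via kwargs.get( 'outputs_to_working_directory', False ) and
passed through string_as_bool(), so an administrator turns it on by adding
outputs_to_working_directory = True to the Galaxy configuration file
(universe_wsgi.ini in Galaxy of this vintage; the file name is background
knowledge, not taken from the diff). With the flag set, tools write their
outputs as galaxy_dataset_<id>.dat files under job_working_directory, so only
the Galaxy server process itself needs write access to the real dataset store,
which is what lets cluster nodes mount it read-only.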
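
Below is a minimal, self-contained sketch of the "false path" round trip the
three diffs implement. FakeDataset, FilenameWrapper, and finish are simplified,
illustrative stand-ins for Galaxy's classes; only the galaxy_dataset_%d.dat
naming and the rename-on-finish behavior come from the patch itself.

import os
import tempfile

class FakeDataset( object ):
    """Illustrative stand-in for a Galaxy dataset; only id and file_name matter here."""
    def __init__( self, id, file_name ):
        self.id = id
        self.file_name = file_name

class FilenameWrapper( object ):
    """Mimics the patched DatasetFilenameWrapper: when false_path is set, both
    str() and .file_name resolve to the working-directory path."""
    def __init__( self, dataset, false_path = None ):
        self.dataset = dataset
        self.false_path = false_path
    def __str__( self ):
        if self.false_path is not None:
            return self.false_path
        else:
            return self.dataset.file_name
    def __getattr__( self, key ):
        # Only fires for attributes not set on the wrapper itself.
        if self.false_path is not None and key == 'file_name':
            return self.false_path
        return getattr( self.dataset, key )

def finish( dataset, working_directory ):
    """Mirrors the new block in JobWrapper.finish: move the working-directory
    copy onto the dataset's real path once the job completes."""
    false_path = os.path.abspath( os.path.join( working_directory, "galaxy_dataset_%d.dat" % dataset.id ) )
    if os.path.exists( false_path ):
        os.rename( false_path, dataset.file_name )
    else:
        print( "Missing output file in working directory: %s" % false_path )

work_dir = tempfile.mkdtemp()  # stands in for the job working directory
data_dir = tempfile.mkdtemp()  # stands in for the dataset store (read-only on nodes)
dataset = FakeDataset( 1, os.path.join( data_dir, "dataset_1.dat" ) )

# What build_param_dict now does when outputs_to_working_directory is on:
false_path = os.path.abspath( os.path.join( work_dir, "galaxy_dataset_%d.dat" % dataset.id ) )
wrapper = FilenameWrapper( dataset, false_path = false_path )
open( false_path, 'w' ).close()

# The tool only ever sees the false path on its command line:
f = open( str( wrapper ), 'w' )
f.write( "tool output\n" )
f.close()

# On job completion the output is moved to its permanent location:
finish( dataset, work_dir )
assert os.path.exists( dataset.file_name )

Note that build_param_dict also touches the false path with
open( false_path, 'w' ).close(), presumably so the file exists at its expected
location even if the tool never writes to it.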