[galaxy-commits] commit/galaxy-central: jgoecks: Move implementation of from_work_dir attribute from job finish to job command line so that outputs are available earlier. This provides compatibility of attribute when setting metadata externally. Update Tophat wrapper to act as exemplar for attribute use.

20 Jul 2012

1 new commit in galaxy-central:


https://bitbucket.org/galaxy/galaxy-central/changeset/524cfb8ca67d/
changeset:   524cfb8ca67d
user:        jgoecks
date:        2012-07-20 19:18:54
summary:     Move implementation of from_work_dir attribute from job finish to job command line so that outputs are available earlier. This provides compatibility of attribute when setting metadata externally. Update Tophat wrapper to act as exemplar for attribute use.
affected #:  4 files

diff -r f4e720d9f182cb8216fdd2174fe3ab83d0350a24 -r 524cfb8ca67d491bcba54cda5a037a0c89e9a7b4 lib/galaxy/jobs/__init__.py

--- a/lib/galaxy/jobs/__init__.py
+++ b/lib/galaxy/jobs/__init__.py
@@ -336,38 +336,11 @@
         job_context = ExpressionContext( dict( stdout = stdout, stderr = stderr ) )
         job_tool = self.app.toolbox.tools_by_id.get( job.tool_id, None )
 
-        def in_directory( file, directory ):
-            # Make both absolute.
-            directory = os.path.abspath( directory )
-            file = os.path.abspath( file )
-
-            #Return true, if the common prefix of both is equal to directory
-            #e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b
-            return os.path.commonprefix( [ file, directory ] ) == directory
+        
         for dataset_assoc in job.output_datasets + job.output_library_datasets:
             context = self.get_dataset_finish_context( job_context, dataset_assoc.dataset.dataset )
             #should this also be checking library associations? - can a library item be added from a history before the job has ended? - lets not allow this to occur
             for dataset in dataset_assoc.dataset.dataset.history_associations + dataset_assoc.dataset.dataset.library_associations: #need to update all associated output hdas, i.e. history was shared with job running
-                #
-                # If HDA is to be copied from the working directory, do it now so that other attributes are correctly set.
-                #
-                if isinstance( dataset, model.HistoryDatasetAssociation ):
-                    joda = self.sa_session.query( model.JobToOutputDatasetAssociation ).filter_by( job=job, dataset=dataset ).first()
-                    if joda and job_tool:
-                        hda_tool_output = job_tool.outputs.get( joda.name, None )
-                        if hda_tool_output and hda_tool_output.from_work_dir:
-                            # Copy from working dir to HDA.
-                            source_file = os.path.join( os.path.abspath( self.working_directory ), hda_tool_output.from_work_dir )
-                            if in_directory( source_file, self.working_directory ):
-                                try:
-                                    shutil.move( source_file, dataset.file_name )
-                                    log.debug( "finish(): Moved %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) )
-                                except ( IOError, OSError ):
-                                    log.debug( "finish(): Could not move %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) )
-                            else:
-                                # Security violation.
-                                log.exception( "from_work_dir specified a location not in the working directory: %s, %s" % ( source_file, self.working_directory ) )
-
                 dataset.blurb = 'done'
                 dataset.peek  = 'no peek'
                 dataset.info = ( dataset.info  or '' ) + context['stdout'] + context['stderr']


diff -r f4e720d9f182cb8216fdd2174fe3ab83d0350a24 -r 524cfb8ca67d491bcba54cda5a037a0c89e9a7b4 lib/galaxy/jobs/runners/__init__.py
--- a/lib/galaxy/jobs/runners/__init__.py
+++ b/lib/galaxy/jobs/runners/__init__.py
@@ -1,4 +1,6 @@
-import os, os.path
+import os, logging, os.path
+
+log = logging.getLogger( __name__ )
 
 class BaseJobRunner( object ):
     def build_command_line( self, job_wrapper, include_metadata=False ):
@@ -10,6 +12,19 @@
             - command line taken from job wrapper
             - commands to set metadata (if include_metadata is True)
         """
+
+        def in_directory( file, directory ):
+            """
+            Return true, if the common prefix of both is equal to directory
+            e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b
+            """
+
+            # Make both absolute.
+            directory = os.path.abspath( directory )
+            file = os.path.abspath( file )
+
+            return os.path.commonprefix( [ file, directory ] ) == directory
+
         commands = job_wrapper.get_command_line()
         # All job runners currently handle this case which should never
         # occur
@@ -25,6 +40,31 @@
         if job_wrapper.dependency_shell_commands:
             commands = "; ".join( job_wrapper.dependency_shell_commands + [ commands ] ) 
 
+        # Append commands to copy job outputs based on from_work_dir attribute.
+        job = job_wrapper.get_job()
+        job_tool = self.app.toolbox.tools_by_id.get( job.tool_id, None )
+        for dataset_assoc in job.output_datasets + job.output_library_datasets:
+            for dataset in dataset_assoc.dataset.dataset.history_associations + dataset_assoc.dataset.dataset.library_associations:
+                if isinstance( dataset, self.app.model.HistoryDatasetAssociation ):
+                    joda = self.sa_session.query( self.app.model.JobToOutputDatasetAssociation ).filter_by( job=job, dataset=dataset ).first()
+                    if joda and job_tool:
+                        hda_tool_output = job_tool.outputs.get( joda.name, None )
+                        if hda_tool_output and hda_tool_output.from_work_dir:
+                            # Copy from working dir to HDA.
+                            # TODO: move instead of copy to save time?
+                            source_file = os.path.join( os.path.abspath( job_wrapper.working_directory ), hda_tool_output.from_work_dir )
+                            if in_directory( source_file, job_wrapper.working_directory ):
+                                try:
+                                    commands += "; cp %s %s" % ( source_file, dataset.file_name )
+                                    log.debug( "Copying %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) )
+                                except ( IOError, OSError ):
+                                    log.debug( "Could not copy %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) )
+                            else:
+                                # Security violation.
+                                log.exception( "from_work_dir specified a location not in the working directory: %s, %s" % ( source_file, job_wrapper.working_directory ) )
+
+
+
         # Append metadata setting commands, we don't want to overwrite metadata
         # that was copied over in init_meta(), as per established behavior
         if include_metadata and self.app.config.set_metadata_externally:


diff -r f4e720d9f182cb8216fdd2174fe3ab83d0350a24 -r 524cfb8ca67d491bcba54cda5a037a0c89e9a7b4 tools/ngs_rna/tophat_wrapper.py
--- a/tools/ngs_rna/tophat_wrapper.py
+++ b/tools/ngs_rna/tophat_wrapper.py
@@ -227,10 +227,6 @@
         if returncode != 0:
             raise Exception, stderr
             
-        # Copy output files from tmp directory to specified files.
-        shutil.copyfile( os.path.join( "tophat_out", "junctions.bed" ), options.junctions_output_file )
-        shutil.copyfile( os.path.join( "tophat_out", "accepted_hits.bam" ), options.accepted_hits_output_file )
-
         # TODO: look for errors in program output.
     except Exception, e:
         stop_err( 'Error in tophat:\n' + str( e ) ) 


diff -r f4e720d9f182cb8216fdd2174fe3ab83d0350a24 -r 524cfb8ca67d491bcba54cda5a037a0c89e9a7b4 tools/ngs_rna/tophat_wrapper.xml
--- a/tools/ngs_rna/tophat_wrapper.xml
+++ b/tools/ngs_rna/tophat_wrapper.xml
@@ -424,7 +424,7 @@
               </conditional></actions></data>
-        <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions">
+        <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions" from_work_dir="tophat_out/junctions.bed"><actions><conditional name="refGenomeSource.genomeSource"><when value="indexed">
@@ -443,7 +443,7 @@
               </conditional></actions></data>
-        <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits">
+        <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits" from_work_dir="tophat_out/accepted_hits.bam"><actions><conditional name="refGenomeSource.genomeSource"><when value="indexed">

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the notification service enabled with this
email address as the recipient.

    

Bitbucket