commit/galaxy-central: natefoo: Allow multiple output file collection from the job_working_directory.

3 Apr 2012

1 new commit in galaxy-central:


https://bitbucket.org/galaxy/galaxy-central/changeset/26403ed86d4c/
changeset:   26403ed86d4c
user:        natefoo
date:        2012-04-04 04:07:19
summary:     Allow multiple output file collection from the job_working_directory.
affected #:  4 files

diff -r 99419b4be32d1054c9b58f1664d8f1ab37ac91ac -r 26403ed86d4ccf39ea88c2658012a9a99d453266 lib/galaxy/config.py

--- a/lib/galaxy/config.py
+++ b/lib/galaxy/config.py
@@ -86,6 +86,7 @@
         self.allow_user_dataset_purge = string_as_bool( kwargs.get( "allow_user_dataset_purge", "False" ) )
         self.allow_user_impersonation = string_as_bool( kwargs.get( "allow_user_impersonation", "False" ) )
         self.new_user_dataset_access_role_default_private = string_as_bool( kwargs.get( "new_user_dataset_access_role_default_private", "False" ) )
+        self.collect_outputs_from = [ x.strip() for x in kwargs.get( 'collect_outputs_from', 'new_file_path,job_working_directory' ).lower().split(',') ]
         self.template_path = resolve_path( kwargs.get( "template_path", "templates" ), self.root )
         self.template_cache = resolve_path( kwargs.get( "template_cache_path", "database/compiled_templates" ), self.root )
         self.local_job_queue_workers = int( kwargs.get( "local_job_queue_workers", "5" ) )


diff -r 99419b4be32d1054c9b58f1664d8f1ab37ac91ac -r 26403ed86d4ccf39ea88c2658012a9a99d453266 lib/galaxy/jobs/__init__.py
--- a/lib/galaxy/jobs/__init__.py
+++ b/lib/galaxy/jobs/__init__.py
@@ -443,7 +443,7 @@
         # Check for and move associated_files
         self.tool.collect_associated_files(out_data, self.working_directory)
         # Create generated output children and primary datasets and add to param_dict
-        collected_datasets = {'children':self.tool.collect_child_datasets(out_data),'primary':self.tool.collect_primary_datasets(out_data)}
+        collected_datasets = {'children':self.tool.collect_child_datasets(out_data, self.working_directory),'primary':self.tool.collect_primary_datasets(out_data, self.working_directory)}
         param_dict.update({'__collected_datasets__':collected_datasets})
         # Certain tools require tasks to be completed after job execution
         # ( this used to be performed in the "exec_after_process" hook, but hooks are deprecated ).


diff -r 99419b4be32d1054c9b58f1664d8f1ab37ac91ac -r 26403ed86d4ccf39ea88c2658012a9a99d453266 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py
+++ b/lib/galaxy/tools/__init__.py
@@ -2153,7 +2153,7 @@
                     shutil.rmtree(temp_file_path)
             except:
                 continue
-    def collect_child_datasets( self, output):
+    def collect_child_datasets( self, output, job_working_directory ):
         """
         Look for child dataset files, create HDA and attach to parent.
         """
@@ -2161,7 +2161,12 @@
         # Loop through output file names, looking for generated children in 
         # form of 'child_parentId_designation_visibility_extension'
         for name, outdata in output.items():
-            for filename in glob.glob(os.path.join(self.app.config.new_file_path,"child_%i_*" % outdata.id) ):
+            filenames = []
+            if 'new_file_path' in self.app.config.collect_outputs_from:
+                filenames.extend( glob.glob(os.path.join(self.app.config.new_file_path,"child_%i_*" % outdata.id) ) )
+            if 'job_working_directory' in self.app.config.collect_outputs_from:
+                filenames.extend( glob.glob(os.path.join(job_working_directory,"child_%i_*" % outdata.id) ) )
+            for filename in filenames:
                 if not name in children:
                     children[name] = {}
                 fields = os.path.basename(filename).split("_")
@@ -2213,7 +2218,7 @@
                     self.sa_session.add( child_dataset )
                     self.sa_session.flush()
         return children
-    def collect_primary_datasets( self, output):
+    def collect_primary_datasets( self, output, job_working_directory ):
         """
         Find any additional datasets generated by a tool and attach (for 
         cases where number of outputs is not known in advance).
@@ -2223,7 +2228,12 @@
         # datasets in form of:
         #     'primary_associatedWithDatasetID_designation_visibility_extension(_DBKEY)'
         for name, outdata in output.items():
-            for filename in glob.glob(os.path.join(self.app.config.new_file_path,"primary_%i_*" % outdata.id) ):
+            filenames = []
+            if 'new_file_path' in self.app.config.collect_outputs_from:
+                filenames.extend( glob.glob(os.path.join(self.app.config.new_file_path,"primary_%i_*" % outdata.id) ) )
+            if 'job_working_directory' in self.app.config.collect_outputs_from:
+                filenames.extend( glob.glob(os.path.join(job_working_directory,"primary_%i_*" % outdata.id) ) )
+            for filename in filenames:
                 if not name in primary_datasets:
                     primary_datasets[name] = {}
                 fields = os.path.basename(filename).split("_")


diff -r 99419b4be32d1054c9b58f1664d8f1ab37ac91ac -r 26403ed86d4ccf39ea88c2658012a9a99d453266 universe_wsgi.ini.sample
--- a/universe_wsgi.ini.sample
+++ b/universe_wsgi.ini.sample
@@ -151,7 +151,6 @@
 # Directory where chrom len files are kept, currently mainly used by trackster
 #len_file_path = tool-data/shared/ucsc/chrom
 
-
 # Datatypes config file, defines what data (file) types are available in
 # Galaxy.
 #datatypes_config_file = datatypes_conf.xml
@@ -172,6 +171,14 @@
 # Path to the directory containing the external_service_types defined in the config.
 #external_service_type_path = external_service_types
 
+# Tools whose number of outputs is not known until runtime can write those
+# outputs to a directory, from which Galaxy collects them when the job is done.
+# Previously, this directory was new_file_path, but using one global directory
+# can cause performance problems, so using job_working_directory ('.', i.e. the
+# cwd when a job is run) is encouraged.  By default, both are checked to avoid
+# breaking existing tools.
+#collect_outputs_from = new_file_path,job_working_directory
+
 # -- Mail and notification
 
 # Galaxy sends mail for various things: Subscribing users to the mailing list

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled and this
email address is configured as its recipient.

    

Bitbucket

tags

participants (1)