commit/galaxy-central: natefoo: Allow multiple output file collection from the job_working_directory.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/26403ed86d4c/ changeset: 26403ed86d4c user: natefoo date: 2012-04-04 04:07:19 summary: Allow multiple output file collection from the job_working_directory. affected #: 4 files diff -r 99419b4be32d1054c9b58f1664d8f1ab37ac91ac -r 26403ed86d4ccf39ea88c2658012a9a99d453266 lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -86,6 +86,7 @@ self.allow_user_dataset_purge = string_as_bool( kwargs.get( "allow_user_dataset_purge", "False" ) ) self.allow_user_impersonation = string_as_bool( kwargs.get( "allow_user_impersonation", "False" ) ) self.new_user_dataset_access_role_default_private = string_as_bool( kwargs.get( "new_user_dataset_access_role_default_private", "False" ) ) + self.collect_outputs_from = [ x.strip() for x in kwargs.get( 'collect_outputs_from', 'new_file_path,job_working_directory' ).lower().split(',') ] self.template_path = resolve_path( kwargs.get( "template_path", "templates" ), self.root ) self.template_cache = resolve_path( kwargs.get( "template_cache_path", "database/compiled_templates" ), self.root ) self.local_job_queue_workers = int( kwargs.get( "local_job_queue_workers", "5" ) ) diff -r 99419b4be32d1054c9b58f1664d8f1ab37ac91ac -r 26403ed86d4ccf39ea88c2658012a9a99d453266 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -443,7 +443,7 @@ # Check for and move associated_files self.tool.collect_associated_files(out_data, self.working_directory) # Create generated output children and primary datasets and add to param_dict - collected_datasets = {'children':self.tool.collect_child_datasets(out_data),'primary':self.tool.collect_primary_datasets(out_data)} + collected_datasets = {'children':self.tool.collect_child_datasets(out_data, self.working_directory),'primary':self.tool.collect_primary_datasets(out_data, self.working_directory)} 
param_dict.update({'__collected_datasets__':collected_datasets}) # Certain tools require tasks to be completed after job execution # ( this used to be performed in the "exec_after_process" hook, but hooks are deprecated ). diff -r 99419b4be32d1054c9b58f1664d8f1ab37ac91ac -r 26403ed86d4ccf39ea88c2658012a9a99d453266 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -2153,7 +2153,7 @@ shutil.rmtree(temp_file_path) except: continue - def collect_child_datasets( self, output): + def collect_child_datasets( self, output, job_working_directory ): """ Look for child dataset files, create HDA and attach to parent. """ @@ -2161,7 +2161,12 @@ # Loop through output file names, looking for generated children in # form of 'child_parentId_designation_visibility_extension' for name, outdata in output.items(): - for filename in glob.glob(os.path.join(self.app.config.new_file_path,"child_%i_*" % outdata.id) ): + filenames = [] + if 'new_file_path' in self.app.config.collect_outputs_from: + filenames.extend( glob.glob(os.path.join(self.app.config.new_file_path,"child_%i_*" % outdata.id) ) ) + if 'job_working_directory' in self.app.config.collect_outputs_from: + filenames.extend( glob.glob(os.path.join(job_working_directory,"child_%i_*" % outdata.id) ) ) + for filename in filenames: if not name in children: children[name] = {} fields = os.path.basename(filename).split("_") @@ -2213,7 +2218,7 @@ self.sa_session.add( child_dataset ) self.sa_session.flush() return children - def collect_primary_datasets( self, output): + def collect_primary_datasets( self, output, job_working_directory ): """ Find any additional datasets generated by a tool and attach (for cases where number of outputs is not known in advance). 
@@ -2223,7 +2228,12 @@ # datasets in form of: # 'primary_associatedWithDatasetID_designation_visibility_extension(_DBKEY)' for name, outdata in output.items(): - for filename in glob.glob(os.path.join(self.app.config.new_file_path,"primary_%i_*" % outdata.id) ): + filenames = [] + if 'new_file_path' in self.app.config.collect_outputs_from: + filenames.extend( glob.glob(os.path.join(self.app.config.new_file_path,"primary_%i_*" % outdata.id) ) ) + if 'job_working_directory' in self.app.config.collect_outputs_from: + filenames.extend( glob.glob(os.path.join(job_working_directory,"primary_%i_*" % outdata.id) ) ) + for filename in filenames: if not name in primary_datasets: primary_datasets[name] = {} fields = os.path.basename(filename).split("_") diff -r 99419b4be32d1054c9b58f1664d8f1ab37ac91ac -r 26403ed86d4ccf39ea88c2658012a9a99d453266 universe_wsgi.ini.sample --- a/universe_wsgi.ini.sample +++ b/universe_wsgi.ini.sample @@ -151,7 +151,6 @@ # Directory where chrom len files are kept, currently mainly used by trackster #len_file_path = tool-data/shared/ucsc/chrom - # Datatypes config file, defines what data (file) types are available in # Galaxy. #datatypes_config_file = datatypes_conf.xml @@ -172,6 +171,14 @@ # Path to the directory containing the external_service_types defined in the config. #external_service_type_path = external_service_types +# Tools with a number of outputs not known until runtime can write these +# outputs to a directory for collection by Galaxy when the job is done. +# Previously, this directory was new_file_path, but using one global directory +# can cause performance problems, so using job_working_directory ('.' or cwd +# when a job is run) is encouraged. By default, both are checked to avoid +# breaking existing tools. 
+#collect_outputs_from = new_file_path,job_working_directory
+
 # -- Mail and notification
 # Galaxy sends mail for various things: Subscribing users to the mailing list

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--
This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket