[hg] galaxy 3032: PBS Upload Tool w/ set_metadata_externally = True fixes

details:   http://www.bx.psu.edu/hg/galaxy/rev/db234dd18be4
changeset: 3032:db234dd18be4
user:      Nate Coraor <nate@bx.psu.edu>
date:      Fri Nov 13 16:50:56 2009 -0500
description:
PBS Upload Tool w/ set_metadata_externally = True fixes

diffstat:

 lib/galaxy/datatypes/metadata.py     |   4 ++--
 lib/galaxy/jobs/__init__.py          |  15 +++++++++------
 lib/galaxy/jobs/runners/local.py     |   4 +++-
 lib/galaxy/jobs/runners/pbs.py       |  17 +++++++++++------
 lib/galaxy/tools/actions/metadata.py |   1 +
 scripts/set_metadata.py              |  15 ++++++++++++++-
 6 files changed, 40 insertions(+), 16 deletions(-)

diffs (167 lines):

diff -r d16f28f312a9 -r db234dd18be4 lib/galaxy/datatypes/metadata.py
--- a/lib/galaxy/datatypes/metadata.py Fri Nov 13 16:35:07 2009 -0500
+++ b/lib/galaxy/datatypes/metadata.py Fri Nov 13 16:50:56 2009 -0500
@@ -509,7 +509,7 @@
             # need to make different keys for them, since ids can overlap
             return "%s_%d" % ( dataset.__class__.__name__, dataset.id )
     def setup_external_metadata( self, datasets, sa_session, exec_dir=None, tmp_dir=None, dataset_files_path=None,
-                                 output_fnames=None, config_root=None, datatypes_config=None, kwds={} ):
+                                 output_fnames=None, config_root=None, datatypes_config=None, job_metadata=None, kwds={} ):
         #fill in metadata_files_dict and return the command with args required to set metadata
         def __metadata_files_list_to_cmd_line( metadata_files ):
             def __get_filename_override():
@@ -564,7 +564,7 @@
             sa_session.flush()
             metadata_files_list.append( metadata_files )
         #return command required to build
-        return "%s %s %s %s %s %s" % ( os.path.join( exec_dir, 'set_metadata.sh' ), dataset_files_path, tmp_dir, config_root, datatypes_config, " ".join( map( __metadata_files_list_to_cmd_line, metadata_files_list ) ) )
+        return "%s %s %s %s %s %s %s" % ( os.path.join( exec_dir, 'set_metadata.sh' ), dataset_files_path, tmp_dir, config_root, datatypes_config, job_metadata, " ".join( map( __metadata_files_list_to_cmd_line, metadata_files_list ) ) )
     def external_metadata_set_successfully( self, dataset, sa_session ):
         metadata_files = self.get_output_filenames_by_dataset( dataset, sa_session )
diff -r d16f28f312a9 -r db234dd18be4 lib/galaxy/jobs/__init__.py
--- a/lib/galaxy/jobs/__init__.py Fri Nov 13 16:35:07 2009 -0500
+++ b/lib/galaxy/jobs/__init__.py Fri Nov 13 16:50:56 2009 -0500
@@ -526,6 +526,7 @@
                 # If the tool was expected to set the extension, attempt to retrieve it
                 if dataset.ext == 'auto':
                     dataset.extension = context.get( 'ext', 'data' )
+                    dataset.init_meta( copy_from=dataset )
                 #if a dataset was copied, it won't appear in our dictionary:
                 #either use the metadata from originating output dataset, or call set_meta on the copies
                 #it would be quicker to just copy the metadata from the originating output dataset,
@@ -715,14 +716,15 @@
         for outfile in [ str( o ) for o in output_paths ]:
             sizes.append( ( outfile, os.stat( outfile ).st_size ) )
         return sizes
-    def setup_external_metadata( self, exec_dir = None, tmp_dir = None, dataset_files_path = None, config_root = None, datatypes_config = None, **kwds ):
+    def setup_external_metadata( self, exec_dir = None, tmp_dir = None, dataset_files_path = None, config_root = None, datatypes_config = None, set_extension = True, **kwds ):
         # extension could still be 'auto' if this is the upload tool.
         job = self.sa_session.query( model.Job ).get( self.job_id )
-        for output_dataset_assoc in job.output_datasets:
-            if output_dataset_assoc.dataset.ext == 'auto':
-                context = self.get_dataset_finish_context( dict(), output_dataset_assoc.dataset.dataset )
-                output_dataset_assoc.dataset.extension = context.get( 'ext', 'data' )
-        self.sa_session.flush()
+        if set_extension:
+            for output_dataset_assoc in job.output_datasets:
+                if output_dataset_assoc.dataset.ext == 'auto':
+                    context = self.get_dataset_finish_context( dict(), output_dataset_assoc.dataset.dataset )
+                    output_dataset_assoc.dataset.extension = context.get( 'ext', 'data' )
+            self.sa_session.flush()
         if tmp_dir is None:
             #this dir should should relative to the exec_dir
             tmp_dir = self.app.config.new_file_path
@@ -739,6 +741,7 @@
                                                      dataset_files_path = dataset_files_path,
                                                      config_root = config_root,
                                                      datatypes_config = datatypes_config,
+                                                     job_metadata = os.path.join( self.working_directory, TOOL_PROVIDED_JOB_METADATA_FILE ),
                                                      **kwds )

 class DefaultJobDispatcher( object ):
diff -r d16f28f312a9 -r db234dd18be4 lib/galaxy/jobs/runners/local.py
--- a/lib/galaxy/jobs/runners/local.py Fri Nov 13 16:35:07 2009 -0500
+++ b/lib/galaxy/jobs/runners/local.py Fri Nov 13 16:50:56 2009 -0500
@@ -106,7 +106,9 @@
             #this is terminatable when output dataset/job is deleted
             #so that long running set_meta()s can be cancelled without having to reboot the server
             if job_wrapper.get_state() not in [ model.Job.states.ERROR, model.Job.states.DELETED ] and self.app.config.set_metadata_externally and job_wrapper.output_paths:
-                external_metadata_script = job_wrapper.setup_external_metadata( output_fnames = job_wrapper.get_output_fnames(), kwds = { 'overwrite' : False } ) #we don't want to overwrite metadata that was copied over in init_meta(), as per established behavior
+                external_metadata_script = job_wrapper.setup_external_metadata( output_fnames = job_wrapper.get_output_fnames(),
+                                                                                set_extension = True,
+                                                                                kwds = { 'overwrite' : False } ) #we don't want to overwrite metadata that was copied over in init_meta(), as per established behavior
                 log.debug( 'executing external set_meta script for job %d: %s' % ( job_wrapper.job_id, external_metadata_script ) )
                 external_metadata_proc = subprocess.Popen( args = external_metadata_script,
                                                            shell = True,
diff -r d16f28f312a9 -r db234dd18be4 lib/galaxy/jobs/runners/pbs.py
--- a/lib/galaxy/jobs/runners/pbs.py Fri Nov 13 16:35:07 2009 -0500
+++ b/lib/galaxy/jobs/runners/pbs.py Fri Nov 13 16:50:56 2009 -0500
@@ -29,7 +29,6 @@
 fi
 cd %s
 %s
-%s
 """

 pbs_symlink_template = """#!/bin/sh
@@ -178,7 +177,9 @@
         pbs_queue_name = self.determine_pbs_queue( runner_url )
         c = pbs.pbs_connect( pbs_server_name )
         if c <= 0:
-            raise Exception( "Connection to PBS server for submit failed" )
+            job_wrapper.fail( "Unable to queue job for execution. Resubmitting the job may succeed." )
+            log.error( "Connection to PBS server for submit failed" )
+            return

         # define job attributes
         ofile = "%s/%s.o" % (self.app.config.cluster_files_directory, job_wrapper.job_id)
@@ -221,11 +222,15 @@
         if self.app.config.pbs_stage_path != '':
             script = pbs_symlink_template % (job_wrapper.galaxy_lib_dir, " ".join(job_wrapper.get_input_fnames() + output_files), self.app.config.pbs_stage_path, exec_dir, command_line)
         else:
+            script = pbs_template % ( job_wrapper.galaxy_lib_dir, exec_dir, command_line )
             if self.app.config.set_metadata_externally:
-                external_metadata_script = job_wrapper.setup_external_metadata( exec_dir = os.path.abspath( os.getcwd() ), tmp_dir = self.app.config.new_file_path, dataset_files_path = self.app.model.Dataset.file_path, output_fnames = output_fnames, kwds = { 'overwrite' : False } ) #we don't want to overwrite metadata that was copied over in init_meta(), as per established behavior
-            else:
-                external_metadata_script = ""
-            script = pbs_template % ( job_wrapper.galaxy_lib_dir, exec_dir, command_line, external_metadata_script )
+                script += "cd %s\n" % os.path.abspath( os.getcwd() )
+                script += "%s\n" % job_wrapper.setup_external_metadata( exec_dir = os.path.abspath( os.getcwd() ),
+                                                                        tmp_dir = self.app.config.new_file_path,
+                                                                        dataset_files_path = self.app.model.Dataset.file_path,
+                                                                        output_fnames = output_fnames,
+                                                                        set_extension = False,
+                                                                        kwds = { 'overwrite' : False } ) #we don't want to overwrite metadata that was copied over in init_meta(), as per established behavior
         job_file = "%s/%s.sh" % (self.app.config.cluster_files_directory, job_wrapper.job_id)
         fh = file(job_file, "w")
         fh.write(script)
diff -r d16f28f312a9 -r db234dd18be4 lib/galaxy/tools/actions/metadata.py
--- a/lib/galaxy/tools/actions/metadata.py Fri Nov 13 16:35:07 2009 -0500
+++ b/lib/galaxy/tools/actions/metadata.py Fri Nov 13 16:50:56 2009 -0500
@@ -41,6 +41,7 @@
                                                          output_fnames = None,
                                                          config_root = None,
                                                          datatypes_config = None,
+                                                         job_metadata = None,
                                                          kwds = { 'overwrite' : True } )
         incoming[ '__SET_EXTERNAL_METADATA_COMMAND_LINE__' ] = cmd_line
         for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
diff -r d16f28f312a9 -r db234dd18be4 scripts/set_metadata.py
--- a/scripts/set_metadata.py Fri Nov 13 16:35:07 2009 -0500
+++ b/scripts/set_metadata.py Fri Nov 13 16:50:56 2009 -0500
@@ -25,6 +25,7 @@
 galaxy.model.Job() #this looks REAL stupid, but it is REQUIRED in order for SA to insert parameters into the classes defined by the mappers --> it appears that instantiating ANY mapper'ed class would suffice here
 galaxy.datatypes.metadata.DATABASE_CONNECTION_AVAILABLE = False #Let metadata know that there is no database connection, and to just assume object ids are valid
 from galaxy.util import stringify_dictionary_keys
+from galaxy.util.json import from_json_string
 from sqlalchemy.orm import clear_mappers

 def __main__():
@@ -37,13 +38,25 @@
     config_root = sys.argv.pop( 1 )
     datatypes_config = sys.argv.pop( 1 )
     galaxy.model.set_datatypes_registry( galaxy.datatypes.registry.Registry( config_root, datatypes_config ) )
-
+
+    job_metadata = sys.argv.pop( 1 )
+    ext_override = dict()
+    if job_metadata != "None" and os.path.exists( job_metadata ):
+        for line in open( job_metadata, 'r' ):
+            try:
+                line = stringify_dictionary_keys( from_json_string( line ) )
+                assert line['type'] == 'dataset'
+                ext_override[line['dataset_id']] = line['ext']
+            except:
+                continue
     for filenames in sys.argv[1:]:
         filename_in, filename_kwds, filename_out, filename_results_code, dataset_filename_override = filenames.split( ',' )
         try:
             dataset = cPickle.load( open( filename_in ) ) #load DatasetInstance
             if dataset_filename_override:
                 dataset.dataset.external_filename = dataset_filename_override
+            if ext_override.get( dataset.dataset.id, None ):
+                dataset.extension = ext_override[ dataset.dataset.id ]
             kwds = stringify_dictionary_keys( simplejson.load( open( filename_kwds ) ) )#load kwds; need to ensure our keywords are not unicode
             dataset.datatype.set_meta( dataset, **kwds )
             dataset.metadata.to_JSON_dict( filename_out ) # write out results of set_meta
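
For readers following the pbs.py change: the external metadata command is now appended to the job script rather than filled into a mandatory template slot, so pbs_template no longer needs an extra %s that had to be set to "" when metadata is handled in-process. A simplified illustration of the new assembly scheme follows; the template here is a stand-in (the real pbs_template also sets up PYTHONPATH inside an if/fi block), and build_job_script is a hypothetical helper, not a function in the changeset:

# Simplified stand-in for Galaxy's pbs_template.
pbs_template = """#!/bin/sh
cd %s
%s
"""

def build_job_script( exec_dir, command_line, metadata_command=None ):
    # Render the base script first; the template now has exactly the
    # slots every job needs.
    script = pbs_template % ( exec_dir, command_line )
    if metadata_command:
        # Appending (rather than templating) keeps the metadata step
        # optional: jobs run without set_metadata_externally simply get
        # no extra lines.
        script += "cd %s\n" % exec_dir
        script += "%s\n" % metadata_command
    return script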
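
The thread running through the other files is the new job_metadata argument, which carries the path of the tool-provided metadata file (TOOL_PROVIDED_JOB_METADATA_FILE in the job's working directory) down to scripts/set_metadata.py. The upload tool writes one JSON object per line into that file, and since a PBS job cannot resolve 'auto' extensions against the server before submission (hence set_extension = False in the PBS runner), set_metadata.py now reads the file on the cluster node and overrides each dataset's extension before calling set_meta(). A minimal standalone sketch of that parsing step, using the stdlib json module in place of galaxy.util.json and a hypothetical helper name:

import json
import os

def read_ext_overrides( job_metadata_path ):
    # Build a dataset id -> extension map from the tool-provided metadata
    # file.  The literal string "None" is what appears on the command line
    # when no job_metadata path was supplied.
    overrides = {}
    if job_metadata_path == "None" or not os.path.exists( job_metadata_path ):
        return overrides
    for line in open( job_metadata_path ):
        try:
            record = json.loads( line )   # one JSON object per line
            if record.get( 'type' ) != 'dataset':
                continue                  # only dataset records carry an ext
            overrides[ record['dataset_id'] ] = record['ext']
        except ( ValueError, KeyError, AttributeError ):
            continue                      # skip blank, unparseable, or foreign lines
    return overrides

# Hypothetical usage, mirroring the loop in scripts/set_metadata.py:
#   ext_override = read_ext_overrides( job_metadata )
#   if dataset.dataset.id in ext_override:
#       dataset.extension = ext_override[ dataset.dataset.id ]

Keeping the override logic in the remote script means the extension can be fixed without a database connection, which is why the server-side extension loop in setup_external_metadata is now guarded by the set_extension flag.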