commit/galaxy-central: dan: 1.) Enhance set_metadata scripts to be able to handle metadata declared by tools for both regular declared outputs and new primary output datasets using JSON.
1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/c713d035edff/
Changeset:   c713d035edff
User:        dan
Date:        2014-07-10 18:09:50
Summary:     1.) Enhance set_metadata scripts to be able to handle metadata declared by tools for both regular declared outputs and new primary output datasets using JSON.
             2.) Enhance collect_primary_datasets to allow tools to provide extra_files.
Affected #:  3 files

diff -r cef33a0617405e7b3f25c79955890c72e69dc14b -r c713d035edff866476384a47ab25410d9a271b54 lib/galaxy/datatypes/metadata.py
--- a/lib/galaxy/datatypes/metadata.py
+++ b/lib/galaxy/datatypes/metadata.py
@@ -126,10 +126,21 @@
             rval[key] = self.spec[key].param.make_copy( value, target_context=self, source_context=to_copy )
         return rval
 
-    def from_JSON_dict( self, filename, path_rewriter=None ):
+    def from_JSON_dict( self, filename=None, path_rewriter=None, json_dict=None ):
         dataset = self.parent
-        log.debug( 'loading metadata from file for: %s %s' % ( dataset.__class__.__name__, dataset.id ) )
-        JSONified_dict = json.load( open( filename ) )
+        if filename is not None:
+            log.debug( 'loading metadata from file for: %s %s' % ( dataset.__class__.__name__, dataset.id ) )
+            JSONified_dict = json.load( open( filename ) )
+        elif json_dict is not None:
+            log.debug( 'loading metadata from dict for: %s %s' % ( dataset.__class__.__name__, dataset.id ) )
+            if isinstance( json_dict, basestring ):
+                JSONified_dict = json.loads( json_dict )
+            elif isinstance( json_dict, dict ):
+                JSONified_dict = json_dict
+            else:
+                raise ValueError( "json_dict must be either a dictionary or a string, got %s." % ( type( json_dict ) ) )
+        else:
+            raise ValueError( "You must provide either a filename or a json_dict" )
         for name, spec in self.spec.items():
             if name in JSONified_dict:
                 from_ext_kwds = {}
@@ -143,13 +154,15 @@
                 #metadata associated with our dataset, we'll delete it from our dataset's metadata dict
                 del dataset._metadata[ name ]
 
-    def to_JSON_dict( self, filename ):
+    def to_JSON_dict( self, filename=None ):
         #galaxy.model.customtypes.json_encoder.encode()
         meta_dict = {}
         dataset_meta_dict = self.parent._metadata
         for name, spec in self.spec.items():
             if name in dataset_meta_dict:
                 meta_dict[ name ] = spec.param.to_external_value( dataset_meta_dict[ name ] )
+        if filename is None:
+            return json.dumps( meta_dict )
         json.dump( meta_dict, open( filename, 'wb+' ) )
 
     def __getstate__( self ):
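
A rough usage sketch of the new keyword arguments (illustrative only, not part of
the changeset; `collection` stands in for an existing MetadataCollection instance
and is not defined here):

    import json
    # Illustrative sketch: round-trip metadata through the new keyword arguments.
    # `collection` is assumed to be a galaxy MetadataCollection instance.
    json_string = collection.to_JSON_dict()                            # no filename -> returns a JSON string
    collection.from_JSON_dict( json_dict=json_string )                 # accepts the raw JSON string...
    collection.from_JSON_dict( json_dict=json.loads( json_string ) )   # ...or an already-parsed dict
    collection.to_JSON_dict( filename='metadata_out.json' )            # file-based behavior is unchanged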

diff -r cef33a0617405e7b3f25c79955890c72e69dc14b -r c713d035edff866476384a47ab25410d9a271b54 lib/galaxy/tools/parameters/output_collect.py
--- a/lib/galaxy/tools/parameters/output_collect.py
+++ b/lib/galaxy/tools/parameters/output_collect.py
@@ -100,13 +100,36 @@
             sa_session.flush()
             primary_data.state = outdata.state
             #add tool/metadata provided information
-            new_primary_datasets_attributes = new_primary_datasets.get( os.path.split( filename )[-1] )
+            new_primary_datasets_attributes = new_primary_datasets.get( os.path.split( filename )[-1], {} )
             if new_primary_datasets_attributes:
                 dataset_att_by_name = dict( ext='extension' )
                 for att_set in [ 'name', 'info', 'ext', 'dbkey' ]:
                     dataset_att_name = dataset_att_by_name.get( att_set, att_set )
                     setattr( primary_data, dataset_att_name, new_primary_datasets_attributes.get( att_set, getattr( primary_data, dataset_att_name ) ) )
-            primary_data.set_meta()
+            extra_files_path = new_primary_datasets_attributes.get( 'extra_files', None )
+            if extra_files_path:
+                extra_files_path_joined = os.path.join( job_working_directory, extra_files_path )
+                for root, dirs, files in os.walk( extra_files_path_joined ):
+                    extra_dir = os.path.join( primary_data.extra_files_path, root.replace( extra_files_path_joined, '', 1 ).lstrip( os.path.sep ) )
+                    for f in files:
+                        app.object_store.update_from_file( primary_data.dataset,
+                            extra_dir=extra_dir,
+                            alt_name=f,
+                            file_name=os.path.join( root, f ),
+                            create=True,
+                            dir_only=True,
+                            preserve_symlinks=True
+                        )
+                # FIXME:
+                # since these are placed into the job working dir, let the standard
+                # Galaxy cleanup methods handle this (for now?)
+                # there was an extra_files_path dir, attempt to remove it
+                #shutil.rmtree( extra_files_path_joined )
+            metadata_dict = new_primary_datasets_attributes.get( 'metadata', None )
+            if metadata_dict:
+                primary_data.metadata.from_JSON_dict( json_dict=metadata_dict )
+            else:
+                primary_data.set_meta()
             primary_data.set_peek()
             sa_session.add( primary_data )
             sa_session.flush()
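
To show what collect_primary_datasets and set_metadata.py now consume, here is a
hedged sketch of a tool writing one job-metadata line for a new primary dataset.
The keys ('type', 'filename', 'ext', 'name', 'info', 'dbkey', 'metadata',
'extra_files') are the ones read by the code in this changeset; the concrete
values and the 'galaxy.json' file name are illustrative assumptions:

    import json
    # Hypothetical tool-side example; values and the target file name are assumptions.
    entry = {
        "type": "new_primary_dataset",
        "filename": "primary_output_1.dat",      # relative to the job working directory
        "ext": "tabular",
        "name": "Extra output",
        "info": "created by the tool at runtime",
        "dbkey": "hg19",
        "metadata": { "columns": 4, "column_types": [ "str", "int", "int", "str" ] },
        "extra_files": "primary_output_1_files"   # directory of extra files, also relative to the job working directory
    }
    with open( "galaxy.json", "a" ) as fh:
        fh.write( "%s\n" % json.dumps( entry ) )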

diff -r cef33a0617405e7b3f25c79955890c72e69dc14b -r c713d035edff866476384a47ab25410d9a271b54 scripts/set_metadata.py
--- a/scripts/set_metadata.py
+++ b/scripts/set_metadata.py
@@ -31,16 +31,27 @@
 import galaxy.model.mapping # need to load this before we unpickle, in order to setup properties assigned by the mappers
 galaxy.model.Job() # this looks REAL stupid, but it is REQUIRED in order for SA to insert parameters into the classes defined by the mappers --> it appears that instantiating ANY mapper'ed class would suffice here
 from galaxy.util import stringify_dictionary_keys
-from galaxy.util.json import from_json_string
 from sqlalchemy.orm import clear_mappers
 from galaxy.objectstore import build_object_store_from_config
 from galaxy import config
 import ConfigParser
 
+def set_meta_with_tool_provided( dataset_instance, file_dict, set_meta_kwds ):
+    # This method is somewhat odd, in that we set the metadata attributes from tool,
+    # then call set_meta, then set metadata attributes from tool again.
+    # This is intentional due to interplay of overwrite kwd, the fact that some metadata
+    # parameters may rely on the values of others, and that we are accepting the
+    # values provided by the tool as Truth.
+    for metadata_name, metadata_value in file_dict.get( 'metadata', {} ).iteritems():
+        setattr( dataset_instance.metadata, metadata_name, metadata_value )
+    dataset_instance.datatype.set_meta( dataset_instance, **set_meta_kwds )
+    for metadata_name, metadata_value in file_dict.get( 'metadata', {} ).iteritems():
+        setattr( dataset_instance.metadata, metadata_name, metadata_value )
+
 def __main__():
     file_path = sys.argv.pop( 1 )
-    tmp_dir = sys.argv.pop( 1 )
+    tool_job_working_directory = tmp_dir = sys.argv.pop( 1 ) #this is also the job_working_directory now
     galaxy.model.Dataset.file_path = file_path
     galaxy.datatypes.metadata.MetadataTempFile.tmp_dir = tmp_dir
@@ -77,15 +88,19 @@
     galaxy.model.set_datatypes_registry( datatypes_registry )
 
     job_metadata = sys.argv.pop( 1 )
-    ext_override = dict()
+    existing_job_metadata_dict = {}
+    new_job_metadata_dict = {}
     if job_metadata != "None" and os.path.exists( job_metadata ):
         for line in open( job_metadata, 'r' ):
             try:
-                line = stringify_dictionary_keys( from_json_string( line ) )
-                assert line['type'] == 'dataset'
-                ext_override[line['dataset_id']] = line['ext']
+                line = stringify_dictionary_keys( json.loads( line ) )
+                if line['type'] == 'dataset':
+                    existing_job_metadata_dict[ line['dataset_id'] ] = line
+                elif line['type'] == 'new_primary_dataset':
+                    new_job_metadata_dict[ line[ 'filename' ] ] = line
             except:
                 continue
+
     for filenames in sys.argv[1:]:
         fields = filenames.split( ',' )
         filename_in = fields.pop( 0 )
@@ -100,12 +115,13 @@
             override_metadata = fields.pop( 0 )
         else:
             override_metadata = None
+        set_meta_kwds = stringify_dictionary_keys( json.load( open( filename_kwds ) ) ) # load kwds; need to ensure our keywords are not unicode
         try:
             dataset = cPickle.load( open( filename_in ) ) # load DatasetInstance
             if dataset_filename_override:
                 dataset.dataset.external_filename = dataset_filename_override
-            if ext_override.get( dataset.dataset.id, None ):
-                dataset.extension = ext_override[ dataset.dataset.id ]
+            if dataset.dataset.id in existing_job_metadata_dict:
+                dataset.extension = existing_job_metadata_dict[ dataset.dataset.id ].get( 'ext', dataset.extension )
             # Metadata FileParameter types may not be writable on a cluster node, and are therefore temporarily substituted with MetadataTempFiles
             if override_metadata:
                 override_metadata = json.load( open( override_metadata ) )
@@ -113,12 +129,27 @@
                     if galaxy.datatypes.metadata.MetadataTempFile.is_JSONified_value( metadata_file_override ):
                         metadata_file_override = galaxy.datatypes.metadata.MetadataTempFile.from_JSON( metadata_file_override )
                     setattr( dataset.metadata, metadata_name, metadata_file_override )
-            kwds = stringify_dictionary_keys( json.load( open( filename_kwds ) ) ) # load kwds; need to ensure our keywords are not unicode
-            dataset.datatype.set_meta( dataset, **kwds )
+            file_dict = existing_job_metadata_dict.get( dataset.dataset.id, {} )
+            set_meta_with_tool_provided( dataset, file_dict, set_meta_kwds )
             dataset.metadata.to_JSON_dict( filename_out ) # write out results of set_meta
             json.dump( ( True, 'Metadata has been set successfully' ), open( filename_results_code, 'wb+' ) ) # setting metadata has succeeded
         except Exception, e:
             json.dump( ( False, str( e ) ), open( filename_results_code, 'wb+' ) ) # setting metadata has failed somehow
+
+    for i, ( filename, file_dict ) in enumerate( new_job_metadata_dict.iteritems(), start=1 ):
+        new_dataset = galaxy.model.Dataset( id=-i, external_filename=os.path.join( tool_job_working_directory, file_dict[ 'filename' ] ) )
+        extra_files = file_dict.get( 'extra_files', None )
+        if extra_files is not None:
+            new_dataset._extra_files_path = os.path.join( tool_job_working_directory, extra_files )
+        new_dataset.state = new_dataset.states.OK
+        new_dataset_instance = galaxy.model.HistoryDatasetAssociation( id=-i, dataset=new_dataset, extension=file_dict.get( 'ext', 'data' ) )
+        set_meta_with_tool_provided( new_dataset_instance, file_dict, set_meta_kwds )
+        file_dict[ 'metadata' ] = json.loads( new_dataset_instance.metadata.to_JSON_dict() ) #storing metadata in external form, need to turn back into dict, then later jsonify
+    if existing_job_metadata_dict or new_job_metadata_dict:
+        with open( job_metadata, 'wb' ) as job_metadata_fh:
+            for value in existing_job_metadata_dict.values() + new_job_metadata_dict.values():
+                job_metadata_fh.write( "%s\n" % ( json.dumps( value ) ) )
+
     clear_mappers()
     # Shut down any additional threads that might have been created via the ObjectStore
     object_store.shutdown()
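
The ordering inside set_meta_with_tool_provided can be seen in a small standalone
sketch (toy names, not Galaxy code): tool-provided values are applied before
set_meta so dependent parameters can be derived from them, and re-applied
afterwards so they win even if set_meta overwrites them:

    # Standalone toy illustration of the apply / set_meta / re-apply ordering above.
    class ToyMetadata( object ):
        pass

    def toy_set_meta( metadata ):
        # a datatype's set_meta may derive one value from another and may clobber both
        metadata.column_types = [ 'str' ] * getattr( metadata, 'columns', 0 )
        metadata.columns = 0  # pretend auto-detection failed and overwrote the value

    tool_provided = { 'columns': 4 }
    metadata = ToyMetadata()
    for name, value in tool_provided.items():
        setattr( metadata, name, value )        # 1) seed with tool-provided values
    toy_set_meta( metadata )                    # 2) let set_meta derive dependent values
    for name, value in tool_provided.items():
        setattr( metadata, name, value )        # 3) re-apply: tool values are treated as truth
    assert metadata.columns == 4 and len( metadata.column_types ) == 4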

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.