details: http://www.bx.psu.edu/hg/galaxy/rev/92d6a9796b24 changeset: 3128:92d6a9796b24 user: Dan Blankenberg <dan@bx.psu.edu> date: Mon Nov 30 14:55:30 2009 -0500 description: When setting metadata externally, substitute a MetadataTempFile object for all populated metadata FileParameters. Fixes the problem with writing to metadata FileParameter files on cluster nodes with read-only database/files directory. diffstat: lib/galaxy/datatypes/metadata.py | 21 +++- lib/galaxy/model/mapping.py | 1 + lib/galaxy/model/migrate/versions/0028_external_metadata_file_override.py | 47 +++++++++ scripts/set_metadata.py | 23 ++++- tools/regVariation/quality_filter.xml | 2 +- 5 files changed, 87 insertions(+), 7 deletions(-) diffs (172 lines): diff -r 9f7a2a3be0e7 -r 92d6a9796b24 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py Mon Nov 30 11:59:54 2009 -0500 +++ b/lib/galaxy/datatypes/metadata.py Mon Nov 30 14:55:30 2009 -0500 @@ -429,7 +429,9 @@ if MetadataTempFile.is_JSONified_value( value ): value = MetadataTempFile.from_JSON( value ) if isinstance( value, MetadataTempFile ): - mf = self.new_file( dataset = parent, **value.kwds ) + mf = parent.metadata.get( self.spec.name, None) + if mf is None: + mf = self.new_file( dataset = parent, **value.kwds ) shutil.move( value.file_name, mf.file_name ) value = mf.id return value @@ -521,7 +523,7 @@ if dataset_path.false_path and dataset_path.real_path == metadata_files.dataset.file_name: return dataset_path.false_path return "" - return "%s,%s,%s,%s,%s" % ( metadata_files.filename_in, metadata_files.filename_kwds, metadata_files.filename_out, metadata_files.filename_results_code, __get_filename_override() ) + return "%s,%s,%s,%s,%s,%s" % ( metadata_files.filename_in, metadata_files.filename_kwds, metadata_files.filename_out, metadata_files.filename_results_code, __get_filename_override(), metadata_files.filename_override_metadata ) if not isinstance( datasets, list ): datasets = [ datasets ] if exec_dir is None: @@ 
-558,11 +560,22 @@ open( metadata_files.filename_out, 'wb+' ) # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible) #file to store a 'return code' indicating the results of the set_meta() call #results code is like (True/False - if setting metadata was successful/failed , exception or string of reason of success/failure ) - metadata_files.filename_results_code = relpath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_out_%s_" % key ).name ) + metadata_files.filename_results_code = relpath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_results_%s_" % key ).name ) simplejson.dump( ( False, 'External set_meta() not called' ), open( metadata_files.filename_results_code, 'wb+' ) ) # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible) #file to store kwds passed to set_meta() metadata_files.filename_kwds = relpath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_kwds_%s_" % key ).name ) simplejson.dump( kwds, open( metadata_files.filename_kwds, 'wb+' ), ensure_ascii=True ) + #existing metadata file parameters need to be overridden with cluster-writable file locations + metadata_files.filename_override_metadata = relpath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_override_%s_" % key ).name ) + open( metadata_files.filename_override_metadata, 'wb+' ) # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible) + override_metadata = [] + for meta_key, spec_value in dataset.metadata.spec.iteritems(): + if isinstance( spec_value.param, FileParameter ) and dataset.metadata.get( meta_key, None ) is not None: + metadata_temp = MetadataTempFile() + shutil.copy( dataset.metadata.get( meta_key, None ).file_name, metadata_temp.file_name ) + override_metadata.append( ( meta_key, metadata_temp.to_JSON() ) ) + simplejson.dump( override_metadata, open( metadata_files.filename_override_metadata, 'wb+' ) ) + #add to session and 
flush sa_session.add( metadata_files ) sa_session.flush() metadata_files_list.append( metadata_files ) @@ -585,7 +598,7 @@ #can occur if the job was stopped before completion, but a MetadataTempFile is used in the set_meta MetadataTempFile.cleanup_from_JSON_dict_filename( metadata_files.filename_out ) dataset_key = self.get_dataset_metadata_key( metadata_files.dataset ) - for key, fname in [ ( 'filename_in', metadata_files.filename_in ), ( 'filename_out', metadata_files.filename_out ), ( 'filename_results_code', metadata_files.filename_results_code ), ( 'filename_kwds', metadata_files.filename_kwds ) ]: + for key, fname in [ ( 'filename_in', metadata_files.filename_in ), ( 'filename_out', metadata_files.filename_out ), ( 'filename_results_code', metadata_files.filename_results_code ), ( 'filename_kwds', metadata_files.filename_kwds ), ( 'filename_override_metadata', metadata_files.filename_override_metadata ) ]: try: os.remove( fname ) except Exception, e: diff -r 9f7a2a3be0e7 -r 92d6a9796b24 lib/galaxy/model/mapping.py --- a/lib/galaxy/model/mapping.py Mon Nov 30 11:59:54 2009 -0500 +++ b/lib/galaxy/model/mapping.py Mon Nov 30 14:55:30 2009 -0500 @@ -358,6 +358,7 @@ Column( "filename_out", String( 255 ) ), Column( "filename_results_code", String( 255 ) ), Column( "filename_kwds", String( 255 ) ), + Column( "filename_override_metadata", String( 255 ) ), Column( "job_runner_external_pid", String( 255 ) ) ) Event.table = Table( "event", metadata, diff -r 9f7a2a3be0e7 -r 92d6a9796b24 lib/galaxy/model/migrate/versions/0028_external_metadata_file_override.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/model/migrate/versions/0028_external_metadata_file_override.py Mon Nov 30 14:55:30 2009 -0500 @@ -0,0 +1,47 @@ +""" +This script adds the filename_override_metadata column to the JobExternalOutputMetadata table, +allowing existing metadata files to be written when using external metadata and a cluster +set up with read-only access to database/files +""" 
+from sqlalchemy import * +from sqlalchemy.orm import * +from sqlalchemy.exceptions import * +from migrate import * +from migrate.changeset import * +import datetime +now = datetime.datetime.utcnow +import sys, logging +# Need our custom types, but don't import anything else from model +from galaxy.model.custom_types import * + +log = logging.getLogger( __name__ ) +log.setLevel(logging.DEBUG) +handler = logging.StreamHandler( sys.stdout ) +format = "%(name)s %(levelname)s %(asctime)s %(message)s" +formatter = logging.Formatter( format ) +handler.setFormatter( formatter ) +log.addHandler( handler ) + +metadata = MetaData( migrate_engine ) +db_session = scoped_session( sessionmaker( bind=migrate_engine, autoflush=False, autocommit=True ) ) + +def display_migration_details(): + print "========================================" + print "This script adds the filename_override_metadata column to the JobExternalOutputMetadata table," + print" allowing existing metadata files to be written when using external metadata and a cluster" + print "set up with read-only access to database/files" + print "========================================" +def upgrade(): + display_migration_details() + # Load existing tables + metadata.reflect() + try: + job_external_output_metadata = Table( "job_external_output_metadata", metadata, autoload=True ) + col = Column( "filename_override_metadata", String( 255 ) ) + col.create( job_external_output_metadata ) + assert col is job_external_output_metadata.c.filename_override_metadata + except Exception, e: + log.debug( "Adding column 'filename_override_metadata' to job_external_output_metadata table failed: %s" % ( str( e ) ) ) + +def downgrade(): + pass diff -r 9f7a2a3be0e7 -r 92d6a9796b24 scripts/set_metadata.py --- a/scripts/set_metadata.py Mon Nov 30 11:59:54 2009 -0500 +++ b/scripts/set_metadata.py Mon Nov 30 14:55:30 2009 -0500 @@ -50,17 +50,36 @@ except: continue for filenames in sys.argv[1:]: - filename_in, filename_kwds, filename_out, 
filename_results_code, dataset_filename_override = filenames.split( ',' ) + fields = filenames.split( ',' ) + filename_in = fields.pop( 0 ) + filename_kwds = fields.pop( 0 ) + filename_out = fields.pop( 0 ) + filename_results_code = fields.pop( 0 ) + dataset_filename_override = fields.pop( 0 ) + #Need to be careful with the way that these parameters are populated from the filename splitting, + #because if a job is running when the server is updated, any existing external metadata command-lines + #will not have info about the newly added override_metadata file + if fields: + override_metadata = fields.pop( 0 ) + else: + override_metadata = None try: dataset = cPickle.load( open( filename_in ) ) #load DatasetInstance if dataset_filename_override: dataset.dataset.external_filename = dataset_filename_override if ext_override.get( dataset.dataset.id, None ): dataset.extension = ext_override[ dataset.dataset.id ] + #Metadata FileParameter types may not be writable on a cluster node, and are therefore temporarily substituted with MetadataTempFiles + if override_metadata: + override_metadata = simplejson.load( open( override_metadata ) ) + for metadata_name, metadata_file_override in override_metadata: + if galaxy.datatypes.metadata.MetadataTempFile.is_JSONified_value( metadata_file_override ): + metadata_file_override = galaxy.datatypes.metadata.MetadataTempFile.from_JSON( metadata_file_override ) + setattr( dataset.metadata, metadata_name, metadata_file_override ) kwds = stringify_dictionary_keys( simplejson.load( open( filename_kwds ) ) )#load kwds; need to ensure our keywords are not unicode dataset.datatype.set_meta( dataset, **kwds ) dataset.metadata.to_JSON_dict( filename_out ) # write out results of set_meta - simplejson.dump( ( True, 'Metadata has been set successfully' ), open( filename_results_code, 'wb+' ) ) #setting metadata has suceeded + simplejson.dump( ( True, 'Metadata has been set successfully' ), open( filename_results_code, 'wb+' ) ) #setting metadata 
has succeeded except Exception, e: simplejson.dump( ( False, str( e ) ), open( filename_results_code, 'wb+' ) ) #setting metadata has failed somehow clear_mappers() diff -r 9f7a2a3be0e7 -r 92d6a9796b24 tools/regVariation/quality_filter.xml --- a/tools/regVariation/quality_filter.xml Mon Nov 30 11:59:54 2009 -0500 +++ b/tools/regVariation/quality_filter.xml Mon Nov 30 14:55:30 2009 -0500 @@ -61,7 +61,7 @@ </conditional> </inputs> <outputs> - <data format="maf" name="out_file1"/> + <data format="maf" name="out_file1" metadata_source="input"/> </outputs> <requirements> <requirement type="python-module">numpy</requirement>