[hg] galaxy 2375: Remove direct references of the model from the...
details: http://www.bx.psu.edu/hg/galaxy/rev/7fd4f748b0ca
changeset: 2375:7fd4f748b0ca
user:      Dan Blankenberg <dan@bx.psu.edu>
date:      Mon Apr 27 16:30:08 2009 -0400
description:
Remove direct references of the model from the cleanup_datasets_fix migration script.

1 file(s) affected in this change:

lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py

diffs (709 lines):

diff -r d0c905db68db -r 7fd4f748b0ca lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py	Mon Apr 27 16:30:08 2009 -0400
@@ -0,0 +1,705 @@
+import sys, logging, os, time, datetime, errno
+
+log = logging.getLogger( __name__ )
+log.setLevel(logging.DEBUG)
+handler = logging.StreamHandler( sys.stdout )
+format = "%(name)s %(levelname)s %(asctime)s %(message)s"
+formatter = logging.Formatter( format )
+handler.setFormatter( formatter )
+log.addHandler( handler )
+
+from migrate import migrate_engine
+from sqlalchemy import and_
+
+from sqlalchemy import *
+now = datetime.datetime.utcnow
+from sqlalchemy.orm import *
+
+from galaxy.model.orm.ext.assignmapper import assign_mapper
+
+from galaxy.model.custom_types import *
+
+from galaxy.util.bunch import Bunch
+
+
+metadata = MetaData( migrate_engine )
+context = scoped_session( sessionmaker( autoflush=False, transactional=False ) )
+
+
+## classes
+def get_permitted_actions( **kwds ):
+    return Bunch()
+
+def directory_hash_id( id ):
+    s = str( id )
+    l = len( s )
+    # Shortcut -- ids 0-999 go under ../000/
+    if l < 4:
+        return [ "000" ]
+    # Pad with zeros until a multiple of three
+    padded = ( ( 3 - len( s ) % 3 ) * "0" ) + s
+    # Drop the last three digits -- 1000 files per directory
+    padded = padded[:-3]
+    # Break into chunks of three
+    return [ padded[i*3:(i+1)*3] for i in range( len( padded ) // 3 ) ]
+
+
+class Dataset( object ):
+    states = Bunch( NEW = 'new',
+                    UPLOAD = 'upload',
+                    QUEUED = 'queued',
+                    RUNNING = 'running',
+                    OK = 'ok',
+                    EMPTY = 'empty',
+                    ERROR = 'error',
+                    DISCARDED = 'discarded' )
+    permitted_actions = get_permitted_actions( filter='DATASET' )
+    file_path = "/tmp/"
+    engine = None
+    def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True ):
+        self.id = id
+        self.state = state
+        self.deleted = False
+        self.purged = False
+        self.purgable = purgable
+        self.external_filename = external_filename
+        self._extra_files_path = extra_files_path
+        self.file_size = file_size
+    def get_file_name( self ):
+        if not self.external_filename:
+            assert self.id is not None, "ID must be set before filename used (commit the object)"
+            # First try filename directly under file_path
+            filename = os.path.join( self.file_path, "dataset_%d.dat" % self.id )
+            # Only use that filename if it already exists (backward compatibility),
+            # otherwise construct hashed path
+            if not os.path.exists( filename ):
+                dir = os.path.join( self.file_path, *directory_hash_id( self.id ) )
+                # Create directory if it does not exist
+                try:
+                    os.makedirs( dir )
+                except OSError, e:
+                    # File Exists is okay, otherwise reraise
+                    if e.errno != errno.EEXIST:
+                        raise
+                # Return filename inside hashed directory
+                return os.path.abspath( os.path.join( dir, "dataset_%d.dat" % self.id ) )
+        else:
+            filename = self.external_filename
+        # Make filename absolute
+        return os.path.abspath( filename )
+    def set_file_name ( self, filename ):
+        if not filename:
+            self.external_filename = None
+        else:
+            self.external_filename = filename
+    file_name = property( get_file_name, set_file_name )
+    @property
+    def extra_files_path( self ):
+        if self._extra_files_path:
+            path = self._extra_files_path
+        else:
+            path = os.path.join( self.file_path, "dataset_%d_files" % self.id )
+            #only use path directly under self.file_path if it exists
+            if not os.path.exists( path ):
+                path = os.path.join( os.path.join( self.file_path, *directory_hash_id( self.id ) ), "dataset_%d_files" % self.id )
+        # Make path absolute
+        return os.path.abspath( path )
+    def get_size( self ):
+        """Returns the size of the data on disk"""
+        if self.file_size:
+            return self.file_size
+        else:
+            try:
+                return os.path.getsize( self.file_name )
+            except OSError:
+                return 0
+    def set_size( self ):
+        """Sets the size of the data on disk"""
+        try:
+            if not self.file_size:
+                self.file_size = os.path.getsize( self.file_name )
+        except OSError:
+            self.file_size = 0
+    def has_data( self ):
+        """Detects whether there is any data"""
+        return self.get_size() > 0
+    def mark_deleted( self, include_children=True ):
+        self.deleted = True
+    # FIXME: sqlalchemy will replace this
+    def _delete(self):
+        """Remove the file that corresponds to this data"""
+        try:
+            os.remove(self.data.file_name)
+        except OSError, e:
+            log.critical('%s delete error %s' % (self.__class__.__name__, e))
+
+class DatasetInstance( object ):
+    """A base class for all 'dataset instances', HDAs, LDAs, etc"""
+    states = Dataset.states
+    permitted_actions = Dataset.permitted_actions
+    def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, extension=None,
+                  dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
+                  parent_id=None, validation_errors=None, visible=True, create_dataset = False ):
+        self.name = name or "Unnamed dataset"
+        self.id = id
+        self.info = info
+        self.blurb = blurb
+        self.peek = peek
+        self.extension = extension
+        self.designation = designation
+        self.metadata = metadata or dict()
+        if dbkey: #dbkey is stored in metadata, only set if non-zero, or else we could clobber one supplied by input 'metadata'
+            self.dbkey = dbkey
+        self.deleted = deleted
+        self.visible = visible
+        # Relationships
+        if not dataset and create_dataset:
+            dataset = Dataset( state=Dataset.states.NEW )
+            dataset.flush()
+        self.dataset = dataset
+        self.parent_id = parent_id
+        self.validation_errors = validation_errors
+    @property
+    def ext( self ):
+        return self.extension
+    def get_dataset_state( self ):
+        return self.dataset.state
+    def set_dataset_state ( self, state ):
+        self.dataset.state = state
+        self.dataset.flush() #flush here, because hda.flush() won't flush the Dataset object
+    state = property( get_dataset_state, set_dataset_state )
+    def get_file_name( self ):
+        return self.dataset.get_file_name()
+    def set_file_name (self, filename):
+        return self.dataset.set_file_name( filename )
+    file_name = property( get_file_name, set_file_name )
+    @property
+    def extra_files_path( self ):
+        return self.dataset.extra_files_path
+    @property
+    def datatype( self ):
+        return datatypes_registry.get_datatype_by_extension( self.extension )
+    def get_metadata( self ):
+        if not hasattr( self, '_metadata_collection' ) or self._metadata_collection.parent != self: #using weakref to store parent (to prevent circ ref), does a context.clear() cause parent to be invalidated, while still copying over this non-database attribute?
+            self._metadata_collection = MetadataCollection( self )
+        return self._metadata_collection
+    def set_metadata( self, bunch ):
+        # Needs to accept a MetadataCollection, a bunch, or a dict
+        self._metadata = self.metadata.make_dict_copy( bunch )
+    metadata = property( get_metadata, set_metadata )
+    # This provides backwards compatibility with using the old dbkey
+    # field in the database. That field now maps to "old_dbkey" (see mapping.py).
+    def get_dbkey( self ):
+        dbkey = self.metadata.dbkey
+        if not isinstance(dbkey, list): dbkey = [dbkey]
+        if dbkey in [[None], []]: return "?"
+        return dbkey[0]
+    def set_dbkey( self, value ):
+        if "dbkey" in self.datatype.metadata_spec:
+            if not isinstance(value, list):
+                self.metadata.dbkey = [value]
+            else:
+                self.metadata.dbkey = value
+    dbkey = property( get_dbkey, set_dbkey )
+    def change_datatype( self, new_ext ):
+        self.clear_associated_files()
+        datatypes_registry.change_datatype( self, new_ext )
+    def get_size( self ):
+        """Returns the size of the data on disk"""
+        return self.dataset.get_size()
+    def set_size( self ):
+        """Sets the size of the data on disk"""
+        return self.dataset.set_size()
+    def has_data( self ):
+        """Detects whether there is any data"""
+        return self.dataset.has_data()
+    def get_raw_data( self ):
+        """Returns the full data. To stream it open the file_name and read/write as needed"""
+        return self.datatype.get_raw_data( self )
+    def write_from_stream( self, stream ):
+        """Writes data from a stream"""
+        self.datatype.write_from_stream(self, stream)
+    def set_raw_data( self, data ):
+        """Saves the data on the disk"""
+        self.datatype.set_raw_data(self, data)
+    def get_mime( self ):
+        """Returns the mime type of the data"""
+        return datatypes_registry.get_mimetype_by_extension( self.extension.lower() )
+    def set_peek( self ):
+        return self.datatype.set_peek( self )
+    def set_multi_byte_peek( self ):
+        return self.datatype.set_multi_byte_peek( self )
+    def init_meta( self, copy_from=None ):
+        return self.datatype.init_meta( self, copy_from=copy_from )
+    def set_meta( self, **kwd ):
+        self.clear_associated_files( metadata_safe = True )
+        return self.datatype.set_meta( self, **kwd )
+    def set_readonly_meta( self, **kwd ):
+        return self.datatype.set_readonly_meta( self, **kwd )
+    def missing_meta( self, **kwd ):
+        return self.datatype.missing_meta( self, **kwd )
+    def as_display_type( self, type, **kwd ):
+        return self.datatype.as_display_type( self, type, **kwd )
+    def display_peek( self ):
+        return self.datatype.display_peek( self )
+    def display_name( self ):
+        return self.datatype.display_name( self )
+    def display_info( self ):
+        return self.datatype.display_info( self )
+    def get_converted_files_by_type( self, file_type ):
+        valid = []
+        for assoc in self.implicitly_converted_datasets:
+            if not assoc.deleted and assoc.type == file_type:
+                valid.append( assoc.dataset )
+        return valid
+    def clear_associated_files( self, metadata_safe = False, purge = False ):
+        raise 'Unimplemented'
+    def get_child_by_designation(self, designation):
+        for child in self.children:
+            if child.designation == designation:
+                return child
+        return None
+    def get_converter_types(self):
+        return self.datatype.get_converter_types( self, datatypes_registry)
+    def find_conversion_destination( self, accepted_formats, **kwd ):
+        """Returns ( target_ext, existing converted dataset )"""
+        return self.datatype.find_conversion_destination( self, accepted_formats, datatypes_registry, **kwd )
+    def add_validation_error( self, validation_error ):
+        self.validation_errors.append( validation_error )
+    def extend_validation_errors( self, validation_errors ):
+        self.validation_errors.extend(validation_errors)
+    def mark_deleted( self, include_children=True ):
+        self.deleted = True
+        if include_children:
+            for child in self.children:
+                child.mark_deleted()
+    def mark_undeleted( self, include_children=True ):
+        self.deleted = False
+        if include_children:
+            for child in self.children:
+                child.mark_undeleted()
+    def undeletable( self ):
+        if self.purged:
+            return False
+        return True
+    @property
+    def source_library_dataset( self ):
+        def get_source( dataset ):
+            if isinstance( dataset, LibraryDatasetDatasetAssociation ):
+                if dataset.library_dataset:
+                    return ( dataset, dataset.library_dataset )
+            if dataset.copied_from_library_dataset_dataset_association:
+                source = get_source( dataset.copied_from_library_dataset_dataset_association )
+                if source:
+                    return source
+            if dataset.copied_from_history_dataset_association:
+                source = get_source( dataset.copied_from_history_dataset_association )
+                if source:
+                    return source
+            return ( None, None )
+        return get_source( self )
+
+
+class HistoryDatasetAssociation( DatasetInstance ):
+    def __init__( self,
+                  hid = None,
+                  history = None,
+                  copied_from_history_dataset_association = None,
+                  copied_from_library_dataset_dataset_association = None,
+                  **kwd ):
+        DatasetInstance.__init__( self, **kwd )
+        self.hid = hid
+        # Relationships
+        self.history = history
+        self.copied_from_history_dataset_association = copied_from_history_dataset_association
+        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
+    def copy( self, copy_children = False, parent_id = None, target_history = None ):
+        hda = HistoryDatasetAssociation( hid=self.hid,
+                                         name=self.name,
+                                         info=self.info,
+                                         blurb=self.blurb,
+                                         peek=self.peek,
+                                         extension=self.extension,
+                                         dbkey=self.dbkey,
+                                         dataset = self.dataset,
+                                         visible=self.visible,
+                                         deleted=self.deleted,
+                                         parent_id=parent_id,
+                                         copied_from_history_dataset_association=self,
+                                         history = target_history )
+        hda.flush()
+        hda.set_size()
+        # Need to set after flushed, as MetadataFiles require dataset.id
+        hda.metadata = self.metadata
+        if copy_children:
+            for child in self.children:
+                child_copy = child.copy( copy_children = copy_children, parent_id = hda.id )
+        if not self.datatype.copy_safe_peek:
+            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
+            hda.set_peek()
+        hda.flush()
+        return hda
+    def to_library_dataset_dataset_association( self, target_folder, replace_dataset=None, parent_id=None ):
+        if replace_dataset:
+            # The replace_dataset param ( when not None ) refers to a LibraryDataset that is being replaced with a new version.
+            library_dataset = replace_dataset
+        else:
+            # If replace_dataset is None, the Library level permissions will be taken from the folder and applied to the new
+            # LibraryDataset, and the current user's DefaultUserPermissions will be applied to the associated Dataset.
+            library_dataset = LibraryDataset( folder=target_folder, name=self.name, info=self.info )
+            library_dataset.flush()
+        ldda = LibraryDatasetDatasetAssociation( name=self.name,
+                                                 info=self.info,
+                                                 blurb=self.blurb,
+                                                 peek=self.peek,
+                                                 extension=self.extension,
+                                                 dbkey=self.dbkey,
+                                                 dataset=self.dataset,
+                                                 library_dataset=library_dataset,
+                                                 visible=self.visible,
+                                                 deleted=self.deleted,
+                                                 parent_id=parent_id,
+                                                 copied_from_history_dataset_association=self,
+                                                 user=self.history.user )
+        ldda.flush()
+        # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset
+        # Must set metadata after ldda flushed, as MetadataFiles require ldda.id
+        ldda.metadata = self.metadata
+        if not replace_dataset:
+            target_folder.add_library_dataset( library_dataset, genome_build=ldda.dbkey )
+            target_folder.flush()
+        library_dataset.library_dataset_dataset_association_id = ldda.id
+        library_dataset.flush()
+        for child in self.children:
+            child_copy = child.to_library_dataset_dataset_association( target_folder=target_folder, replace_dataset=replace_dataset, parent_id=ldda.id )
+        if not self.datatype.copy_safe_peek:
+            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
+            ldda.set_peek()
+        ldda.flush()
+        return ldda
+    def clear_associated_files( self, metadata_safe = False, purge = False ):
+        # metadata_safe = True means to only clear when assoc.metadata_safe == False
+        for assoc in self.implicitly_converted_datasets:
+            if not metadata_safe or not assoc.metadata_safe:
+                assoc.clear( purge = purge )
+
+
+
+class LibraryDatasetDatasetAssociation( DatasetInstance ):
+    def __init__( self,
+                  copied_from_history_dataset_association=None,
+                  copied_from_library_dataset_dataset_association=None,
+                  library_dataset=None,
+                  user=None,
+                  **kwd ):
+        DatasetInstance.__init__( self, **kwd )
+        self.copied_from_history_dataset_association = copied_from_history_dataset_association
+        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
+        self.library_dataset = library_dataset
+        self.user = user
+    def to_history_dataset_association( self, target_history, parent_id=None ):
+        hid = target_history._next_hid()
+        hda = HistoryDatasetAssociation( name=self.name,
+                                         info=self.info,
+                                         blurb=self.blurb,
+                                         peek=self.peek,
+                                         extension=self.extension,
+                                         dbkey=self.dbkey,
+                                         dataset=self.dataset,
+                                         visible=self.visible,
+                                         deleted=self.deleted,
+                                         parent_id=parent_id,
+                                         copied_from_library_dataset_dataset_association=self,
+                                         history=target_history,
+                                         hid=hid )
+        hda.flush()
+        hda.metadata = self.metadata #need to set after flushed, as MetadataFiles require dataset.id
+        for child in self.children:
+            child_copy = child.to_history_dataset_association( target_history=target_history, parent_id=hda.id )
+        if not self.datatype.copy_safe_peek:
+            hda.set_peek() #in some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
+        hda.flush()
+        return hda
+    def copy( self, copy_children = False, parent_id = None, target_folder = None ):
+        ldda = LibraryDatasetDatasetAssociation( name=self.name,
+                                                 info=self.info,
+                                                 blurb=self.blurb,
+                                                 peek=self.peek,
+                                                 extension=self.extension,
+                                                 dbkey=self.dbkey,
+                                                 dataset=self.dataset,
+                                                 visible=self.visible,
+                                                 deleted=self.deleted,
+                                                 parent_id=parent_id,
+                                                 copied_from_library_dataset_dataset_association=self,
+                                                 folder=target_folder )
+        ldda.flush()
+        # Need to set after flushed, as MetadataFiles require dataset.id
+        ldda.metadata = self.metadata
+        if copy_children:
+            for child in self.children:
+                child_copy = child.copy( copy_children = copy_children, parent_id = ldda.id )
+        if not self.datatype.copy_safe_peek:
+            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
+            ldda.set_peek()
+        ldda.flush()
+        return ldda
+    def clear_associated_files( self, metadata_safe = False, purge = False ):
+        return
+    def get_library_item_info_templates( self, template_list=[], restrict=False ):
+        # If restrict is True, we'll return only those templates directly associated with this LibraryDatasetDatasetAssociation
+        if self.library_dataset_dataset_info_template_associations:
+            template_list.extend( [ lddita.library_item_info_template for lddita in self.library_dataset_dataset_info_template_associations if lddita.library_item_info_template not in template_list ] )
+        self.library_dataset.get_library_item_info_templates( template_list, restrict )
+        return template_list
+
+
+
+class LibraryDataset( object ):
+    # This class acts as a proxy to the currently selected LDDA
+    def __init__( self, folder=None, order_id=None, name=None, info=None, library_dataset_dataset_association=None, **kwd ):
+        self.folder = folder
+        self.order_id = order_id
+        self.name = name
+        self.info = info
+        self.library_dataset_dataset_association = library_dataset_dataset_association
+    def set_library_dataset_dataset_association( self, ldda ):
+        self.library_dataset_dataset_association = ldda
+        ldda.library_dataset = self
+        ldda.flush()
+        self.flush()
+    def get_info( self ):
+        if self.library_dataset_dataset_association:
+            return self.library_dataset_dataset_association.info
+        elif self._info:
+            return self._info
+        else:
+            return 'no info'
+    def set_info( self, info ):
+        self._info = info
+    info = property( get_info, set_info )
+    def get_name( self ):
+        if self.library_dataset_dataset_association:
+            return self.library_dataset_dataset_association.name
+        elif self._name:
+            return self._name
+        else:
+            return 'Unnamed dataset'
+    def set_name( self, name ):
+        self._name = name
+    name = property( get_name, set_name )
+    def display_name( self ):
+        self.library_dataset_dataset_association.display_name()
+    def get_purged( self ):
+        return self.library_dataset_dataset_association.dataset.purged
+    def set_purged( self, purged ):
+        if purged:
+            raise Exception( "Not implemented" )
+        if not purged and self.purged:
+            raise Exception( "Cannot unpurge once purged" )
+    purged = property( get_purged, set_purged )
+    def get_library_item_info_templates( self, template_list=[], restrict=False ):
+        # If restrict is True, we'll return only those templates directly associated with this LibraryDataset
+        if self.library_dataset_info_template_associations:
+            template_list.extend( [ ldita.library_item_info_template for ldita in self.library_dataset_info_template_associations if ldita.library_item_info_template not in template_list ] )
+        if restrict not in [ 'True', True ]:
+            self.folder.get_library_item_info_templates( template_list, restrict )
+        return template_list
+
+##tables
+
+
+Dataset.table = Table( "dataset", metadata,
+    Column( "id", Integer, primary_key=True ),
+    Column( "create_time", DateTime, default=now ),
+    Column( "update_time", DateTime, index=True, default=now, onupdate=now ),
+    Column( "state", TrimmedString( 64 ) ),
+    Column( "deleted", Boolean, index=True, default=False ),
+    Column( "purged", Boolean, index=True, default=False ),
+    Column( "purgable", Boolean, default=True ),
+    Column( "external_filename" , TEXT ),
+    Column( "_extra_files_path", TEXT ),
+    Column( 'file_size', Numeric( 15, 0 ) ) )
+
+
+
+HistoryDatasetAssociation.table = Table( "history_dataset_association", metadata,
+    Column( "id", Integer, primary_key=True ),
+    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
+    Column( "create_time", DateTime, default=now ),
+    Column( "update_time", DateTime, default=now, onupdate=now ),
+    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
+    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
+    Column( "hid", Integer ),
+    Column( "name", TrimmedString( 255 ) ),
+    Column( "info", TrimmedString( 255 ) ),
+    Column( "blurb", TrimmedString( 255 ) ),
+    Column( "peek" , TEXT ),
+    Column( "extension", TrimmedString( 64 ) ),
+    Column( "metadata", MetadataType(), key="_metadata" ),
+    Column( "parent_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
+    Column( "designation", TrimmedString( 255 ) ),
+    Column( "deleted", Boolean, index=True, default=False ),
+    Column( "visible", Boolean ) )
+
+
+LibraryDatasetDatasetAssociation.table = Table( "library_dataset_dataset_association", metadata,
+    Column( "id", Integer, primary_key=True ),
+    Column( "library_dataset_id", Integer, ForeignKey( "library_dataset.id" ), index=True ),
+    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
+    Column( "create_time", DateTime, default=now ),
+    Column( "update_time", DateTime, default=now, onupdate=now ),
+    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id", use_alter=True, name='history_dataset_association_dataset_id_fkey' ), nullable=True ),
+    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name='library_dataset_dataset_association_id_fkey' ), nullable=True ),
+    Column( "name", TrimmedString( 255 ) ),
+    Column( "info", TrimmedString( 255 ) ),
+    Column( "blurb", TrimmedString( 255 ) ),
+    Column( "peek" , TEXT ),
+    Column( "extension", TrimmedString( 64 ) ),
+    Column( "metadata", MetadataType(), key="_metadata" ),
+    Column( "parent_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
+    Column( "designation", TrimmedString( 255 ) ),
+    Column( "deleted", Boolean, index=True, default=False ),
+    Column( "visible", Boolean ),
+    Column( "message", TrimmedString( 255 ) ) )
+
+LibraryDataset.table = Table( "library_dataset", metadata,
+    Column( "id", Integer, primary_key=True ),
+    Column( "library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name="library_dataset_dataset_association_id_fk" ), nullable=True, index=True ), #current version of dataset, if null, there is not a current version selected
+    Column( "order_id", Integer ),
+    Column( "create_time", DateTime, default=now ),
+    Column( "update_time", DateTime, default=now, onupdate=now ),
+    Column( "name", TrimmedString( 255 ), key="_name" ), #when not None/null this will supersede display in library (but not when imported into user's history?)
+    Column( "info", TrimmedString( 255 ), key="_info" ), #when not None/null this will supersede display in library (but not when imported into user's history?)
+    Column( "deleted", Boolean, index=True, default=False ) )
+
+
+
+##mappers
+
+
+assign_mapper( context, Dataset, Dataset.table,
+    properties=dict(
+        history_associations=relation(
+            HistoryDatasetAssociation,
+            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) ),
+        active_history_associations=relation(
+            HistoryDatasetAssociation,
+            primaryjoin=( ( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) & ( HistoryDatasetAssociation.table.c.deleted == False ) ) ),
+        library_associations=relation(
+            LibraryDatasetDatasetAssociation,
+            primaryjoin=( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) ),
+        active_library_associations=relation(
+            LibraryDatasetDatasetAssociation,
+            primaryjoin=( ( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) & ( LibraryDatasetDatasetAssociation.table.c.deleted == False ) ) )
+    ) )
+
+
+assign_mapper( context, HistoryDatasetAssociation, HistoryDatasetAssociation.table,
+    properties=dict(
+        dataset=relation(
+            Dataset,
+            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ), lazy=False ),
+        # .history defined in History mapper
+        copied_to_history_dataset_associations=relation(
+            HistoryDatasetAssociation,
+            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ),
+            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
+        copied_to_library_dataset_dataset_associations=relation(
+            LibraryDatasetDatasetAssociation,
+            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
+            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
+        children=relation(
+            HistoryDatasetAssociation,
+            primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ),
+            backref=backref( "parent", primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
+        visible_children=relation(
+            HistoryDatasetAssociation,
+            primaryjoin=( ( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ) & ( HistoryDatasetAssociation.table.c.visible == True ) ) )
+    ) )
+
+assign_mapper( context, LibraryDatasetDatasetAssociation, LibraryDatasetDatasetAssociation.table,
+    properties=dict(
+        dataset=relation( Dataset ),
+        library_dataset = relation( LibraryDataset,
+            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.library_dataset_id == LibraryDataset.table.c.id ) ),
+        copied_to_library_dataset_dataset_associations=relation(
+            LibraryDatasetDatasetAssociation,
+            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
+            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
+        copied_to_history_dataset_associations=relation(
+            HistoryDatasetAssociation,
+            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
+            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
+        children=relation(
+            LibraryDatasetDatasetAssociation,
+            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ),
+            backref=backref( "parent", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
+        visible_children=relation(
+            LibraryDatasetDatasetAssociation,
+            primaryjoin=( ( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ) & ( LibraryDatasetDatasetAssociation.table.c.visible == True ) ) )
+    ) )
+
+assign_mapper( context, LibraryDataset, LibraryDataset.table,
+    properties=dict(
+        library_dataset_dataset_association=relation( LibraryDatasetDatasetAssociation, primaryjoin=( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ),
+        expired_datasets = relation( LibraryDatasetDatasetAssociation, foreign_keys=[LibraryDataset.table.c.id, LibraryDataset.table.c.library_dataset_dataset_association_id ], primaryjoin=( ( LibraryDataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.library_dataset_id ) & ( not_( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ) ), viewonly=True, uselist=True )
+    ) )
+
+
+def __guess_dataset_by_filename( filename ):
+    """Return a guessed dataset by filename"""
+    try:
+        fields = os.path.split( filename )
+        if fields:
+            if fields[-1].startswith( 'dataset_' ) and fields[-1].endswith( '.dat' ): #dataset_%d.dat
+                return Dataset.get( int( fields[-1][ len( 'dataset_' ): -len( '.dat' ) ] ) )
+    except:
+        pass #some parsing error, we can't guess Dataset
+    return None
+
+def upgrade():
+    log.debug( "Fixing a discrepancy concerning deleted shared history items." )
+    affected_items = 0
+    start_time = time.time()
+    for dataset in Dataset.filter( and_( Dataset.c.deleted == True, Dataset.c.purged == False ) ).all():
+        for dataset_instance in dataset.history_associations + dataset.library_associations:
+            if not dataset_instance.deleted:
+                dataset.deleted = False
+                if dataset.file_size in [ None, 0 ]:
+                    dataset.set_size() #Restore filesize
+                affected_items += 1
+                break
+    context.flush()
+    log.debug( "%i items affected, and restored." % ( affected_items ) )
+    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
+
+    #fix share before hda
+    log.debug( "Fixing a discrepancy concerning cleaning up deleted history items shared before HDAs." )
+    dataset_by_filename = {}
+    changed_associations = 0
+    start_time = time.time()
+    for dataset in Dataset.filter( Dataset.external_filename.like( '%dataset_%.dat' ) ).all():
+        if dataset.file_name in dataset_by_filename:
+            guessed_dataset = dataset_by_filename[ dataset.file_name ]
+        else:
+            guessed_dataset = __guess_dataset_by_filename( dataset.file_name )
+            if guessed_dataset and dataset.file_name != guessed_dataset.file_name: #not os.path.samefile( dataset.file_name, guessed_dataset.file_name ):
+                guessed_dataset = None
+            dataset_by_filename[ dataset.file_name ] = guessed_dataset
+
+        if guessed_dataset is not None and guessed_dataset.id != dataset.id: #could we have a self referential dataset?
+            for dataset_instance in dataset.history_associations + dataset.library_associations:
+                dataset_instance.dataset = guessed_dataset
+                changed_associations += 1
+            #mark original Dataset as deleted and purged, it is no longer in use, but do not delete file_name contents
+            dataset.deleted = True
+            dataset.external_filename = "Dataset was result of share before HDA, and has been replaced: %s mapped to Dataset %s" % ( dataset.external_filename, guessed_dataset.id )
+            dataset.purged = True #we don't really purge the file here, but we mark it as purged, since this dataset is now defunct
+    context.flush()
+    log.debug( "%i items affected, and restored." % ( changed_associations ) )
+    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
+
+def downgrade():
+    log.debug( "Downgrade is not possible." )
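
A note for readers skimming the diff: the directory_hash_id helper above determines where Galaxy places dataset files on disk. Ids 0-999 share a single 000/ directory; larger ids are zero-padded to a multiple of three digits, the last three digits are dropped (so each leaf directory holds at most 1000 files), and the remaining digits become nested three-digit path components. The following is a minimal standalone sketch of that scheme (Python 3; dataset_path is a hypothetical helper added here for illustration and is not part of the changeset):

import os

def directory_hash_id(dataset_id):
    # Same scheme as directory_hash_id in the migration script above.
    s = str(dataset_id)
    if len(s) < 4:                          # ids 0-999 all live under ./000/
        return ["000"]
    padded = ((3 - len(s) % 3) * "0") + s   # zero-pad to a multiple of three digits
    padded = padded[:-3]                    # drop last three digits: 1000 files per leaf dir
    return [padded[i * 3:(i + 1) * 3] for i in range(len(padded) // 3)]

def dataset_path(file_path, dataset_id):
    # Hypothetical helper: the hashed on-disk location of a dataset file.
    parts = directory_hash_id(dataset_id) + ["dataset_%d.dat" % dataset_id]
    return os.path.join(file_path, *parts)

if __name__ == "__main__":
    for i in (42, 999, 1000, 1234567):
        print("%7d -> %s" % (i, dataset_path("/tmp", i)))
    # 42      -> /tmp/000/dataset_42.dat
    # 999     -> /tmp/000/dataset_999.dat
    # 1000    -> /tmp/001/dataset_1000.dat
    # 1234567 -> /tmp/001/234/dataset_1234567.dat

This matches the path Dataset.get_file_name builds when a flat dataset_%d.dat file does not already exist directly under file_path, which is also why __guess_dataset_by_filename in the upgrade only inspects the final dataset_%d.dat component of a filename.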