galaxy-dev

29 Apr '09
details: http://www.bx.psu.edu/hg/galaxy/rev/7fd4f748b0ca
changeset: 2375:7fd4f748b0ca
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Mon Apr 27 16:30:08 2009 -0400
description:
Remove direct references of the model from the cleanup_datasets_fix migration script.
1 file(s) affected in this change:
lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py
diffs (709 lines):
diff -r d0c905db68db -r 7fd4f748b0ca lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py Mon Apr 27 16:30:08 2009 -0400
@@ -0,0 +1,705 @@
+import sys, logging, os, time, datetime, errno
+
+log = logging.getLogger( __name__ )
+log.setLevel(logging.DEBUG)
+handler = logging.StreamHandler( sys.stdout )
+format = "%(name)s %(levelname)s %(asctime)s %(message)s"
+formatter = logging.Formatter( format )
+handler.setFormatter( formatter )
+log.addHandler( handler )
+
+from migrate import migrate_engine
+from sqlalchemy import and_
+
+from sqlalchemy import *
+now = datetime.datetime.utcnow
+from sqlalchemy.orm import *
+
+from galaxy.model.orm.ext.assignmapper import assign_mapper
+
+from galaxy.model.custom_types import *
+
+from galaxy.util.bunch import Bunch
+
+
+metadata = MetaData( migrate_engine )
+context = scoped_session( sessionmaker( autoflush=False, transactional=False ) )
+
+
+## classes
+def get_permitted_actions( **kwds ):
+ return Bunch()
+
+def directory_hash_id( id ):
+ s = str( id )
+ l = len( s )
+ # Shortcut -- ids 0-999 go under ../000/
+ if l < 4:
+ return [ "000" ]
+ # Pad with zeros until a multiple of three
+ padded = ( ( 3 - len( s ) % 3 ) * "0" ) + s
+ # Drop the last three digits -- 1000 files per directory
+ padded = padded[:-3]
+ # Break into chunks of three
+ return [ padded[i*3:(i+1)*3] for i in range( len( padded ) // 3 ) ]
+
+
+class Dataset( object ):
+ states = Bunch( NEW = 'new',
+ UPLOAD = 'upload',
+ QUEUED = 'queued',
+ RUNNING = 'running',
+ OK = 'ok',
+ EMPTY = 'empty',
+ ERROR = 'error',
+ DISCARDED = 'discarded' )
+ permitted_actions = get_permitted_actions( filter='DATASET' )
+ file_path = "/tmp/"
+ engine = None
+ def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True ):
+ self.id = id
+ self.state = state
+ self.deleted = False
+ self.purged = False
+ self.purgable = purgable
+ self.external_filename = external_filename
+ self._extra_files_path = extra_files_path
+ self.file_size = file_size
+ def get_file_name( self ):
+ if not self.external_filename:
+ assert self.id is not None, "ID must be set before filename used (commit the object)"
+ # First try filename directly under file_path
+ filename = os.path.join( self.file_path, "dataset_%d.dat" % self.id )
+ # Only use that filename if it already exists (backward compatibility),
+ # otherwise construct hashed path
+ if not os.path.exists( filename ):
+ dir = os.path.join( self.file_path, *directory_hash_id( self.id ) )
+ # Create directory if it does not exist
+ try:
+ os.makedirs( dir )
+ except OSError, e:
+ # File Exists is okay, otherwise reraise
+ if e.errno != errno.EEXIST:
+ raise
+ # Return filename inside hashed directory
+ return os.path.abspath( os.path.join( dir, "dataset_%d.dat" % self.id ) )
+ else:
+ filename = self.external_filename
+ # Make filename absolute
+ return os.path.abspath( filename )
+ def set_file_name ( self, filename ):
+ if not filename:
+ self.external_filename = None
+ else:
+ self.external_filename = filename
+ file_name = property( get_file_name, set_file_name )
+ @property
+ def extra_files_path( self ):
+ if self._extra_files_path:
+ path = self._extra_files_path
+ else:
+ path = os.path.join( self.file_path, "dataset_%d_files" % self.id )
+ #only use path directly under self.file_path if it exists
+ if not os.path.exists( path ):
+ path = os.path.join( os.path.join( self.file_path, *directory_hash_id( self.id ) ), "dataset_%d_files" % self.id )
+ # Make path absolute
+ return os.path.abspath( path )
+ def get_size( self ):
+ """Returns the size of the data on disk"""
+ if self.file_size:
+ return self.file_size
+ else:
+ try:
+ return os.path.getsize( self.file_name )
+ except OSError:
+ return 0
+ def set_size( self ):
+ """Returns the size of the data on disk"""
+ try:
+ if not self.file_size:
+ self.file_size = os.path.getsize( self.file_name )
+ except OSError:
+ self.file_size = 0
+ def has_data( self ):
+ """Detects whether there is any data"""
+ return self.get_size() > 0
+ def mark_deleted( self, include_children=True ):
+ self.deleted = True
+ # FIXME: sqlalchemy will replace this
+ def _delete(self):
+ """Remove the file that corresponds to this data"""
+ try:
+ os.remove(self.data.file_name)
+ except OSError, e:
+ log.critical('%s delete error %s' % (self.__class__.__name__, e))
+
+class DatasetInstance( object ):
+ """A base class for all 'dataset instances', HDAs, LDAs, etc"""
+ states = Dataset.states
+ permitted_actions = Dataset.permitted_actions
+ def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, extension=None,
+ dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
+ parent_id=None, validation_errors=None, visible=True, create_dataset = False ):
+ self.name = name or "Unnamed dataset"
+ self.id = id
+ self.info = info
+ self.blurb = blurb
+ self.peek = peek
+ self.extension = extension
+ self.designation = designation
+ self.metadata = metadata or dict()
+ if dbkey: #dbkey is stored in metadata, only set if non-zero, or else we could clobber one supplied by input 'metadata'
+ self.dbkey = dbkey
+ self.deleted = deleted
+ self.visible = visible
+ # Relationships
+ if not dataset and create_dataset:
+ dataset = Dataset( state=Dataset.states.NEW )
+ dataset.flush()
+ self.dataset = dataset
+ self.parent_id = parent_id
+ self.validation_errors = validation_errors
+ @property
+ def ext( self ):
+ return self.extension
+ def get_dataset_state( self ):
+ return self.dataset.state
+ def set_dataset_state ( self, state ):
+ self.dataset.state = state
+ self.dataset.flush() #flush here, because hda.flush() won't flush the Dataset object
+ state = property( get_dataset_state, set_dataset_state )
+ def get_file_name( self ):
+ return self.dataset.get_file_name()
+ def set_file_name (self, filename):
+ return self.dataset.set_file_name( filename )
+ file_name = property( get_file_name, set_file_name )
+ @property
+ def extra_files_path( self ):
+ return self.dataset.extra_files_path
+ @property
+ def datatype( self ):
+ return datatypes_registry.get_datatype_by_extension( self.extension )
+ def get_metadata( self ):
+ if not hasattr( self, '_metadata_collection' ) or self._metadata_collection.parent != self: #using weakref to store parent (to prevent circ ref), does a context.clear() cause parent to be invalidated, while still copying over this non-database attribute?
+ self._metadata_collection = MetadataCollection( self )
+ return self._metadata_collection
+ def set_metadata( self, bunch ):
+ # Needs to accept a MetadataCollection, a bunch, or a dict
+ self._metadata = self.metadata.make_dict_copy( bunch )
+ metadata = property( get_metadata, set_metadata )
+ # This provide backwards compatibility with using the old dbkey
+ # field in the database. That field now maps to "old_dbkey" (see mapping.py).
+ def get_dbkey( self ):
+ dbkey = self.metadata.dbkey
+ if not isinstance(dbkey, list): dbkey = [dbkey]
+ if dbkey in [[None], []]: return "?"
+ return dbkey[0]
+ def set_dbkey( self, value ):
+ if "dbkey" in self.datatype.metadata_spec:
+ if not isinstance(value, list):
+ self.metadata.dbkey = [value]
+ else:
+ self.metadata.dbkey = value
+ dbkey = property( get_dbkey, set_dbkey )
+ def change_datatype( self, new_ext ):
+ self.clear_associated_files()
+ datatypes_registry.change_datatype( self, new_ext )
+ def get_size( self ):
+ """Returns the size of the data on disk"""
+ return self.dataset.get_size()
+ def set_size( self ):
+ """Returns the size of the data on disk"""
+ return self.dataset.set_size()
+ def has_data( self ):
+ """Detects whether there is any data"""
+ return self.dataset.has_data()
+ def get_raw_data( self ):
+ """Returns the full data. To stream it open the file_name and read/write as needed"""
+ return self.datatype.get_raw_data( self )
+ def write_from_stream( self, stream ):
+ """Writes data from a stream"""
+ self.datatype.write_from_stream(self, stream)
+ def set_raw_data( self, data ):
+ """Saves the data on the disc"""
+ self.datatype.set_raw_data(self, data)
+ def get_mime( self ):
+ """Returns the mime type of the data"""
+ return datatypes_registry.get_mimetype_by_extension( self.extension.lower() )
+ def set_peek( self ):
+ return self.datatype.set_peek( self )
+ def set_multi_byte_peek( self ):
+ return self.datatype.set_multi_byte_peek( self )
+ def init_meta( self, copy_from=None ):
+ return self.datatype.init_meta( self, copy_from=copy_from )
+ def set_meta( self, **kwd ):
+ self.clear_associated_files( metadata_safe = True )
+ return self.datatype.set_meta( self, **kwd )
+ def set_readonly_meta( self, **kwd ):
+ return self.datatype.set_readonly_meta( self, **kwd )
+ def missing_meta( self, **kwd ):
+ return self.datatype.missing_meta( self, **kwd )
+ def as_display_type( self, type, **kwd ):
+ return self.datatype.as_display_type( self, type, **kwd )
+ def display_peek( self ):
+ return self.datatype.display_peek( self )
+ def display_name( self ):
+ return self.datatype.display_name( self )
+ def display_info( self ):
+ return self.datatype.display_info( self )
+ def get_converted_files_by_type( self, file_type ):
+ valid = []
+ for assoc in self.implicitly_converted_datasets:
+ if not assoc.deleted and assoc.type == file_type:
+ valid.append( assoc.dataset )
+ return valid
+ def clear_associated_files( self, metadata_safe = False, purge = False ):
+ raise 'Unimplemented'
+ def get_child_by_designation(self, designation):
+ for child in self.children:
+ if child.designation == designation:
+ return child
+ return None
+ def get_converter_types(self):
+ return self.datatype.get_converter_types( self, datatypes_registry)
+ def find_conversion_destination( self, accepted_formats, **kwd ):
+ """Returns ( target_ext, exisiting converted dataset )"""
+ return self.datatype.find_conversion_destination( self, accepted_formats, datatypes_registry, **kwd )
+ def add_validation_error( self, validation_error ):
+ self.validation_errors.append( validation_error )
+ def extend_validation_errors( self, validation_errors ):
+ self.validation_errors.extend(validation_errors)
+ def mark_deleted( self, include_children=True ):
+ self.deleted = True
+ if include_children:
+ for child in self.children:
+ child.mark_deleted()
+ def mark_undeleted( self, include_children=True ):
+ self.deleted = False
+ if include_children:
+ for child in self.children:
+ child.mark_undeleted()
+ def undeletable( self ):
+ if self.purged:
+ return False
+ return True
+ @property
+ def source_library_dataset( self ):
+ def get_source( dataset ):
+ if isinstance( dataset, LibraryDatasetDatasetAssociation ):
+ if dataset.library_dataset:
+ return ( dataset, dataset.library_dataset )
+ if dataset.copied_from_library_dataset_dataset_association:
+ source = get_source( dataset.copied_from_library_dataset_dataset_association )
+ if source:
+ return source
+ if dataset.copied_from_history_dataset_association:
+ source = get_source( dataset.copied_from_history_dataset_association )
+ if source:
+ return source
+ return ( None, None )
+ return get_source( self )
+
+
+class HistoryDatasetAssociation( DatasetInstance ):
+ def __init__( self,
+ hid = None,
+ history = None,
+ copied_from_history_dataset_association = None,
+ copied_from_library_dataset_dataset_association = None,
+ **kwd ):
+ DatasetInstance.__init__( self, **kwd )
+ self.hid = hid
+ # Relationships
+ self.history = history
+ self.copied_from_history_dataset_association = copied_from_history_dataset_association
+ self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
+ def copy( self, copy_children = False, parent_id = None, target_history = None ):
+ hda = HistoryDatasetAssociation( hid=self.hid,
+ name=self.name,
+ info=self.info,
+ blurb=self.blurb,
+ peek=self.peek,
+ extension=self.extension,
+ dbkey=self.dbkey,
+ dataset = self.dataset,
+ visible=self.visible,
+ deleted=self.deleted,
+ parent_id=parent_id,
+ copied_from_history_dataset_association=self,
+ history = target_history )
+ hda.flush()
+ hda.set_size()
+ # Need to set after flushed, as MetadataFiles require dataset.id
+ hda.metadata = self.metadata
+ if copy_children:
+ for child in self.children:
+ child_copy = child.copy( copy_children = copy_children, parent_id = hda.id )
+ if not self.datatype.copy_safe_peek:
+ # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
+ hda.set_peek()
+ hda.flush()
+ return hda
+ def to_library_dataset_dataset_association( self, target_folder, replace_dataset=None, parent_id=None ):
+ if replace_dataset:
+ # The replace_dataset param ( when not None ) refers to a LibraryDataset that is being replaced with a new version.
+ library_dataset = replace_dataset
+ else:
+ # If replace_dataset is None, the Library level permissions will be taken from the folder and applied to the new
+ # LibraryDataset, and the current user's DefaultUserPermissions will be applied to the associated Dataset.
+ library_dataset = LibraryDataset( folder=target_folder, name=self.name, info=self.info )
+ library_dataset.flush()
+ ldda = LibraryDatasetDatasetAssociation( name=self.name,
+ info=self.info,
+ blurb=self.blurb,
+ peek=self.peek,
+ extension=self.extension,
+ dbkey=self.dbkey,
+ dataset=self.dataset,
+ library_dataset=library_dataset,
+ visible=self.visible,
+ deleted=self.deleted,
+ parent_id=parent_id,
+ copied_from_history_dataset_association=self,
+ user=self.history.user )
+ ldda.flush()
+ # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset
+ # Must set metadata after ldda flushed, as MetadataFiles require ldda.id
+ ldda.metadata = self.metadata
+ if not replace_dataset:
+ target_folder.add_library_dataset( library_dataset, genome_build=ldda.dbkey )
+ target_folder.flush()
+ library_dataset.library_dataset_dataset_association_id = ldda.id
+ library_dataset.flush()
+ for child in self.children:
+ child_copy = child.to_library_dataset_dataset_association( target_folder=target_folder, replace_dataset=replace_dataset, parent_id=ldda.id )
+ if not self.datatype.copy_safe_peek:
+ # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
+ ldda.set_peek()
+ ldda.flush()
+ return ldda
+ def clear_associated_files( self, metadata_safe = False, purge = False ):
+ # metadata_safe = True means to only clear when assoc.metadata_safe == False
+ for assoc in self.implicitly_converted_datasets:
+ if not metadata_safe or not assoc.metadata_safe:
+ assoc.clear( purge = purge )
+
+
+
+class LibraryDatasetDatasetAssociation( DatasetInstance ):
+ def __init__( self,
+ copied_from_history_dataset_association=None,
+ copied_from_library_dataset_dataset_association=None,
+ library_dataset=None,
+ user=None,
+ **kwd ):
+ DatasetInstance.__init__( self, **kwd )
+ self.copied_from_history_dataset_association = copied_from_history_dataset_association
+ self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
+ self.library_dataset = library_dataset
+ self.user = user
+ def to_history_dataset_association( self, target_history, parent_id=None ):
+ hid = target_history._next_hid()
+ hda = HistoryDatasetAssociation( name=self.name,
+ info=self.info,
+ blurb=self.blurb,
+ peek=self.peek,
+ extension=self.extension,
+ dbkey=self.dbkey,
+ dataset=self.dataset,
+ visible=self.visible,
+ deleted=self.deleted,
+ parent_id=parent_id,
+ copied_from_library_dataset_dataset_association=self,
+ history=target_history,
+ hid=hid )
+ hda.flush()
+ hda.metadata = self.metadata #need to set after flushed, as MetadataFiles require dataset.id
+ for child in self.children:
+ child_copy = child.to_history_dataset_association( target_history=target_history, parent_id=hda.id )
+ if not self.datatype.copy_safe_peek:
+ hda.set_peek() #in some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
+ hda.flush()
+ return hda
+ def copy( self, copy_children = False, parent_id = None, target_folder = None ):
+ ldda = LibraryDatasetDatasetAssociation( name=self.name,
+ info=self.info,
+ blurb=self.blurb,
+ peek=self.peek,
+ extension=self.extension,
+ dbkey=self.dbkey,
+ dataset=self.dataset,
+ visible=self.visible,
+ deleted=self.deleted,
+ parent_id=parent_id,
+ copied_from_library_dataset_dataset_association=self,
+ folder=target_folder )
+ ldda.flush()
+ # Need to set after flushed, as MetadataFiles require dataset.id
+ ldda.metadata = self.metadata
+ if copy_children:
+ for child in self.children:
+ child_copy = child.copy( copy_children = copy_children, parent_id = ldda.id )
+ if not self.datatype.copy_safe_peek:
+ # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
+ ldda.set_peek()
+ ldda.flush()
+ return ldda
+ def clear_associated_files( self, metadata_safe = False, purge = False ):
+ return
+ def get_library_item_info_templates( self, template_list=[], restrict=False ):
+ # If restrict is True, we'll return only those templates directly associated with this LibraryDatasetDatasetAssociation
+ if self.library_dataset_dataset_info_template_associations:
+ template_list.extend( [ lddita.library_item_info_template for lddita in self.library_dataset_dataset_info_template_associations if lddita.library_item_info_template not in template_list ] )
+ self.library_dataset.get_library_item_info_templates( template_list, restrict )
+ return template_list
+
+
+
+class LibraryDataset( object ):
+ # This class acts as a proxy to the currently selected LDDA
+ def __init__( self, folder=None, order_id=None, name=None, info=None, library_dataset_dataset_association=None, **kwd ):
+ self.folder = folder
+ self.order_id = order_id
+ self.name = name
+ self.info = info
+ self.library_dataset_dataset_association = library_dataset_dataset_association
+ def set_library_dataset_dataset_association( self, ldda ):
+ self.library_dataset_dataset_association = ldda
+ ldda.library_dataset = self
+ ldda.flush()
+ self.flush()
+ def get_info( self ):
+ if self.library_dataset_dataset_association:
+ return self.library_dataset_dataset_association.info
+ elif self._info:
+ return self._info
+ else:
+ return 'no info'
+ def set_info( self, info ):
+ self._info = info
+ info = property( get_info, set_info )
+ def get_name( self ):
+ if self.library_dataset_dataset_association:
+ return self.library_dataset_dataset_association.name
+ elif self._name:
+ return self._name
+ else:
+ return 'Unnamed dataset'
+ def set_name( self, name ):
+ self._name = name
+ name = property( get_name, set_name )
+ def display_name( self ):
+ self.library_dataset_dataset_association.display_name()
+ def get_purged( self ):
+ return self.library_dataset_dataset_association.dataset.purged
+ def set_purged( self, purged ):
+ if purged:
+ raise Exception( "Not implemented" )
+ if not purged and self.purged:
+ raise Exception( "Cannot unpurge once purged" )
+ purged = property( get_purged, set_purged )
+ def get_library_item_info_templates( self, template_list=[], restrict=False ):
+ # If restrict is True, we'll return only those templates directly associated with this LibraryDataset
+ if self.library_dataset_info_template_associations:
+ template_list.extend( [ ldita.library_item_info_template for ldita in self.library_dataset_info_template_associations if ldita.library_item_info_template not in template_list ] )
+ if restrict not in [ 'True', True ]:
+ self.folder.get_library_item_info_templates( template_list, restrict )
+ return template_list
+
+##tables
+
+
+Dataset.table = Table( "dataset", metadata,
+ Column( "id", Integer, primary_key=True ),
+ Column( "create_time", DateTime, default=now ),
+ Column( "update_time", DateTime, index=True, default=now, onupdate=now ),
+ Column( "state", TrimmedString( 64 ) ),
+ Column( "deleted", Boolean, index=True, default=False ),
+ Column( "purged", Boolean, index=True, default=False ),
+ Column( "purgable", Boolean, default=True ),
+ Column( "external_filename" , TEXT ),
+ Column( "_extra_files_path", TEXT ),
+ Column( 'file_size', Numeric( 15, 0 ) ) )
+
+
+
+HistoryDatasetAssociation.table = Table( "history_dataset_association", metadata,
+ Column( "id", Integer, primary_key=True ),
+ Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
+ Column( "create_time", DateTime, default=now ),
+ Column( "update_time", DateTime, default=now, onupdate=now ),
+ Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
+ Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
+ Column( "hid", Integer ),
+ Column( "name", TrimmedString( 255 ) ),
+ Column( "info", TrimmedString( 255 ) ),
+ Column( "blurb", TrimmedString( 255 ) ),
+ Column( "peek" , TEXT ),
+ Column( "extension", TrimmedString( 64 ) ),
+ Column( "metadata", MetadataType(), key="_metadata" ),
+ Column( "parent_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
+ Column( "designation", TrimmedString( 255 ) ),
+ Column( "deleted", Boolean, index=True, default=False ),
+ Column( "visible", Boolean ) )
+
+
+LibraryDatasetDatasetAssociation.table = Table( "library_dataset_dataset_association", metadata,
+ Column( "id", Integer, primary_key=True ),
+ Column( "library_dataset_id", Integer, ForeignKey( "library_dataset.id" ), index=True ),
+ Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
+ Column( "create_time", DateTime, default=now ),
+ Column( "update_time", DateTime, default=now, onupdate=now ),
+ Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id", use_alter=True, name='history_dataset_association_dataset_id_fkey' ), nullable=True ),
+ Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name='library_dataset_dataset_association_id_fkey' ), nullable=True ),
+ Column( "name", TrimmedString( 255 ) ),
+ Column( "info", TrimmedString( 255 ) ),
+ Column( "blurb", TrimmedString( 255 ) ),
+ Column( "peek" , TEXT ),
+ Column( "extension", TrimmedString( 64 ) ),
+ Column( "metadata", MetadataType(), key="_metadata" ),
+ Column( "parent_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
+ Column( "designation", TrimmedString( 255 ) ),
+ Column( "deleted", Boolean, index=True, default=False ),
+ Column( "visible", Boolean ),
+ Column( "message", TrimmedString( 255 ) ) )
+
+LibraryDataset.table = Table( "library_dataset", metadata,
+ Column( "id", Integer, primary_key=True ),
+ Column( "library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name="library_dataset_dataset_association_id_fk" ), nullable=True, index=True ),#current version of dataset, if null, there is not a current version selected
+ Column( "order_id", Integer ),
+ Column( "create_time", DateTime, default=now ),
+ Column( "update_time", DateTime, default=now, onupdate=now ),
+ Column( "name", TrimmedString( 255 ), key="_name" ), #when not None/null this will supercede display in library (but not when imported into user's history?)
+ Column( "info", TrimmedString( 255 ), key="_info" ), #when not None/null this will supercede display in library (but not when imported into user's history?)
+ Column( "deleted", Boolean, index=True, default=False ) )
+
+
+
+##mappers
+
+
+assign_mapper( context, Dataset, Dataset.table,
+ properties=dict(
+ history_associations=relation(
+ HistoryDatasetAssociation,
+ primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) ),
+ active_history_associations=relation(
+ HistoryDatasetAssociation,
+ primaryjoin=( ( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) & ( HistoryDatasetAssociation.table.c.deleted == False ) ) ),
+ library_associations=relation(
+ LibraryDatasetDatasetAssociation,
+ primaryjoin=( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) ),
+ active_library_associations=relation(
+ LibraryDatasetDatasetAssociation,
+ primaryjoin=( ( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) & ( LibraryDatasetDatasetAssociation.table.c.deleted == False ) ) )
+ ) )
+
+
+assign_mapper( context, HistoryDatasetAssociation, HistoryDatasetAssociation.table,
+ properties=dict(
+ dataset=relation(
+ Dataset,
+ primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ), lazy=False ),
+ # .history defined in History mapper
+ copied_to_history_dataset_associations=relation(
+ HistoryDatasetAssociation,
+ primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ),
+ backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
+ copied_to_library_dataset_dataset_associations=relation(
+ LibraryDatasetDatasetAssociation,
+ primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
+ backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
+ children=relation(
+ HistoryDatasetAssociation,
+ primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ),
+ backref=backref( "parent", primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
+ visible_children=relation(
+ HistoryDatasetAssociation,
+ primaryjoin=( ( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ) & ( HistoryDatasetAssociation.table.c.visible == True ) ) )
+ ) )
+
+assign_mapper( context, LibraryDatasetDatasetAssociation, LibraryDatasetDatasetAssociation.table,
+ properties=dict(
+ dataset=relation( Dataset ),
+ library_dataset = relation( LibraryDataset,
+ primaryjoin=( LibraryDatasetDatasetAssociation.table.c.library_dataset_id == LibraryDataset.table.c.id ) ),
+ copied_to_library_dataset_dataset_associations=relation(
+ LibraryDatasetDatasetAssociation,
+ primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
+ backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
+ copied_to_history_dataset_associations=relation(
+ HistoryDatasetAssociation,
+ primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
+ backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
+ children=relation(
+ LibraryDatasetDatasetAssociation,
+ primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ),
+ backref=backref( "parent", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
+ visible_children=relation(
+ LibraryDatasetDatasetAssociation,
+ primaryjoin=( ( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ) & ( LibraryDatasetDatasetAssociation.table.c.visible == True ) ) )
+ ) )
+
+assign_mapper( context, LibraryDataset, LibraryDataset.table,
+ properties=dict(
+ library_dataset_dataset_association=relation( LibraryDatasetDatasetAssociation, primaryjoin=( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ),
+ expired_datasets = relation( LibraryDatasetDatasetAssociation, foreign_keys=[LibraryDataset.table.c.id,LibraryDataset.table.c.library_dataset_dataset_association_id ], primaryjoin=( ( LibraryDataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.library_dataset_id ) & ( not_( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ) ), viewonly=True, uselist=True )
+ ) )
+
+
+def __guess_dataset_by_filename( filename ):
+ """Return a guessed dataset by filename"""
+ try:
+ fields = os.path.split( filename )
+ if fields:
+ if fields[-1].startswith( 'dataset_' ) and fields[-1].endswith( '.dat' ): #dataset_%d.dat
+ return Dataset.get( int( fields[-1][ len( 'dataset_' ): -len( '.dat' ) ] ) )
+ except:
+ pass #some parsing error, we can't guess Dataset
+ return None
+
+def upgrade():
+ log.debug( "Fixing a discrepancy concerning deleted shared history items." )
+ affected_items = 0
+ start_time = time.time()
+ for dataset in Dataset.filter( and_( Dataset.c.deleted == True, Dataset.c.purged == False ) ).all():
+ for dataset_instance in dataset.history_associations + dataset.library_associations:
+ if not dataset_instance.deleted:
+ dataset.deleted = False
+ if dataset.file_size in [ None, 0 ]:
+ dataset.set_size() #Restore filesize
+ affected_items += 1
+ break
+ context.flush()
+ log.debug( "%i items affected, and restored." % ( affected_items ) )
+ log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
+
+ #fix share before hda
+ log.debug( "Fixing a discrepancy concerning cleaning up deleted history items shared before HDAs." )
+ dataset_by_filename = {}
+ changed_associations = 0
+ start_time = time.time()
+ for dataset in Dataset.filter( Dataset.external_filename.like( '%dataset_%.dat' ) ).all():
+ if dataset.file_name in dataset_by_filename:
+ guessed_dataset = dataset_by_filename[ dataset.file_name ]
+ else:
+ guessed_dataset = __guess_dataset_by_filename( dataset.file_name )
+ if guessed_dataset and dataset.file_name != guessed_dataset.file_name:#not os.path.samefile( dataset.file_name, guessed_dataset.file_name ):
+ guessed_dataset = None
+ dataset_by_filename[ dataset.file_name ] = guessed_dataset
+
+ if guessed_dataset is not None and guessed_dataset.id != dataset.id: #could we have a self referential dataset?
+ for dataset_instance in dataset.history_associations + dataset.library_associations:
+ dataset_instance.dataset = guessed_dataset
+ changed_associations += 1
+ #mark original Dataset as deleted and purged, it is no longer in use, but do not delete file_name contents
+ dataset.deleted = True
+ dataset.external_filename = "Dataset was result of share before HDA, and has been replaced: %s mapped to Dataset %s" % ( dataset.external_filename, guessed_dataset.id )
+ dataset.purged = True #we don't really purge the file here, but we mark it as purged, since this dataset is now defunct
+ context.flush()
+ log.debug( "%i items affected, and restored." % ( changed_associations ) )
+ log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
+
+def downgrade():
+ log.debug( "Downgrade is not possible." )
+
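The core idea of this changeset -- redeclaring only the tables a data migration touches, locally, instead of importing the live galaxy.model -- can be sketched in isolation roughly as follows. This is illustrative only, using plain SQLAlchemy Core with an assumed upgrade(engine) signature rather than Galaxy's migrate-based entry point, and it simplifies the first fix (the real script also walks history_associations and library_associations before un-deleting).

# Minimal, self-contained sketch of the "local model copy" migration pattern.
# Only the columns the data fix needs are declared, so later changes to the
# application model cannot break this script. Names are assumptions.
import sqlalchemy as sa

metadata = sa.MetaData()

# Trimmed-down local copy of the "dataset" table.
dataset_table = sa.Table(
    "dataset", metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column("deleted", sa.Boolean),
    sa.Column("purged", sa.Boolean),
    sa.Column("file_size", sa.Numeric(15, 0)),
)

def upgrade(engine):
    # Simplified variant of the first fix above: clear the deleted flag on
    # datasets that were marked deleted but never purged.
    with engine.begin() as conn:  # transaction commits on exit
        conn.execute(
            dataset_table.update()
            .where(sa.and_(dataset_table.c.deleted == True,    # noqa: E712
                           dataset_table.c.purged == False))   # noqa: E712
            .values(deleted=False)
        )

The actual script goes further and maps per-script classes (Dataset, HistoryDatasetAssociation, ...) because it needs the ORM relationships that Core alone does not provide.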

29 Apr '09
details: http://www.bx.psu.edu/hg/galaxy/rev/ea9235523069
changeset: 2372:ea9235523069
user: guru
date: Mon Apr 27 11:59:42 2009 -0400
description:
Added an option to mask non-CpG sites to the "Mask CpG sites" tool.
2 file(s) affected in this change:
tools/regVariation/maf_cpg_filter.py
tools/regVariation/maf_cpg_filter.xml
diffs (115 lines):
diff -r 76bd876ed230 -r ea9235523069 tools/regVariation/maf_cpg_filter.py
--- a/tools/regVariation/maf_cpg_filter.py Mon Apr 27 11:39:58 2009 -0400
+++ b/tools/regVariation/maf_cpg_filter.py Mon Apr 27 11:59:42 2009 -0400
@@ -1,5 +1,5 @@
#!/usr/bin/python
-
+#Guruprasad Ananda
#Adapted from bx/scripts/maf_mask_cpg.py
"""
Mask out potential CpG sites from a maf. Restricted or inclusive definition
@@ -27,12 +27,11 @@
def main():
options, args = doc_optparse.parse( __doc__ )
try:
- inp_file, out_file, definition = args
+ inp_file, out_file, sitetype, definition = args
if options.mask:
mask = int(options.mask)
else:
mask = 0
- definition = int(definition)
except:
print >> sys.stderr, "Tool initialization error."
sys.exit()
@@ -43,14 +42,19 @@
mask_chr_dict = {0:'#', 1:'$', 2:'^', 3:'*', 4:'?'}
mask = mask_chr_dict[mask]
- if definition == 1:
- cpgfilter = bx.align.sitemask.cpg.Restricted( mask=mask )
- defn = "Restricted"
+ if sitetype == "CpG":
+ if int(definition) == 1:
+ cpgfilter = bx.align.sitemask.cpg.Restricted( mask=mask )
+ defn = "CpG-Restricted"
+ else:
+ cpgfilter = bx.align.sitemask.cpg.Inclusive( mask=mask )
+ defn = "CpG-Inclusive"
else:
- cpgfilter = bx.align.sitemask.cpg.Inclusive( mask=mask )
- defn = "Inclusive"
+ cpgfilter = bx.align.sitemask.cpg.nonCpG( mask=mask )
+ defn = "non-CpG"
cpgfilter.run( reader, writer.write )
print "%2.2f percent bases masked; Mask character = %s, Definition = %s" %(float(cpgfilter.masked)/float(cpgfilter.total) * 100, mask, defn)
+
if __name__ == "__main__":
main()
diff -r 76bd876ed230 -r ea9235523069 tools/regVariation/maf_cpg_filter.xml
--- a/tools/regVariation/maf_cpg_filter.xml Mon Apr 27 11:39:58 2009 -0400
+++ b/tools/regVariation/maf_cpg_filter.xml Mon Apr 27 11:59:42 2009 -0400
@@ -1,11 +1,16 @@
-<tool id="cpgFilter" name="Mask CpG sites">
+<tool id="cpgFilter" name="Mask CpG/non-CpG sites" version="1.0.0">
<description> from MAF file</description>
<command interpreter="python">
maf_cpg_filter.py
- $input
- $out_file1
- $definition
- -m $mask_char
+ $input
+ $out_file1
+ $masksite.type
+ #if $masksite.type == "CpG":
+ $masksite.definition
+ #else:
+ "NA"
+ #end if
+ -m $mask_char
</command>
<inputs>
<page>
@@ -17,10 +22,19 @@
<option value="3">*</option>
<option value="4">?</option>
</param>
- <param name="definition" size="5" type="select" label="Definition">
- <option value="0" selected="true">Inclusive</option>
- <option value="1">Restricted</option>
- </param>
+ <conditional name="masksite">
+ <param name="type" size="5" type="select" label="Sites to be masked">
+ <option value="CpG" selected="true">CpG sites</option>
+ <option value="nonCpG">non-CpG sites</option>
+ </param>
+ <when value="CpG">
+ <param name="definition" size="5" type="select" label="Definition">
+ <option value="0" selected="true">Inclusive</option>
+ <option value="1">Restricted</option>
+ </param>
+ </when>
+ <when value="nonCpG" />
+ </conditional>
</page>
</inputs>
<outputs>
@@ -33,8 +47,16 @@
<test>
<param name="input" value="6.maf"/>
<param name="mask_char" value="0"/>
+ <param name="type" value="CpG" />
<param name="definition" value="0" />
<output name="out_file1" file="6_mask_cpg.maf"/>
+ </test>
+ <test>
+ <param name="input" value="6.maf"/>
+ <param name="mask_char" value="0"/>
+ <param name="type" value="nonCpG" />
+ <param name="definition" value="NA" />
+ <output name="out_file1" file="6_mask_noncpg.maf"/>
</test>
</tests>
<help>
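A rough standalone sketch of the argument handling this patch introduces (not the tool itself; names are illustrative): the new sitetype positional chooses CpG vs. non-CpG masking, and definition is only consulted on the CpG branch, where the XML conditional supplies it; the non-CpG branch passes the literal "NA".

# Sketch of the dispatch added in maf_cpg_filter.py; the real tool builds
# bx.align.sitemask.cpg filter objects instead of returning strings.
import sys

def choose_mode(sitetype, definition):
    if sitetype == "CpG":
        # definition: "1" -> Restricted, anything else -> Inclusive
        return "CpG-Restricted" if int(definition) == 1 else "CpG-Inclusive"
    # definition is the placeholder string "NA" in this branch
    return "non-CpG"

if __name__ == "__main__":
    # Argument order mirrors the updated tool: input, output, sitetype, definition
    inp_file, out_file, sitetype, definition = sys.argv[1:5]
    print(choose_mode(sitetype, definition))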
details: http://www.bx.psu.edu/hg/galaxy/rev/76bd876ed230
changeset: 2371:76bd876ed230
user: Nate Coraor <nate(a)bx.psu.edu>
date: Mon Apr 27 11:39:58 2009 -0400
description:
Update bx-python for Guru
1 file(s) affected in this change:
eggs.ini
diffs (19 lines):
diff -r bb85df3f0baa -r 76bd876ed230 eggs.ini
--- a/eggs.ini Mon Apr 27 10:50:17 2009 -0400
+++ b/eggs.ini Mon Apr 27 11:39:58 2009 -0400
@@ -59,13 +59,13 @@
MySQL_python = _5.0.67_static
python_lzo = _static
flup = .dev_r2311
-bx_python = _dev_r460
+bx_python = _dev_r130f083b56b9
nose = .dev_r101
; source location, necessary for scrambling
[source]
numpy = http://downloads.sourceforge.net/numpy/numpy-1.2.1.tar.gz
-bx_python = http://dist.g2.bx.psu.edu/bx-python_dist-r460.tar.bz2
+bx_python = http://bitbucket.org/james_taylor/bx-python/get/130f083b56b9.bz2
Cheetah = http://voxel.dl.sourceforge.net/sourceforge/cheetahtemplate/Cheetah-1.0.tar…
DRMAA_python = http://gridengine.sunsource.net/files/documents/7/36/DRMAA-python-0.2.tar.gz
MySQL_python = http://superb-west.dl.sourceforge.net/sourceforge/mysql-python/MySQL-python… http://mysql.mirrors.pair.com/Downloads/MySQL-5.0/mysql-5.0.67.tar.gz

29 Apr '09
details: http://www.bx.psu.edu/hg/galaxy/rev/e7492f6c18ca
changeset: 2366:e7492f6c18ca
user: Greg Von Kuster <greg(a)bx.psu.edu>
date: Fri Apr 24 13:03:57 2009 -0400
description:
Deleting the cleanup_dataset migration script until it is fixed keeps it from being pushed to public.
1 file(s) affected in this change:
lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py
diffs (78 lines):
diff -r fb401a784261 -r e7492f6c18ca lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py
--- a/lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py Thu Apr 23 12:42:05 2009 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-import sys, logging, os, time
-
-log = logging.getLogger( __name__ )
-log.setLevel(logging.DEBUG)
-handler = logging.StreamHandler( sys.stdout )
-format = "%(name)s %(levelname)s %(asctime)s %(message)s"
-formatter = logging.Formatter( format )
-handler.setFormatter( formatter )
-log.addHandler( handler )
-
-from migrate import migrate_engine
-from sqlalchemy import and_
-
-# load existing galaxy model, we are only changing data
-import galaxy.model
-from galaxy.model import mapping
-model = mapping.init( galaxy.model.Dataset.file_path, str( migrate_engine.url ) )
-
-def __guess_dataset_by_filename( filename ):
- """Return a guessed dataset by filename"""
- try:
- fields = os.path.split( filename )
- if fields:
- if fields[-1].startswith( 'dataset_' ) and fields[-1].endswith( '.dat' ): #dataset_%d.dat
- return model.Dataset.get( int( fields[-1][ len( 'dataset_' ): -len( '.dat' ) ] ) )
- except:
- pass #some parsing error, we can't guess Dataset
- return None
-
-def upgrade():
- log.debug( "Fixing a discrepancy concerning deleted shared history items." )
- affected_items = 0
- start_time = time.time()
- for dataset in model.Dataset.filter( and_( model.Dataset.c.deleted == True, model.Dataset.c.purged == False ) ).all():
- for dataset_instance in dataset.history_associations + dataset.library_associations:
- if not dataset_instance.deleted:
- dataset.deleted = False
- if dataset.file_size in [ None, 0 ]:
- dataset.set_size() #Restore filesize
- affected_items += 1
- break
- galaxy.model.mapping.Session.flush()
- log.debug( "%i items affected, and restored." % ( affected_items ) )
- log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
-
- #fix share before hda
- log.debug( "Fixing a discrepancy concerning cleaning up deleted history items shared before HDAs." )
- dataset_by_filename = {}
- changed_associations = 0
- start_time = time.time()
- for dataset in model.Dataset.filter( model.Dataset.external_filename.like( '%dataset_%.dat' ) ).all():
- if dataset.file_name in dataset_by_filename:
- guessed_dataset = dataset_by_filename[ dataset.file_name ]
- else:
- guessed_dataset = __guess_dataset_by_filename( dataset.file_name )
- if guessed_dataset and dataset.file_name != guessed_dataset.file_name:#not os.path.samefile( dataset.file_name, guessed_dataset.file_name ):
- guessed_dataset = None
- dataset_by_filename[ dataset.file_name ] = guessed_dataset
-
- if guessed_dataset is not None and guessed_dataset.id != dataset.id: #could we have a self referential dataset?
- for dataset_instance in dataset.history_associations + dataset.library_associations:
- dataset_instance.dataset = guessed_dataset
- changed_associations += 1
- #mark original Dataset as deleted and purged, it is no longer in use, but do not delete file_name contents
- dataset.deleted = True
- dataset.external_filename = "Dataset was result of share before HDA, and has been replaced: %s mapped to Dataset %s" % ( dataset.external_filename, guessed_dataset.id )
- dataset.purged = True #we don't really purge the file here, but we mark it as purged, since this dataset is now defunct
- galaxy.model.mapping.Session.flush()
- log.debug( "%i items affected, and restored." % ( changed_associations ) )
- log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
-
-def downgrade():
- log.debug( "Downgrade is not possible." )
-

29 Apr '09
details: http://www.bx.psu.edu/hg/galaxy/rev/a1e84abd301a
changeset: 2367:a1e84abd301a
user: Greg Von Kuster <greg(a)bx.psu.edu>
date: Fri Apr 24 13:42:38 2009 -0400
description:
Two fixes: add type checking for id when deleting datasets, and add checks to make sure at least 1 user can access a set of datasets when setting permissions on multiple datasets.
3 file(s) affected in this change:
lib/galaxy/web/controllers/admin.py
lib/galaxy/web/controllers/dataset.py
test/functional/test_security_and_libraries.py
diffs (114 lines):
diff -r e7492f6c18ca -r a1e84abd301a lib/galaxy/web/controllers/admin.py
--- a/lib/galaxy/web/controllers/admin.py Fri Apr 24 13:03:57 2009 -0400
+++ b/lib/galaxy/web/controllers/admin.py Fri Apr 24 13:42:38 2009 -0400
@@ -1495,9 +1495,36 @@
if action == 'permissions':
if params.get( 'update_roles_button', False ):
permissions = {}
+ accessible = False
for k, v in trans.app.model.Dataset.permitted_actions.items():
in_roles = [ trans.app.model.Role.get( x ) for x in util.listify( params.get( k + '_in', [] ) ) ]
- permissions[ trans.app.security_agent.get_action( v.action ) ] = in_roles
+ # At least 1 user must have every role associated with this dataset, or the dataset is inaccessible
+ if v == trans.app.security_agent.permitted_actions.DATASET_ACCESS:
+ if len( in_roles ) > 1:
+ # Get the set of all users that are being associated with the dataset
+ in_roles_set = sets.Set()
+ for role in in_roles:
+ in_roles_set.add( role )
+ users_set = sets.Set()
+ for role in in_roles:
+ for ura in role.users:
+ users_set.add( ura.user )
+ # Make sure that at least 1 user has every role being associated with the dataset
+ for user in users_set:
+ user_roles_set = sets.Set()
+ for ura in user.roles:
+ user_roles_set.add( ura.role )
+ if in_roles_set.issubset( user_roles_set ):
+ accessible = True
+ break
+ else:
+ accessible = True
+ if not accessible and v == trans.app.security_agent.permitted_actions.DATASET_ACCESS:
+ # Don't set the permissions for DATASET_ACCESS if inaccessbile, but set all other permissions
+ # TODO: keep access permissions as they originally were, rather than automatically making public
+ permissions[ trans.app.security_agent.get_action( v.action ) ] = []
+ else:
+ permissions[ trans.app.security_agent.get_action( v.action ) ] = in_roles
for ldda in lddas:
# Set the DATASET permissions on the Dataset
trans.app.security_agent.set_all_dataset_permissions( ldda.dataset, permissions )
@@ -1514,7 +1541,13 @@
# Set the LIBRARY permissions on the LibraryDatasetDatasetAssociation
trans.app.security_agent.set_all_library_permissions( ldda, permissions )
ldda.refresh()
- msg = 'Permissions and roles have been updated on %d datasets' % len( lddas )
+ if not accessible:
+ msg = "At least 1 user must have every role associated with accessing these %d datasets. " % len( lddas )
+ msg += "The roles you attempted to associate for access would make these datasets inaccessible by everyone, "
+ msg += "so access permissions were not set. All other permissions were updated for the datasets."
+ messagetype = 'error'
+ else:
+ msg = "Permissions have been updated on %d datasets" % len( lddas )
return trans.fill_template( "/admin/library/ldda_permissions.mako",
ldda=lddas,
library_id=library_id,
diff -r e7492f6c18ca -r a1e84abd301a lib/galaxy/web/controllers/dataset.py
--- a/lib/galaxy/web/controllers/dataset.py Fri Apr 24 13:03:57 2009 -0400
+++ b/lib/galaxy/web/controllers/dataset.py Fri Apr 24 13:42:38 2009 -0400
@@ -125,19 +125,20 @@
return trans.show_error_message( "You are not allowed to access this dataset" )
def _undelete( self, trans, id ):
- history = trans.get_history()
- data = self.app.model.HistoryDatasetAssociation.get( id )
- if data and data.undeletable:
- # Walk up parent datasets to find the containing history
- topmost_parent = data
- while topmost_parent.parent:
- topmost_parent = topmost_parent.parent
- assert topmost_parent in history.datasets, "Data does not belong to current history"
- # Mark undeleted
- data.mark_undeleted()
- self.app.model.flush()
- trans.log_event( "Dataset id %s has been undeleted" % str(id) )
- return True
+ if isinstance( id, type( 1 ) ):
+ history = trans.get_history()
+ data = self.app.model.HistoryDatasetAssociation.get( id )
+ if data and data.undeletable:
+ # Walk up parent datasets to find the containing history
+ topmost_parent = data
+ while topmost_parent.parent:
+ topmost_parent = topmost_parent.parent
+ assert topmost_parent in history.datasets, "Data does not belong to current history"
+ # Mark undeleted
+ data.mark_undeleted()
+ self.app.model.flush()
+ trans.log_event( "Dataset id %s has been undeleted" % str(id) )
+ return True
return False
@web.expose
diff -r e7492f6c18ca -r a1e84abd301a test/functional/test_security_and_libraries.py
--- a/test/functional/test_security_and_libraries.py Fri Apr 24 13:03:57 2009 -0400
+++ b/test/functional/test_security_and_libraries.py Fri Apr 24 13:42:38 2009 -0400
@@ -737,7 +737,7 @@
url = build_url( permissions, role_one )
self.home()
self.visit_url( url )
- self.check_page_for_string( 'Permissions and roles have been updated on 3 datasets' )
+ self.check_page_for_string( 'Permissions have been updated on 3 datasets' )
def check_edit_page1( lddas ):
# Make sure the permissions have been correctly updated for the 3 datasets. Permissions should
# be all of the above on any of the 3 datasets that are imported into a history
@@ -785,7 +785,7 @@
url = build_url( permissions, role_one )
self.home()
self.visit_url( url )
- self.check_page_for_string( 'Permissions and roles have been updated on 3 datasets' )
+ self.check_page_for_string( 'Permissions have been updated on 3 datasets' )
def check_edit_page2( lddas ):
# Make sure the permissions have been correctly updated for the 3 datasets. Permissions should
# be all of the above on any of the 3 datasets that are imported into a history
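The accessibility rule this changeset enforces -- the new DATASET_ACCESS roles are only kept if at least one user already holds every one of them -- can be expressed compactly with plain Python sets (the admin controller uses the old sets module; the user/role objects below are stand-ins, not Galaxy classes).

# Illustrative only: dataset access stays possible if some user holds every
# role being attached to the DATASET_ACCESS action.
def access_remains_possible(in_roles, users):
    """in_roles: iterable of role names being attached.
    users: mapping of user name -> set of role names that user holds."""
    required = set(in_roles)
    if len(required) <= 1:
        return True  # a single role (or none) can always be satisfied
    return any(required.issubset(held) for held in users.values())

# Example: two roles, only alice holds both -> still accessible.
users = {"alice": {"lab", "project-x"}, "bob": {"lab"}}
assert access_remains_possible(["lab", "project-x"], users)
assert not access_remains_possible(["lab", "project-x"], {"bob": {"lab"}})

When the check fails, the controller skips setting DATASET_ACCESS (leaving it public), applies the remaining permissions, and reports the problem as an error message, as the diff above shows.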
details: http://www.bx.psu.edu/hg/galaxy/rev/bc540363324d
changeset: 2368:bc540363324d
user: Anton Nekrutenko <anton(a)bx.psu.edu>
date: Fri Apr 24 15:06:51 2009 -0400
description:
Added column info to MB wrapper
2 file(s) affected in this change:
static/welcome.html
tools/metag_tools/megablast_wrapper.xml
diffs (76 lines):
diff -r a1e84abd301a -r bc540363324d static/welcome.html
--- a/static/welcome.html Fri Apr 24 13:42:38 2009 -0400
+++ b/static/welcome.html Fri Apr 24 15:06:51 2009 -0400
@@ -14,6 +14,7 @@
.current-quickie {
width: 100%;
background: black;
+ borderRadius:10;
}
.quickie .head {
font-size: 200%;
diff -r a1e84abd301a -r bc540363324d tools/metag_tools/megablast_wrapper.xml
--- a/tools/metag_tools/megablast_wrapper.xml Fri Apr 24 13:42:38 2009 -0400
+++ b/tools/metag_tools/megablast_wrapper.xml Fri Apr 24 15:06:51 2009 -0400
@@ -2,20 +2,20 @@
<description> compare short reads against nt and wgs databases</description>
<command interpreter="python">megablast_wrapper.py $source_select $input_query $output1 $word_size $iden_cutoff $evalue_cutoff $filter_query ${GALAXY_DATA_INDEX_DIR}</command>
<inputs>
- <param name="source_select" type="select" display="radio" label="Choose target database">
+ <param name="input_query" type="data" format="fasta" label="Compare these sequences"/>
+ <param name="source_select" type="select" display="radio" label="against target database">
<options from_file="blastdb.loc">
<column name="name" index="0"/>
<column name="value" index="0"/>
</options>
</param>
- <param name="input_query" type="data" format="fasta" label="Sequence file"/>
- <param name="word_size" type="select" label="Word size (-W)" help="Size of best perfect match">
+ <param name="word_size" type="select" label="using word size" help="Size of best perfect match">
<option value="28">28</option>
<option value="16">16</option>
</param>
- <param name="iden_cutoff" type="float" size="15" value="90.0" label="Identity percentage cut-off (-p)" help="no cutoff if 0" />
- <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Expectation value (-e)" />
- <param name="filter_query" type="select" label="Filter query sequence (-F)">
+ <param name="iden_cutoff" type="float" size="15" value="90.0" label="report hits above this identity" help="no cutoff if 0" />
+ <param name="evalue_cutoff" type="float" size="15" value="0.001" label="set expectation value cutoff" />
+ <param name="filter_query" type="select" label="Filter out low complexity regions?">
<option value="T">Yes</option>
<option value="F">No</option>
</param>
@@ -49,18 +49,27 @@
This tool runs **megablast** (for information about megablast, please see the reference below) a high performance nucleotide local aligner developed by Webb Miller and colleagues.
-
-----
-**Parameters**
+**Output format**
-- *Word size* (**-W**) : the minimal length of an exact match
-- *Identity percentage cut-off* (**-p**) : the minimal identity for an alignment
-- *Expectation value* (**-e**) : the maximal expectation value for an alignment
-- *Filter query sequence* (**-F**) : mask low-complexity regions in the query sequence
+Output of this tool contains 13 columns delimited by Tabs:
+1. Id of your sequence
+2. GI of the database hit
+3. Length of the database hit
+4. % identity
+5. Alignment length
+6. # mismatches
+7. # gaps
+8. Start position in your sequence
+9. End position in your sequence
+10. Start position in database hit
+11. End position in database hit
+12. E-value
+13. Bit score
------
+-------
**Reference**
details: http://www.bx.psu.edu/hg/galaxy/rev/0035ef5580bb
changeset: 2369:0035ef5580bb
user: Anton Nekrutenko <anton(a)bx.psu.edu>
date: Fri Apr 24 15:08:22 2009 -0400
description:
Fixing accidental change
1 file(s) affected in this change:
static/welcome.html
diffs (11 lines):
diff -r bc540363324d -r 0035ef5580bb static/welcome.html
--- a/static/welcome.html Fri Apr 24 15:06:51 2009 -0400
+++ b/static/welcome.html Fri Apr 24 15:08:22 2009 -0400
@@ -14,7 +14,6 @@
.current-quickie {
width: 100%;
background: black;
- borderRadius:10;
}
.quickie .head {
font-size: 200%;

29 Apr '09
details: http://www.bx.psu.edu/hg/galaxy/rev/bb85df3f0baa
changeset: 2370:bb85df3f0baa
user: Greg Von Kuster <greg(a)bx.psu.edu>
date: Mon Apr 27 10:50:17 2009 -0400
description:
Fix for column_maker tool when input dataset has missing or invalid metadata values.
1 file(s) affected in this change:
tools/stats/column_maker.py
diffs (33 lines):
diff -r 0035ef5580bb -r bb85df3f0baa tools/stats/column_maker.py
--- a/tools/stats/column_maker.py Fri Apr 24 15:08:22 2009 -0400
+++ b/tools/stats/column_maker.py Mon Apr 27 10:50:17 2009 -0400
@@ -13,15 +13,27 @@
sys.stderr.write( msg )
sys.exit()
+data_err = "This tool can only be used with tab-delimited data."
+columns_err = "Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. "
+column_types_err = "Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. "
+invalid_metadata_err = "The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. "
inp_file = sys.argv[1]
out_file = sys.argv[2]
expr = sys.argv[3]
-round = sys.argv[4]
+round = sys.argv[4]
try:
in_columns = int( sys.argv[5] )
+except:
+ stop_err( columns_err + data_err )
+if in_columns < 2:
+ # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method.
+ stop_err( columns_err + data_err )
+try:
in_column_types = sys.argv[6].split( ',' )
except:
- stop_err( "Data does not appear to be tabular. This tool can only be used with tab-delimited data." )
+ stop_err( column_types_err + data_err )
+if len( in_column_types ) != in_columns:
+ stop_err( invalid_metadata_err + data_err )
# Unescape if input has been escaped
mapped_str = {

29 Apr '09
details: http://www.bx.psu.edu/hg/galaxy/rev/fb401a784261
changeset: 2365:fb401a784261
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Thu Apr 23 12:42:05 2009 -0400
description:
Add ability for admins to delete and undelete library items.
Modify cleanup_datasets.py script and include a database migration script.
21 file(s) affected in this change:
lib/galaxy/model/__init__.py
lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py
lib/galaxy/util/__init__.py
lib/galaxy/web/controllers/admin.py
scripts/cleanup_datasets/cleanup_datasets.py
scripts/cleanup_datasets/delete_userless_histories.sh
scripts/cleanup_datasets/delete_userless_histories_main.sh
scripts/cleanup_datasets/purge_datasets.sh
scripts/cleanup_datasets/purge_datasets_main.sh
scripts/cleanup_datasets/purge_folders.sh
scripts/cleanup_datasets/purge_folders_main.sh
scripts/cleanup_datasets/purge_histories.sh
scripts/cleanup_datasets/purge_histories_main.sh
scripts/cleanup_datasets/purge_libraries.sh
scripts/cleanup_datasets/purge_libraries_main.sh
static/june_2007_style/base.css.tmpl
static/june_2007_style/blue/base.css
static/june_2007_style/blue/library.css
static/june_2007_style/library.css.tmpl
templates/admin/library/browse_library.mako
templates/admin/library/common.mako
diffs (1069 lines):
diff -r f7336991d0ee -r fb401a784261 lib/galaxy/model/__init__.py
--- a/lib/galaxy/model/__init__.py Thu Apr 23 09:20:11 2009 -0400
+++ b/lib/galaxy/model/__init__.py Thu Apr 23 12:42:05 2009 -0400
@@ -229,7 +229,7 @@
return des
@property
def activatable_datasets( self ):
- return [ hda for hda in self.datasets if not hda.dataset.purged ] #this needs to be a list
+ return [ hda for hda in self.datasets if not hda.dataset.deleted ] #this needs to be a list
class UserRoleAssociation( object ):
def __init__( self, user, role ):
@@ -707,6 +707,12 @@
@property
def active_components( self ):
return list( self.active_folders ) + list( self.active_datasets )
+ @property
+ def activatable_datasets( self ):
+ return [ ld for ld in self.datasets if not ld.library_dataset_dataset_association.dataset.deleted ] #this needs to be a list
+ @property #make this a relation
+ def activatable_folders( self ):
+ return [ folder for folder in self.folders if not folder.purged ] #this needs to be a list
class LibraryDataset( object ):
# This class acts as a proxy to the currently selected LDDA
@@ -743,6 +749,14 @@
name = property( get_name, set_name )
def display_name( self ):
self.library_dataset_dataset_association.display_name()
+ def get_purged( self ):
+ return self.library_dataset_dataset_association.dataset.purged
+ def set_purged( self, purged ):
+ if purged:
+ raise Exception( "Not implemented" )
+ if not purged and self.purged:
+ raise Exception( "Cannot unpurge once purged" )
+ purged = property( get_purged, set_purged )
def get_library_item_info_templates( self, template_list=[], restrict=False ):
# If restrict is True, we'll return only those templates directly associated with this LibraryDataset
if self.library_dataset_info_template_associations:
@@ -750,7 +764,7 @@
if restrict not in [ 'True', True ]:
self.folder.get_library_item_info_templates( template_list, restrict )
return template_list
-
+
class LibraryDatasetDatasetAssociation( DatasetInstance ):
def __init__( self,
copied_from_history_dataset_association=None,
diff -r f7336991d0ee -r fb401a784261 lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py Thu Apr 23 12:42:05 2009 -0400
@@ -0,0 +1,74 @@
+import sys, logging, os, time
+
+log = logging.getLogger( __name__ )
+log.setLevel(logging.DEBUG)
+handler = logging.StreamHandler( sys.stdout )
+format = "%(name)s %(levelname)s %(asctime)s %(message)s"
+formatter = logging.Formatter( format )
+handler.setFormatter( formatter )
+log.addHandler( handler )
+
+from migrate import migrate_engine
+from sqlalchemy import and_
+
+# load existing galaxy model, we are only changing data
+import galaxy.model
+from galaxy.model import mapping
+model = mapping.init( galaxy.model.Dataset.file_path, str( migrate_engine.url ) )
+
+def __guess_dataset_by_filename( filename ):
+ """Return a guessed dataset by filename"""
+ try:
+ fields = os.path.split( filename )
+ if fields:
+ if fields[-1].startswith( 'dataset_' ) and fields[-1].endswith( '.dat' ): #dataset_%d.dat
+ return model.Dataset.get( int( fields[-1][ len( 'dataset_' ): -len( '.dat' ) ] ) )
+ except:
+ pass #some parsing error, we can't guess Dataset
+ return None
+
+def upgrade():
+ log.debug( "Fixing a discrepancy concerning deleted shared history items." )
+ affected_items = 0
+ start_time = time.time()
+ for dataset in model.Dataset.filter( and_( model.Dataset.c.deleted == True, model.Dataset.c.purged == False ) ).all():
+ for dataset_instance in dataset.history_associations + dataset.library_associations:
+ if not dataset_instance.deleted:
+ dataset.deleted = False
+ if dataset.file_size in [ None, 0 ]:
+ dataset.set_size() #Restore filesize
+ affected_items += 1
+ break
+ galaxy.model.mapping.Session.flush()
+ log.debug( "%i items affected, and restored." % ( affected_items ) )
+ log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
+
+ #fix share before hda
+ log.debug( "Fixing a discrepancy concerning cleaning up deleted history items shared before HDAs." )
+ dataset_by_filename = {}
+ changed_associations = 0
+ start_time = time.time()
+ for dataset in model.Dataset.filter( model.Dataset.external_filename.like( '%dataset_%.dat' ) ).all():
+ if dataset.file_name in dataset_by_filename:
+ guessed_dataset = dataset_by_filename[ dataset.file_name ]
+ else:
+ guessed_dataset = __guess_dataset_by_filename( dataset.file_name )
+ if guessed_dataset and dataset.file_name != guessed_dataset.file_name:#not os.path.samefile( dataset.file_name, guessed_dataset.file_name ):
+ guessed_dataset = None
+ dataset_by_filename[ dataset.file_name ] = guessed_dataset
+
+ if guessed_dataset is not None and guessed_dataset.id != dataset.id: #could we have a self referential dataset?
+ for dataset_instance in dataset.history_associations + dataset.library_associations:
+ dataset_instance.dataset = guessed_dataset
+ changed_associations += 1
+ #mark original Dataset as deleted and purged, it is no longer in use, but do not delete file_name contents
+ dataset.deleted = True
+ dataset.external_filename = "Dataset was result of share before HDA, and has been replaced: %s mapped to Dataset %s" % ( dataset.external_filename, guessed_dataset.id )
+ dataset.purged = True #we don't really purge the file here, but we mark it as purged, since this dataset is now defunct
+ galaxy.model.mapping.Session.flush()
+ log.debug( "%i items affected, and restored." % ( changed_associations ) )
+ log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
+
+def downgrade():
+ log.debug( "Downgrade is not possible." )
+
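
[Editor's note] The migration script's __guess_dataset_by_filename() helper simply recovers a dataset id from a filename of the form 'dataset_<id>.dat'. A tiny standalone sketch of that parsing follows (the lookup against the model is omitted; names are illustrative).

import os

def guess_dataset_id(filename):
    # Recover <id> from a path ending in 'dataset_<id>.dat'; return None when
    # the filename does not follow that pattern (mirrors the parsing in
    # __guess_dataset_by_filename above, minus the model lookup).
    base = os.path.split(filename)[-1]
    if base.startswith('dataset_') and base.endswith('.dat'):
        try:
            return int(base[len('dataset_'):-len('.dat')])
        except ValueError:
            return None
    return None

print(guess_dataset_id('/galaxy/files/000/dataset_42.dat'))  # 42
print(guess_dataset_id('/galaxy/files/000/other.dat'))       # None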
diff -r f7336991d0ee -r fb401a784261 lib/galaxy/util/__init__.py
--- a/lib/galaxy/util/__init__.py Thu Apr 23 09:20:11 2009 -0400
+++ b/lib/galaxy/util/__init__.py Thu Apr 23 12:42:05 2009 -0400
@@ -146,7 +146,7 @@
elif isinstance( value, list ):
return map(sanitize_text, value)
else:
- raise Exception, 'Unknown parameter type'
+ raise Exception, 'Unknown parameter type (%s)' % ( type( value ) )
class Params:
"""
diff -r f7336991d0ee -r fb401a784261 lib/galaxy/web/controllers/admin.py
--- a/lib/galaxy/web/controllers/admin.py Thu Apr 23 09:20:11 2009 -0400
+++ b/lib/galaxy/web/controllers/admin.py Thu Apr 23 12:42:05 2009 -0400
@@ -822,11 +822,13 @@
msg=util.sanitize_text( msg ),
messagetype='error' ) )
created_ldda_ids = params.get( 'created_ldda_ids', '' )
+ show_deleted = util.string_as_bool( params.get( 'show_deleted', False ) )
return trans.fill_template( '/admin/library/browse_library.mako',
library=trans.app.model.Library.get( id ),
created_ldda_ids=created_ldda_ids,
msg=msg,
- messagetype=messagetype )
+ messagetype=messagetype,
+ show_deleted=show_deleted )
@web.expose
@web.require_admin
def library( self, trans, id=None, **kwd ):
@@ -960,7 +962,8 @@
libraries=libraries,
deleted=True,
msg=msg,
- messagetype=messagetype )
+ messagetype=messagetype,
+ show_deleted = True )
@web.expose
@web.require_admin
def undelete_library( self, trans, **kwd ):
@@ -2040,6 +2043,50 @@
id=library_id,
msg=util.sanitize_text( msg ),
messagetype=messagetype ) )
+
+ @web.expose
+ @web.require_admin
+ def delete_library_item( self, trans, library_id = None, library_item_id = None, library_item_type = None ):
+ #this action will handle deleting all types of library items in library browsing mode
+ library_item_types = { 'library': trans.app.model.Library, 'folder': trans.app.model.LibraryFolder, 'dataset': trans.app.model.LibraryDataset, }
+ if library_item_type not in library_item_types:
+ raise ValueError( 'Bad library_item_type specified: %s' % library_item_types )
+ if library_item_id is None:
+ raise ValueError( 'library_item_id not specified' )
+ library_item = library_item_types[ library_item_type ].get( int( library_item_id ) )
+ library_item.deleted = True
+ library_item.flush()
+ #need to str because unicode isn't accepted...
+ msg = str( "%s '%s' has been marked deleted" % ( library_item_type, library_item.name ) )
+ messagetype = str( "done" )
+ if library_item_type == 'library' or library_id is None:
+ return self.browse_libraries( trans, msg = msg, messagetype = messagetype )
+ else:
+ return self.browse_library( trans, id = library_id , msg = msg, messagetype = messagetype )
+
+ @web.expose
+ @web.require_admin
+ def undelete_library_item( self, trans, library_id = None, library_item_id = None, library_item_type = None ):
+ #this action will handle deleting all types of library items in library browsing mode
+ library_item_types = { 'library': trans.app.model.Library, 'folder': trans.app.model.LibraryFolder, 'dataset': trans.app.model.LibraryDataset, }
+ if library_item_type not in library_item_types:
+ raise ValueError( 'Bad library_item_type specified: %s' % library_item_types )
+ if library_item_id is None:
+ raise ValueError( 'library_item_id not specified' )
+ library_item = library_item_types[ library_item_type ].get( int( library_item_id ) )
+ if library_item.purged:
+ raise ValueError( '%s %s cannot be undeleted' % ( library_item_type, library_item.name ) )
+ library_item.deleted = False
+ library_item.flush()
+ msg = str( "%s '%s' has been undeleted" % ( library_item_type, library_item.name ) )
+ messagetype = str( "done" )
+ if library_item_type == 'library' or library_id is None:
+ return self.browse_libraries( trans, msg = msg, messagetype = messagetype )
+ else:
+ return self.browse_library( trans, id = library_id , msg = msg, messagetype = messagetype )
+
+
+
#@web.expose
#@web.require_admin
#def delete_dataset( self, trans, id=None, **kwd):
diff -r f7336991d0ee -r fb401a784261 scripts/cleanup_datasets/cleanup_datasets.py
--- a/scripts/cleanup_datasets/cleanup_datasets.py Thu Apr 23 09:20:11 2009 -0400
+++ b/scripts/cleanup_datasets/cleanup_datasets.py Thu Apr 23 12:42:05 2009 -0400
@@ -1,6 +1,6 @@
#!/usr/bin/env python
-import sys, os, time, ConfigParser
+import sys, os, time, ConfigParser, shutil
from datetime import datetime, timedelta
from time import strftime
from optparse import OptionParser
@@ -15,7 +15,7 @@
pkg_resources.require( "SQLAlchemy >= 0.4" )
-from galaxy.model.orm import *
+from galaxy.model.orm import and_, eagerload
assert sys.version_info[:2] >= ( 2, 4 )
@@ -23,271 +23,172 @@
parser = OptionParser()
parser.add_option( "-d", "--days", dest="days", action="store", type="int", help="number of days (60)", default=60 )
parser.add_option( "-r", "--remove_from_disk", action="store_true", dest="remove_from_disk", help="remove datasets from disk when purged", default=False )
- parser.add_option( "-1", "--info_delete_userless_histories", action="store_true", dest="info_delete_userless_histories", default=False, help="info about the histories and datasets that will be affected by delete_userless_histories()" )
- parser.add_option( "-2", "--delete_userless_histories", action="store_true", dest="delete_userless_histories", default=False, help="delete userless histories and datasets" )
- parser.add_option( "-3", "--info_purge_histories", action="store_true", dest="info_purge_histories", default=False, help="info about histories and datasets that will be affected by purge_histories()" )
- parser.add_option( "-4", "--purge_histories", action="store_true", dest="purge_histories", default=False, help="purge deleted histories" )
- parser.add_option( "-5", "--info_purge_datasets", action="store_true", dest="info_purge_datasets", default=False, help="info about the datasets that will be affected by purge_datasets()" )
- parser.add_option( "-6", "--purge_datasets", action="store_true", dest="purge_datasets", default=False, help="purge deleted datasets" )
+ parser.add_option( "-i", "--info_only", action="store_true", dest="info_only", help="info about the requested action", default=False )
+
+ parser.add_option( "-1", "--delete_userless_histories", action="store_true", dest="delete_userless_histories", default=False, help="delete userless histories and datasets" )
+
+ parser.add_option( "-2", "--purge_histories", action="store_true", dest="purge_histories", default=False, help="purge deleted histories" )
+
+ parser.add_option( "-3", "--purge_datasets", action="store_true", dest="purge_datasets", default=False, help="purge deleted datasets" )
+
+ parser.add_option( "-4", "--purge_libraries", action="store_true", dest="purge_libraries", default=False, help="purge deleted libraries" )
+
+ parser.add_option( "-5", "--purge_folders", action="store_true", dest="purge_folders", default=False, help="purge deleted library folders" )
+
+
( options, args ) = parser.parse_args()
ini_file = args[0]
- if not ( options.info_delete_userless_histories ^ options.delete_userless_histories ^ \
- options.info_purge_histories ^ options.purge_histories ^ \
- options.info_purge_datasets ^ options.purge_datasets ):
+ if not ( options.purge_folders ^ options.delete_userless_histories ^ \
+ options.purge_libraries ^ options.purge_histories ^ \
+ options.purge_datasets ):
parser.print_help()
sys.exit(0)
+
+ if options.remove_from_disk and options.info_only:
+ parser.error( "remove_from_disk and info_only are mutually exclusive" )
conf_parser = ConfigParser.ConfigParser( {'here':os.getcwd()} )
conf_parser.read( ini_file )
configuration = {}
for key, value in conf_parser.items( "app:main" ):
configuration[key] = value
- database_connection = configuration['database_connection']
+
+ if 'database_connection' in configuration:
+ database_connection = configuration['database_connection']
+ else:
+ database_connection = "sqlite:///%s?isolation_level=IMMEDIATE" % configuration["database_file"]
file_path = configuration['file_path']
app = CleanupDatasetsApplication( database_connection=database_connection, file_path=file_path )
- h = app.model.History
- d = app.model.Dataset
- m = app.model.MetadataFile
cutoff_time = datetime.utcnow() - timedelta( days=options.days )
now = strftime( "%Y-%m-%d %H:%M:%S" )
-
+
print "\n# %s - Handling stuff older than %i days\n" % ( now, options.days )
-
- if options.info_delete_userless_histories:
- info_delete_userless_histories( h, cutoff_time )
- elif options.delete_userless_histories:
- delete_userless_histories( h, d, cutoff_time )
- if options.info_purge_histories:
- info_purge_histories( h, d, cutoff_time )
+
+ if options.info_only:
+ print "# Displaying info only ( --info_only )\n"
+ elif options.remove_from_disk:
+ print "# Datasets will be removed from disk.\n"
+ else:
+ print "# Datasets will NOT be removed from disk.\n"
+
+ if options.delete_userless_histories:
+ delete_userless_histories( app, cutoff_time, info_only = options.info_only )
elif options.purge_histories:
- if options.remove_from_disk:
- print "# Datasets will be removed from disk...\n"
- else:
- print "# Datasets will NOT be removed from disk...\n"
- purge_histories( h, d, m, cutoff_time, options.remove_from_disk )
- elif options.info_purge_datasets:
- info_purge_datasets( d, cutoff_time )
+ purge_histories( app, cutoff_time, options.remove_from_disk, info_only = options.info_only )
elif options.purge_datasets:
- if options.remove_from_disk:
- print "# Datasets will be removed from disk...\n"
- else:
- print "# Datasets will NOT be removed from disk...\n"
- purge_datasets( d, m, cutoff_time, options.remove_from_disk )
+ purge_datasets( app, cutoff_time, options.remove_from_disk, info_only = options.info_only )
+ elif options.purge_libraries:
+ purge_libraries( app, cutoff_time, options.remove_from_disk, info_only = options.info_only )
+ elif options.purge_folders:
+ purge_folders( app, cutoff_time, options.remove_from_disk, info_only = options.info_only )
+
sys.exit(0)
-def info_delete_userless_histories( h, cutoff_time ):
- # Provide info about the histories and datasets that will be affected if the delete_userless_histories function is executed.
+def delete_userless_histories( app, cutoff_time, info_only = False ):
+ # Deletes userless histories whose update_time value is older than the cutoff_time.
+ # The purge history script will handle marking DatasetInstances as deleted.
+ # Nothing is removed from disk yet.
history_count = 0
- dataset_count = 0
- histories = h.filter( and_( h.table.c.user_id==None,
- h.table.c.deleted==False,
- h.table.c.update_time < cutoff_time ) ) \
- .options( eagerload( 'active_datasets' ) ).all()
-
- print '# The following datasets and associated userless histories will be deleted'
+ print '# The following datasets and associated userless histories have been deleted'
start = time.clock()
+ histories = app.model.History.filter( and_( app.model.History.table.c.user_id==None,
+ app.model.History.table.c.deleted==False,
+ app.model.History.table.c.update_time < cutoff_time ) ).all()# \
for history in histories:
- for dataset_assoc in history.active_datasets:
- if not dataset_assoc.deleted:
- # This check is not necessary since 'active_datasets' are not
- # deleted, but just being cautious
- print "dataset_%d" %dataset_assoc.dataset_id
- dataset_count += 1
+ if not info_only:
+ history.deleted = True
print "%d" % history.id
history_count += 1
+ app.model.flush()
stop = time.clock()
- print "# %d histories ( including a total of %d datasets ) will be deleted\n" %( history_count, dataset_count )
- print "Elapsed time: ", stop - start, "\n"
-
-def delete_userless_histories( h, d, cutoff_time ):
- # Deletes userless histories whose update_time value is older than the cutoff_time.
- # The datasets associated with each history are also deleted. Nothing is removed from disk.
- history_count = 0
- dataset_count = 0
-
- print '# The following datasets and associated userless histories have been deleted'
- start = time.clock()
- histories = h.filter( and_( h.table.c.user_id==None,
- h.table.c.deleted==False,
- h.table.c.update_time < cutoff_time ) ) \
- .options( eagerload( 'active_datasets' ) ).all()
- for history in histories:
- for dataset_assoc in history.active_datasets:
- if not dataset_assoc.deleted:
- # Mark all datasets as deleted
- datasets = d.filter( d.table.c.id==dataset_assoc.dataset_id ).all()
- for dataset in datasets:
- if not dataset.deleted:
- dataset.deleted = True
- dataset.flush()
- # Mark the history_dataset_association as deleted
- dataset_assoc.deleted = True
- dataset_assoc.clear_associated_files()
- dataset_assoc.flush()
- print "dataset_%d" % dataset_assoc.dataset_id
- dataset_count += 1
- history.deleted = True
- history.flush()
- print "%d" % history.id
- history_count += 1
- stop = time.clock()
- print "# Deleted %d histories ( including a total of %d datasets )\n" %( history_count, dataset_count )
+ print "# Deleted %d histories.\n" % ( history_count )
print "Elapsed time: ", stop - start, "\n"
-def info_purge_histories( h, d, cutoff_time ):
- # Provide info about the histories and datasets that will be affected if the purge_histories function is executed.
+
+def purge_histories( app, cutoff_time, remove_from_disk, info_only = False ):
+ # Purges deleted histories whose update_time is older than the cutoff_time.
+ # The dataset associations of each history are also marked as deleted.
+ # The Purge Dataset method will purge each Dataset as necessary
+ # history.purged == True simply means that it can no longer be undeleted
+ # i.e. all associated datasets are marked as deleted
history_count = 0
- dataset_count = 0
- disk_space = 0
- print '# The following datasets and associated deleted histories will be purged'
+ print '# The following datasets and associated deleted histories have been purged'
start = time.clock()
- histories = h.filter( and_( h.table.c.deleted==True,
- h.table.c.purged==False,
- h.table.c.update_time < cutoff_time ) ) \
+ histories = app.model.History.filter( and_( app.model.History.table.c.deleted==True,
+ app.model.History.table.c.purged==False,
+ app.model.History.table.c.update_time < cutoff_time ) ) \
.options( eagerload( 'datasets' ) ).all()
for history in histories:
for dataset_assoc in history.datasets:
- # Datasets can only be purged if their HistoryDatasetAssociation has been deleted.
- if dataset_assoc.deleted:
- datasets = d.filter( d.table.c.id==dataset_assoc.dataset_id ).all()
- for dataset in datasets:
- if dataset.purgable and not dataset.purged:
- print "%s" % dataset.file_name
- dataset_count += 1
- try:
- disk_space += dataset.file_size
- except:
- pass
+ _purge_dataset_instance( dataset_assoc, app, remove_from_disk, info_only = info_only ) #mark a DatasetInstance as deleted, clear associated files, and mark the Dataset as deleted if it is deletable
+ if not info_only:
+ history.purged = True
print "%d" % history.id
history_count += 1
+ app.model.flush()
stop = time.clock()
- print '# %d histories ( including a total of %d datasets ) will be purged. Freed disk space: ' %( history_count, dataset_count ), disk_space, '\n'
+ print '# Purged %d histories.' % ( history_count ), '\n'
print "Elapsed time: ", stop - start, "\n"
-def purge_histories( h, d, m, cutoff_time, remove_from_disk ):
- # Purges deleted histories whose update_time is older than the cutoff_time.
- # The datasets associated with each history are also purged.
- history_count = 0
- dataset_count = 0
- disk_space = 0
- file_size = 0
- errors = False
- print '# The following datasets and associated deleted histories have been purged'
+def purge_libraries( app, cutoff_time, remove_from_disk, info_only = False ):
+ # Purges deleted libraries whose update_time is older than the cutoff_time.
+ # The dataset associations of each library are also marked as deleted.
+ # The Purge Dataset method will purge each Dataset as necessary
+ # library.purged == True simply means that it can no longer be undeleted
+ # i.e. all associated LibraryDatasets/folders are marked as deleted
+ library_count = 0
+ print '# The following libraries and associated folders have been purged'
start = time.clock()
- histories = h.filter( and_( h.table.c.deleted==True,
- h.table.c.purged==False,
- h.table.c.update_time < cutoff_time ) ) \
- .options( eagerload( 'datasets' ) ).all()
- for history in histories:
- errors = False
- for dataset_assoc in history.datasets:
- if dataset_assoc.deleted:
- datasets = d.filter( d.table.c.id==dataset_assoc.dataset_id ).all()
- for dataset in datasets:
- if dataset.purgable and not dataset.purged:
- file_size = dataset.file_size
- dataset.deleted = True
- dataset.file_size = 0
- if remove_from_disk:
- dataset.flush()
- errmsg = purge_dataset( dataset, d, m )
- if errmsg:
- errors = True
- print errmsg
- else:
- dataset.purged = True
- dataset.flush()
- print "%s" % dataset.file_name
- # Mark all associated MetadataFiles as deleted and purged
- print "The following metadata files associated with dataset '%s' have been marked purged" % dataset.file_name
- for hda in dataset.history_associations:
- for metadata_file in m.filter( m.table.c.hda_id==hda.id ).all():
- metadata_file.deleted = True
- metadata_file.purged = True
- metadata_file.flush()
- print "%s" % metadata_file.file_name()
- for lda in dataset.library_associations:
- for metadata_file in m.filter( m.table.c.lda_id==lda.id ).all():
- metadata_file.deleted = True
- metadata_file.purged = True
- metadata_file.flush()
- print "%s" % metadata_file.file_name()
- dataset_count += 1
- try:
- disk_space += file_size
- except:
- pass
- if not errors:
- history.purged = True
- history.flush()
- print "%d" % history.id
- history_count += 1
+ libraries = app.model.Library.filter( and_( app.model.Library.table.c.deleted==True,
+ app.model.Library.table.c.purged==False,
+ app.model.Library.table.c.update_time < cutoff_time ) ).all()
+ for library in libraries:
+ _purge_folder( library.root_folder, app, remove_from_disk, info_only = info_only )
+ if not info_only:
+ library.purged = True
+ print "%d" % library.id
+ library_count += 1
+ app.model.flush()
stop = time.clock()
- print '# Purged %d histories ( including a total of %d datasets ). Freed disk space: ' %( history_count, dataset_count ), disk_space, '\n'
+ print '# Purged %d libraries .' % ( library_count ), '\n'
print "Elapsed time: ", stop - start, "\n"
-def info_purge_datasets( d, cutoff_time ):
- # Provide info about the datasets that will be affected if the purge_datasets function is executed.
- dataset_count = 0
- disk_space = 0
- print '# The following deleted datasets will be purged'
+def purge_folders( app, cutoff_time, remove_from_disk, info_only = False ):
+ # Purges deleted folders whose update_time is older than the cutoff_time.
+ # The dataset associations of each folder are also marked as deleted.
+ # The Purge Dataset method will purge each Dataset as necessary
+ # libraryFolder.purged == True simply means that it can no longer be undeleted
+ # i.e. all associated LibraryDatasets/folders are marked as deleted
+ folder_count = 0
+ print '# The following folders have been purged'
start = time.clock()
- datasets = d.filter( and_( d.table.c.deleted==True,
- d.table.c.purgable==True,
- d.table.c.purged==False,
- d.table.c.update_time < cutoff_time ) ).all()
- for dataset in datasets:
- print "%s" % dataset.file_name
- dataset_count += 1
- try:
- disk_space += dataset.file_size
- except:
- pass
+ folders = app.model.LibraryFolder.filter( and_( app.model.LibraryFolder.table.c.deleted==True,
+ app.model.LibraryFolder.table.c.purged==False,
+ app.model.LibraryFolder.table.c.update_time < cutoff_time ) ).all()
+ for folder in folders:
+ _purge_folder( folder, app, remove_from_disk, info_only = info_only )
+ print "%d" % folder.id
+ folder_count += 1
stop = time.clock()
- print '# %d datasets will be purged. Freed disk space: ' %dataset_count, disk_space, '\n'
+ print '# Purged %d folders.' % ( folder_count ), '\n'
print "Elapsed time: ", stop - start, "\n"
-def purge_datasets( d, m, cutoff_time, remove_from_disk ):
+def purge_datasets( app, cutoff_time, remove_from_disk, info_only = False ):
# Purges deleted datasets whose update_time is older than cutoff_time. Files may or may
# not be removed from disk.
dataset_count = 0
disk_space = 0
- file_size = 0
print '# The following deleted datasets have been purged'
start = time.clock()
- datasets = d.filter( and_( d.table.c.deleted==True,
- d.table.c.purgable==True,
- d.table.c.purged==False,
- d.table.c.update_time < cutoff_time ) ).all()
+ datasets = app.model.Dataset.filter( and_( app.model.Dataset.table.c.deleted==True,
+ app.model.Dataset.table.c.purgable==True,
+ app.model.Dataset.table.c.purged==False,
+ app.model.Dataset.table.c.update_time < cutoff_time ) ).all()
for dataset in datasets:
file_size = dataset.file_size
- if remove_from_disk:
- errmsg = purge_dataset( dataset, d, m )
- if errmsg:
- print errmsg
- else:
- dataset_count += 1
- else:
- dataset.purged = True
- dataset.file_size = 0
- dataset.flush()
- print "%s" % dataset.file_name
- # Mark all associated MetadataFiles as deleted and purged
- print "The following metadata files associated with dataset '%s' have been marked purged" % dataset.file_name
- for hda in dataset.history_associations:
- for metadata_file in m.filter( m.table.c.hda_id==hda.id ).all():
- metadata_file.deleted = True
- metadata_file.purged = True
- metadata_file.flush()
- print "%s" % metadata_file.file_name()
- for lda in dataset.library_associations:
- for metadata_file in m.filter( m.table.c.lda_id==lda.id ).all():
- metadata_file.deleted = True
- metadata_file.purged = True
- metadata_file.flush()
- print "%s" % metadata_file.file_name()
- dataset_count += 1
+ _purge_dataset( dataset, remove_from_disk, info_only = info_only )
+ dataset_count += 1
try:
disk_space += file_size
except:
@@ -298,68 +199,90 @@
print '# Freed disk space: ', disk_space, '\n'
print "Elapsed time: ", stop - start, "\n"
-def purge_dataset( dataset, d, m ):
- # Removes the file from disk and updates the database accordingly.
+
+def _purge_dataset_instance( dataset_instance, app, remove_from_disk, include_children = True, info_only = False ):
+ #purging a dataset instance marks the instance as deleted,
+ #and marks the dataset as deleted if it is not associated with another DatsetInstance that is not deleted
+ if not info_only:
+ dataset_instance.mark_deleted( include_children = include_children )
+ dataset_instance.clear_associated_files()
+ dataset_instance.flush()
+ dataset_instance.dataset.refresh()
+ if _dataset_is_deletable( dataset_instance.dataset ):
+ _delete_dataset( dataset_instance.dataset, app, remove_from_disk, info_only = info_only )
+ #need to purge children here
+ if include_children:
+ for child in dataset_instance.children:
+ _purge_dataset_instance( child, app, remove_from_disk, include_children = include_children, info_only = info_only )
+
+def _dataset_is_deletable( dataset ):
+ #a dataset is deletable when it no longer has any non-deleted associations
+ return not bool( dataset.active_history_associations or dataset.active_library_associations )
+
+def _delete_dataset( dataset, app, remove_from_disk, info_only = False ):
+ #marks a base dataset as deleted, hdas/ldas associated with dataset can no longer be undeleted
+ #metadata files attached to associated dataset Instances is removed now
+ if not _dataset_is_deletable( dataset ):
+ print "# This Dataset (%i) is not deletable, associated Metadata Files will not be removed.\n" % ( dataset.id )
+ else:
+ # Mark all associated MetadataFiles as deleted and purged and remove them from disk
+ print "The following metadata files attached to associations of Dataset '%s' have been purged:" % dataset.id
+ metadata_files = []
+ #lets create a list of metadata files, then perform actions on them
+ for hda in dataset.history_associations:
+ for metadata_file in app.model.MetadataFile.filter( app.model.MetadataFile.table.c.hda_id==hda.id ).all():
+ metadata_files.append( metadata_file )
+ for lda in dataset.library_associations:
+ for metadata_file in app.model.MetadataFile.filter( app.model.MetadataFile.table.c.lda_id==lda.id ).all():
+ metadata_files.append( metadata_file )
+ for metadata_file in metadata_files:
+ if not info_only:
+ if remove_from_disk:
+ try:
+ os.unlink( metadata_file.file_name )
+ except Exception, e:
+ print "# Error, exception: %s caught attempting to purge metadata file %s\n" %( str( e ), metadata_file.file_name )
+ metadata_file.purged = True
+ metadata_file.deleted = True
+ #metadata_file.flush()
+ print "%s" % metadata_file.file_name
+ print
+ dataset.deleted = True
+ #dataset.flush()
+ app.model.flush()
+
+def _purge_dataset( dataset, remove_from_disk, info_only = False ):
if dataset.deleted:
- purgable = True
- # Remove files from disk and update the database
try:
- # See if the dataset has been shared
- if dataset.external_filename:
- # This check handles the pre-history_dataset_association approach to sharing.
- shared_data = d.filter( and_( d.table.c.external_filename==dataset.external_filename,
- d.table.c.deleted==False ) ).all()
- if shared_data:
- purgable = False
- if purgable:
- # This check handles the history_dataset_association approach to sharing.
- for shared_data in dataset.history_associations:
- # Check to see if another dataset is using this file. This happens when a user shares
- # their history with another user. In this case, a new record is created in the dataset
- # table for each dataset, but the dataset records point to the same data file on disk. So
- # if 1 of the 2 users deletes the dataset from their history but the other doesn't, we need
- # to keep the dataset on disk for the 2nd user.
- if not shared_data.deleted:
- purgable = False
- break
- if purgable:
- # This check handles the library_dataset_dataset_association approach to sharing.
- for shared_data in dataset.library_associations:
- if not shared_data.deleted:
- purgable = False
- break
- if purgable:
- dataset.purged = True
- dataset.file_size = 0
- dataset.flush()
- # Remove dataset file from disk
- os.unlink( dataset.file_name )
+ if dataset.purgable and _dataset_is_deletable( dataset ):
print "%s" % dataset.file_name
- # Mark all associated MetadataFiles as deleted and purged and remove them from disk
- print "The following metadata files associated with dataset '%s' have been purged" % dataset.file_name
- for hda in dataset.history_associations:
- for metadata_file in m.filter( m.table.c.hda_id==hda.id ).all():
- os.unlink( metadata_file.file_name() )
- metadata_file.deleted = True
- metadata_file.purged = True
- metadata_file.flush()
- print "%s" % metadata_file.file_name()
- for lda in dataset.library_associations:
- for metadata_file in m.filter( m.table.c.lda_id==lda.id ).all():
- metadata_file.deleted = True
- metadata_file.purged = True
- metadata_file.flush()
- print "%s" % metadata_file.file_name()
- try:
- # Remove associated extra files from disk if they exist
- os.unlink( dataset.extra_files_path )
- except:
- pass
+ if not info_only:
+ # Remove files from disk and update the database
+ if remove_from_disk:
+ os.unlink( dataset.file_name )
+ # Remove associated extra files from disk if they exist
+ if dataset.extra_files_path and os.path.exists( dataset.extra_files_path ):
+ shutil.rmtree( dataset.extra_files_path ) #we need to delete the directory and its contents; os.unlink would always fail on a directory
+ dataset.purged = True
+ else:
+ print "# This dataset (%i) is not purgable, the file (%s) will not be removed.\n" % ( dataset.id, dataset.file_name )
except Exception, exc:
- return "# Error, exception: %s caught attempting to purge %s\n" %( str( exc ), dataset.file_name )
+ print "# Error, exception: %s caught attempting to purge %s\n" %( str( exc ), dataset.file_name )
else:
- return "# Error: '%s' has not previously been deleted, so it cannot be purged\n" %dataset.file_name
- return ""
+ print "# Error: '%s' has not previously been deleted, so it cannot be purged\n" % dataset.file_name
+ print ""
+
+def _purge_folder( folder, app, remove_from_disk, info_only = False ):
+ """Purges a folder and its contents, recursively"""
+ for ld in folder.datasets:
+ ld.deleted = True
+ for ldda in [ld.library_dataset_dataset_association] + ld.expired_datasets:
+ _purge_dataset_instance( ldda, app, remove_from_disk, info_only = info_only ) #mark a DatasetInstance as deleted, clear associated files, and mark the Dataset as deleted if it is deletable
+ for sub_folder in folder.folders:
+ _purge_folder( sub_folder, app, remove_from_disk, info_only = info_only )
+ if not info_only:
+ folder.purged = True
+ folder.flush()
class CleanupDatasetsApplication( object ):
"""Encapsulates the state of a Universe application"""
diff -r f7336991d0ee -r fb401a784261 scripts/cleanup_datasets/delete_userless_histories.sh
--- a/scripts/cleanup_datasets/delete_userless_histories.sh Thu Apr 23 09:20:11 2009 -0400
+++ b/scripts/cleanup_datasets/delete_userless_histories.sh Thu Apr 23 12:42:05 2009 -0400
@@ -1,4 +1,4 @@
#!/bin/sh
cd `dirname $0`/../..
-python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 10 -2 $@ >> ./scripts/cleanup_datasets/delete_userless_histories.log
+python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 10 -1 $@ >> ./scripts/cleanup_datasets/delete_userless_histories.log
diff -r f7336991d0ee -r fb401a784261 scripts/cleanup_datasets/delete_userless_histories_main.sh
--- a/scripts/cleanup_datasets/delete_userless_histories_main.sh Thu Apr 23 09:20:11 2009 -0400
+++ b/scripts/cleanup_datasets/delete_userless_histories_main.sh Thu Apr 23 12:42:05 2009 -0400
@@ -1,4 +1,4 @@
#!/bin/sh
cd `dirname $0`/../..
-python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 60 -2 $@ >> ./scripts/cleanup_datasets/delete_userless_histories.log
+python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 60 -1 $@ >> ./scripts/cleanup_datasets/delete_userless_histories.log
diff -r f7336991d0ee -r fb401a784261 scripts/cleanup_datasets/purge_datasets.sh
--- a/scripts/cleanup_datasets/purge_datasets.sh Thu Apr 23 09:20:11 2009 -0400
+++ b/scripts/cleanup_datasets/purge_datasets.sh Thu Apr 23 12:42:05 2009 -0400
@@ -1,4 +1,4 @@
#!/bin/sh
cd `dirname $0`/../..
-python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 10 -6 -r $@ >> ./scripts/cleanup_datasets/purge_datasets.log
+python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 10 -3 -r $@ >> ./scripts/cleanup_datasets/purge_datasets.log
diff -r f7336991d0ee -r fb401a784261 scripts/cleanup_datasets/purge_datasets_main.sh
--- a/scripts/cleanup_datasets/purge_datasets_main.sh Thu Apr 23 09:20:11 2009 -0400
+++ b/scripts/cleanup_datasets/purge_datasets_main.sh Thu Apr 23 12:42:05 2009 -0400
@@ -1,4 +1,4 @@
#!/bin/sh
cd `dirname $0`/../..
-python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 60 -6 -r $@ >> ./scripts/cleanup_datasets/purge_datasets.log
+python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 60 -3 -r $@ >> ./scripts/cleanup_datasets/purge_datasets.log
diff -r f7336991d0ee -r fb401a784261 scripts/cleanup_datasets/purge_folders.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/cleanup_datasets/purge_folders.sh Thu Apr 23 12:42:05 2009 -0400
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+cd `dirname $0`/../..
+python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 10 -5 -r $@ >> ./scripts/cleanup_datasets/purge_folders.log
diff -r f7336991d0ee -r fb401a784261 scripts/cleanup_datasets/purge_folders_main.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/cleanup_datasets/purge_folders_main.sh Thu Apr 23 12:42:05 2009 -0400
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+cd `dirname $0`/../..
+python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 60 -5 -r $@ >> ./scripts/cleanup_datasets/purge_folders.log
diff -r f7336991d0ee -r fb401a784261 scripts/cleanup_datasets/purge_histories.sh
--- a/scripts/cleanup_datasets/purge_histories.sh Thu Apr 23 09:20:11 2009 -0400
+++ b/scripts/cleanup_datasets/purge_histories.sh Thu Apr 23 12:42:05 2009 -0400
@@ -1,4 +1,4 @@
#!/bin/sh
cd `dirname $0`/../..
-python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 10 -4 -r $@ >> ./scripts/cleanup_datasets/purge_histories.log
+python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 10 -2 -r $@ >> ./scripts/cleanup_datasets/purge_histories.log
diff -r f7336991d0ee -r fb401a784261 scripts/cleanup_datasets/purge_histories_main.sh
--- a/scripts/cleanup_datasets/purge_histories_main.sh Thu Apr 23 09:20:11 2009 -0400
+++ b/scripts/cleanup_datasets/purge_histories_main.sh Thu Apr 23 12:42:05 2009 -0400
@@ -1,4 +1,4 @@
#!/bin/sh
cd `dirname $0`/../..
-python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 60 -4 -r $@ >> ./scripts/cleanup_datasets/purge_histories.log
+python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 60 -2 -r $@ >> ./scripts/cleanup_datasets/purge_histories.log
diff -r f7336991d0ee -r fb401a784261 scripts/cleanup_datasets/purge_libraries.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/cleanup_datasets/purge_libraries.sh Thu Apr 23 12:42:05 2009 -0400
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+cd `dirname $0`/../..
+python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 10 -4 -r $@ >> ./scripts/cleanup_datasets/purge_libraries.log
diff -r f7336991d0ee -r fb401a784261 scripts/cleanup_datasets/purge_libraries_main.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/cleanup_datasets/purge_libraries_main.sh Thu Apr 23 12:42:05 2009 -0400
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+cd `dirname $0`/../..
+python ./scripts/cleanup_datasets/cleanup_datasets.py ./universe_wsgi.ini -d 60 -4 -r $@ >> ./scripts/cleanup_datasets/purge_libraries.log
diff -r f7336991d0ee -r fb401a784261 static/june_2007_style/base.css.tmpl
--- a/static/june_2007_style/base.css.tmpl Thu Apr 23 09:20:11 2009 -0400
+++ b/static/june_2007_style/base.css.tmpl Thu Apr 23 12:42:05 2009 -0400
@@ -563,6 +563,7 @@
color: #333;
font-size: 110%;
font-weight: bold;
+ font-style: normal;
white-space: nowrap;
position: absolute;
z-index: 20000;
diff -r f7336991d0ee -r fb401a784261 static/june_2007_style/blue/base.css
--- a/static/june_2007_style/blue/base.css Thu Apr 23 09:20:11 2009 -0400
+++ b/static/june_2007_style/blue/base.css Thu Apr 23 12:42:05 2009 -0400
@@ -559,6 +559,7 @@
color: #333;
font-size: 110%;
font-weight: bold;
+ font-style: normal;
white-space: nowrap;
position: absolute;
z-index: 20000;
diff -r f7336991d0ee -r fb401a784261 static/june_2007_style/blue/library.css
--- a/static/june_2007_style/blue/library.css Thu Apr 23 09:20:11 2009 -0400
+++ b/static/june_2007_style/blue/library.css Thu Apr 23 12:42:05 2009 -0400
@@ -4,6 +4,10 @@
.datasetHighlighted {
background-color: #C1C9E5;
+}
+
+.libraryItemDeleted-True {
+ font-style: italic;
}
div.historyItemBody {
diff -r f7336991d0ee -r fb401a784261 static/june_2007_style/library.css.tmpl
--- a/static/june_2007_style/library.css.tmpl Thu Apr 23 09:20:11 2009 -0400
+++ b/static/june_2007_style/library.css.tmpl Thu Apr 23 12:42:05 2009 -0400
@@ -4,6 +4,10 @@
.datasetHighlighted {
background-color: $menu_bg_over;
+}
+
+.libraryItemDeleted-True {
+ font-style: italic;
}
div.historyItemBody {
diff -r f7336991d0ee -r fb401a784261 templates/admin/library/browse_library.mako
--- a/templates/admin/library/browse_library.mako Thu Apr 23 09:20:11 2009 -0400
+++ b/templates/admin/library/browse_library.mako Thu Apr 23 12:42:05 2009 -0400
@@ -93,7 +93,7 @@
%>
%if not root_folder:
<li class="folderRow libraryOrFolderRow" style="padding-left: ${pad}px;">
- <div class="rowTitle">
+ <div class="rowTitle libraryItemDeleted-${parent.deleted}">
<img src="${h.url_for( expander )}" class="expanderIcon"/><img src="${h.url_for( folder )}" class="rowIcon"/>
${parent.name}
%if parent.description:
@@ -101,7 +101,7 @@
%endif
<a id="folder-${parent.id}-popup" class="popup-arrow" style="display: none;">▼</a>
</div>
- %if not deleted:
+ %if not parent.deleted:
<%
library_item_ids = {}
library_item_ids[ 'folder' ] = parent.id
@@ -117,10 +117,11 @@
<a class="action-button" href="${h.url_for( controller='admin', action='info_template', library_id=library.id, folder_id=parent.id, new_template=True )}">Add an information template to this folder</a>
%endif
<a class="action-button" href="${h.url_for( controller='admin', action='folder', permissions=True, id=parent.id, library_id=library_id )}">Edit this folder's permissions</a>
- ## TODO: need to revamp the way folders and contained LibraryDatasets are deleted
- ##%if subfolder:
- ## <a class="action-button" confirm="Click OK to delete the folder '${parent.name}'" href="${h.url_for( action='folder', delete=True, id=parent.id, library_id=library_id )}">Remove this folder and its contents from the library</a>
- ##%endif
+ <a class="action-button" confirm="Click OK to delete the folder '${parent.name}'" href="${h.url_for( controller='admin', action='delete_library_item', library_id=library_id, library_item_id=parent.id, library_item_type='folder' )}">Remove this folder and its contents from the library</a>
+ </div>
+ %else:
+ <div popupmenu="folder-${parent.id}-popup">
+ <a class="action-button" href="${h.url_for( controller='admin', action='undelete_library_item', library_id=library_id, library_item_id=parent.id, library_item_type='folder' )}">Undelete this folder</a>
</div>
%endif
</li>
@@ -130,10 +131,10 @@
%else:
<ul>
%endif
- %if library.deleted:
+ %if deleted:
<%
- parent_folders = parent.folders
- parent_datasets = parent.datasets
+ parent_folders = parent.activatable_folders
+ parent_datasets = parent.activatable_datasets
%>
%else:
<%
@@ -142,7 +143,7 @@
%>
%endif
%for folder in name_sorted( parent_folders ):
- ${render_folder( folder, pad, library.deleted, created_ldda_ids, library.id )}
+ ${render_folder( folder, pad, deleted, created_ldda_ids, library.id )}
%endfor
%for library_dataset in name_sorted( parent_datasets ):
<%
@@ -182,17 +183,20 @@
<table cellspacing="0" cellpadding="0" border="0" width="100%" class="libraryTitle">
<th width="*">
<img src="${h.url_for( '/static/images/silk/resultset_bottom.png' )}" class="expanderIcon"/><img src="${h.url_for( '/static/images/silk/book_open.png' )}" class="rowIcon"/>
- ${library.name}
- %if library.description:
- <i>- ${library.description}</i>
- %endif
+ <span class="libraryItemDeleted-${library.deleted}">
+ ${library.name}
+ %if library.description:
+ <i>- ${library.description}</i>
+ %endif
+ </span>
<a id="library-${library.id}-popup" class="popup-arrow" style="display: none;">▼</a>
+ <div popupmenu="library-${library.id}-popup">
%if not library.deleted:
<%
library_item_ids = {}
library_item_ids[ 'library' ] = library.id
%>
- <div popupmenu="library-${library.id}-popup">
+
<a class="action-button" href="${h.url_for( controller='admin', action='library', id=library.id, information=True )}">Edit this library's information</a>
%if library.library_info_template_associations:
<% template = library.get_library_item_info_templates( template_list=[], restrict=False )[0] %>
@@ -201,15 +205,16 @@
<a class="action-button" href="${h.url_for( controller='admin', action='info_template', library_id=library.id, new_template=True )}">Add an information template to this library</a>
%endif
<a class="action-button" href="${h.url_for( controller='admin', action='library', id=library.id, permissions=True )}">Edit this library's permissions</a>
- ## TODO: need to revamp the way libraries, folders, and contained LibraryDatasets are deleted
- ##<a class="action-button" confirm="Current state will not be saved, so undeleting the library will restore all of its contents. Click OK to delete the library named '${library.name}'?" href="${h.url_for( controller='admin', action='library', delete=True, id=library.id )}">Delete this library and its contents</a>
- </div>
- ##%else:
- ## <div popupmenu="library-${library.id}-popup">
- ## <a class="action-button" href="${h.url_for( controller='admin', action='undelete_library', id=library.id )}">Undelete this library and its contents</a>
- ## <a class="action-button" href="${h.url_for( controller='admin', action='purge_library', id=library.id )}">Purge this library and its contents</a>
- ## </div>
+ <a class="action-button" confirm="Current state will not be saved, so undeleting the library will restore all of its contents. Click OK to delete the library named '${library.name}'?" href="${h.url_for( controller='admin', action='delete_library_item', library_item_type='library', library_item_id=library.id )}">Delete this library and its contents</a>
+ %if show_deleted:
+ <a class="action-button" href="${h.url_for( controller='admin', action='browse_library', id=library.id, show_deleted=False )}">Hide deleted library items</a>
+ %else:
+ <a class="action-button" href="${h.url_for( controller='admin', action='browse_library', id=library.id, show_deleted=True )}">Show deleted library items</a>
+ %endif
+ %elif not library.purged:
+ <a class="action-button" href="${h.url_for( controller='admin', action='undelete_library_item', library_item_type='library', library_item_id=library.id )}">Undelete this library</a>
%endif
+ </div>
</th>
<th width="300">Information</th>
<th width="150">Uploaded By</th>
@@ -218,7 +223,7 @@
</div>
</li>
<ul>
- ${render_folder( library.root_folder, 0, library.deleted, created_ldda_ids, library.id )}
+ ${render_folder( library.root_folder, 0, library.deleted or show_deleted, created_ldda_ids, library.id )}
</ul>
<br/>
</ul>
diff -r f7336991d0ee -r fb401a784261 templates/admin/library/common.mako
--- a/templates/admin/library/common.mako Thu Apr 23 09:20:11 2009 -0400
+++ b/templates/admin/library/common.mako Thu Apr 23 12:42:05 2009 -0400
@@ -1,6 +1,6 @@
<% from time import strftime %>
-<%def name="render_dataset( library_dataset, selected, library )">
+<%def name="render_dataset( library_dataset, selected, library, show_deleted = False )">
<%
## The received data must always be a LibraryDataset object, but the object id passed to methods from the drop down menu
## should be the underlying ldda id to prevent id collision ( which could happen when displaying children, which are always
@@ -27,13 +27,15 @@
%else:
<input type="checkbox" name="ldda_ids" value="${ldda.id}"/>
%endif
- <a href="${h.url_for( controller='admin', action='library_dataset_dataset_association', library_id=library.id, folder_id=library_dataset.folder.id, id=ldda.id, info=True )}"><b>${ldda.name[:50]}</b></a>
- %if not library.deleted:
+ <span class="libraryItemDeleted-${library_dataset.deleted}">
+ <a href="${h.url_for( controller='admin', action='library_dataset_dataset_association', library_id=library.id, folder_id=library_dataset.folder.id, id=ldda.id, info=True )}"><b>${ldda.name[:50]}</b></a>
+ </span>
+ <a id="dataset-${ldda.id}-popup" class="popup-arrow" style="display: none;">▼</a>
+ %if not library_dataset.deleted:
<%
library_item_ids = {}
library_item_ids[ 'ldda' ] = ldda.id
%>
- <a id="dataset-${ldda.id}-popup" class="popup-arrow" style="display: none;">▼</a>
<div popupmenu="dataset-${ldda.id}-popup">
<a class="action-button" href="${h.url_for( controller='admin', action='library_dataset_dataset_association', library_id=library.id, folder_id=library_dataset.folder.id, id=ldda.id, edit_info=True )}">Edit this dataset's information</a>
## We're disabling the ability to add templates at the LDDA and LibraryDataset level, but will leave this here for possible future use
@@ -46,7 +48,11 @@
<a class="action-button" href="${h.url_for( controller='admin', action='download_dataset_from_folder', id=ldda.id, library_id=library.id )}">Download this dataset</a>
%endif
##TODO: need to revamp the way we remove datasets from disk.
- ##<a class="action-button" confirm="Click OK to remove dataset '${ldda.name}'?" href="${h.url_for( controller='admin', action='library_dataset_dataset_association', library_id=library.id, folder_id=library_dataset.folder.id, id=ldda.id, delete=True )}">Remove this dataset from the library</a>
+ <a class="action-button" confirm="Click OK to remove dataset '${ldda.name}'?" href="${h.url_for( controller='admin', action='delete_library_item', library_id=library.id, library_item_id=library_dataset.id, library_item_type='dataset' )}">Remove this dataset from the library</a>
+ </div>
+ %else:
+ <div popupmenu="dataset-${ldda.id}-popup">
+ <a class="action-button" href="${h.url_for( controller='admin', action='undelete_library_item', library_id=library.id, library_item_id=library_dataset.id, library_item_type='dataset' )}">Undelete this dataset</a>
</div>
%endif
</td>
Hello,
I've encountered a strange situation (or at least one that is strange to me):
Users are submitting jobs, and the jobs start almost immediately.
However, the jobs appear to keep running for a very long time.
Checking the report web page, a job looks like the attached image.
The status is "running", but the command line is empty, and no program
was executed for this job (I checked with "ps ax -H" and looked for
python's child processes).
Some technical information:
Running on Fedora with Python 2.4.3, PostgreSQL 8.0.
The server is loaded, but not excessively (a load average of 14.4 on 16 cores).
Relevant settings from universe_wsgi.ini:
use_threadpool = true
threadpool_workers = 10
local_job_queue_workers = 5
job_scheduler_policy =
galaxy.jobs.schedulingpolicy.roundrobin:UserRoundRobin
job_queue_cleanup_interval = 30
Is this normal?
Thanks,
Gordon.