details: http://www.bx.psu.edu/hg/galaxy/rev/330bf412f311
changeset: 1616:330bf412f311
user: Greg Von Kuster <greg(a)bx.psu.edu>
date: Mon Nov 10 15:30:07 2008 -0500
description:
Fix for purging datasets: add a check for the pre-history_dataset_association approach to sharing.
1 file(s) affected in this change:
scripts/cleanup_datasets/cleanup_datasets.py
diffs (122 lines):
diff -r 2d7f872ddaf8 -r 330bf412f311 scripts/cleanup_datasets/cleanup_datasets.py
--- a/scripts/cleanup_datasets/cleanup_datasets.py Mon Nov 10 14:22:01 2008 -0500
+++ b/scripts/cleanup_datasets/cleanup_datasets.py Mon Nov 10 15:30:07 2008 -0500
@@ -79,7 +79,7 @@
# Provide info about the histories and datasets that will be affected if the delete_userless_histories function is executed.
history_count = 0
dataset_count = 0
- where = ( h.table.c.user_id==None ) & ( h.table.c.deleted=='f' ) & ( h.table.c.update_time < cutoff_time )
+ where = ( h.table.c.user_id==None ) & ( h.table.c.deleted==False ) & ( h.table.c.update_time < cutoff_time )
histories = h.query().filter( where ).options( eagerload( 'active_datasets' ) ).all()
print '# The following datasets and associated userless histories will be deleted'
@@ -102,7 +102,7 @@
# The datasets associated with each history are also deleted. Nothing is removed from disk.
history_count = 0
dataset_count = 0
- h_where = ( h.table.c.user_id==None ) & ( h.table.c.deleted=='f' ) & ( h.table.c.update_time < cutoff_time )
+ h_where = ( h.table.c.user_id==None ) & ( h.table.c.deleted==False ) & ( h.table.c.update_time < cutoff_time )
print '# The following datasets and associated userless histories have been deleted'
start = time.clock()
@@ -136,7 +136,7 @@
history_count = 0
dataset_count = 0
disk_space = 0
- h_where = ( h.table.c.deleted=='t' ) & ( h.table.c.purged=='f' ) & ( h.table.c.update_time < cutoff_time )
+ h_where = ( h.table.c.deleted==True ) & ( h.table.c.purged==False ) & ( h.table.c.update_time < cutoff_time )
print '# The following datasets and associated deleted histories will be purged'
start = time.clock()
@@ -169,7 +169,7 @@
disk_space = 0
file_size = 0
errors = False
- h_where = ( h.table.c.deleted=='t' ) & ( h.table.c.purged=='f' ) & ( h.table.c.update_time < cutoff_time )
+ h_where = ( h.table.c.deleted==True ) & ( h.table.c.purged==False ) & ( h.table.c.update_time < cutoff_time )
print '# The following datasets and associated deleted histories have been purged'
start = time.clock()
@@ -187,7 +187,7 @@
dataset.file_size = 0
if remove_from_disk:
dataset.flush()
- errmsg = purge_dataset( dataset, m )
+ errmsg = purge_dataset( dataset, d, m )
if errmsg:
errors = True
print errmsg
@@ -221,7 +221,7 @@
# Provide info about the datasets that will be affected if the purge_datasets function is executed.
dataset_count = 0
disk_space = 0
- where = ( d.table.c.deleted=='t' ) & ( d.table.c.purgable=='t' ) & ( d.table.c.purged=='f' ) & ( d.table.c.update_time < cutoff_time )
+ where = ( d.table.c.deleted==True ) & ( d.table.c.purgable==True ) & ( d.table.c.purged==False ) & ( d.table.c.update_time < cutoff_time )
print '# The following deleted datasets will be purged'
start = time.clock()
@@ -243,7 +243,7 @@
dataset_count = 0
disk_space = 0
file_size = 0
- where = ( d.table.c.deleted=='t' ) & ( d.table.c.purgable=='t' ) & ( d.table.c.purged=='f' ) & ( d.table.c.update_time < cutoff_time )
+ where = ( d.table.c.deleted==True ) & ( d.table.c.purgable==True ) & ( d.table.c.purged==False ) & ( d.table.c.update_time < cutoff_time )
print '# The following deleted datasets have been purged'
start = time.clock()
@@ -251,7 +251,7 @@
for dataset in datasets:
file_size = dataset.file_size
if remove_from_disk:
- errmsg = purge_dataset( dataset, m )
+ errmsg = purge_dataset( dataset, d, m )
if errmsg:
print errmsg
else:
@@ -280,23 +280,33 @@
print '# Freed disk space: ', disk_space, '\n'
print "Elapsed time: ", stop - start, "\n"
-def purge_dataset( dataset, m ):
+def purge_dataset( dataset, d, m ):
# Removes the file from disk and updates the database accordingly.
if dataset.deleted:
+ purgable = True
# Remove files from disk and update the database
try:
- dataset.purged = True
- dataset.file_size = 0
- dataset.flush()
- for shared_data in dataset.history_associations:
- # Check to see if another dataset is using this file. This happens when a user shares
- # their history with another user. In this case, a new record is created in the dataset
- # table for each dataset, but the dataset records point to the same data file on disk. So
- # if 1 of the 2 users deletes the dataset from their history but the other doesn't, we need
- # to keep the dataset on disk for the 2nd user.
- if not shared_data.deleted:
- break #only purge when not shared
- else:
+ # See if the dataset has been shared
+ if dataset.external_filename:
+ # This check handles the pre-history_dataset_association approach to sharing.
+ shared_data = d.filter( and_( d.table.c.external_filename==dataset.external_filename, d.table.c.deleted==False ) ).all()
+ if shared_data:
+ purgable = False
+ if purgable:
+ # This check handles the history_dataset_association approach to sharing.
+ for shared_data in dataset.history_associations:
+ # Check to see if another dataset is using this file. This happens when a user shares
+ # their history with another user. In this case, a new record is created in the dataset
+ # table for each dataset, but the dataset records point to the same data file on disk. So
+ # if 1 of the 2 users deletes the dataset from their history but the other doesn't, we need
+ # to keep the dataset on disk for the 2nd user.
+ if not shared_data.deleted:
+ purgable = False
+ break
+ if purgable:
+ dataset.purged = True
+ dataset.file_size = 0
+ dataset.flush()
# Remove dataset file from disk
os.unlink( dataset.file_name )
print "%s" % dataset.file_name
details: http://www.bx.psu.edu/hg/galaxy/rev/af3fc2158202
changeset: 1609:af3fc2158202
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Wed Nov 05 15:20:47 2008 -0500
description:
A better test to ensure that a MetadataCollection is not stale.
1 file(s) affected in this change:
lib/galaxy/model/__init__.py
diffs (12 lines):
diff -r 53aa8d34c20a -r af3fc2158202 lib/galaxy/model/__init__.py
--- a/lib/galaxy/model/__init__.py Wed Nov 05 11:43:22 2008 -0500
+++ b/lib/galaxy/model/__init__.py Wed Nov 05 15:20:47 2008 -0500
@@ -188,7 +188,7 @@
return datatypes_registry.get_datatype_by_extension( self.extension )
def get_metadata( self ):
- if not hasattr( self, '_metadata_collection' ) or self._metadata_collection.parent is None: #using weakref to store parent (to prevent circ ref), does a Session.clear() cause parent to be invalidated, while still copying over this non-database attribute?
+ if not hasattr( self, '_metadata_collection' ) or self._metadata_collection.parent != self: #using weakref to store parent (to prevent circ ref), does a Session.clear() cause parent to be invalidated, while still copying over this non-database attribute?
self._metadata_collection = MetadataCollection( self )
return self._metadata_collection
def set_metadata( self, bunch ):