[hg] galaxy 1640: Fixes, code cleanup in cleanup_datasets.py.

4 Dec 2008

details:   http://www.bx.psu.edu/hg/galaxy/rev/20683fe3bfcb
changeset: 1640:20683fe3bfcb
user:      Greg Von Kuster <greg@bx.psu.edu>
date:      Sun Nov 30 08:03:21 2008 -0500
description:
Fixes, code cleanup in cleanup_datasets.py.

1 file(s) affected in this change:

scripts/cleanup_datasets/cleanup_datasets.py

diffs (141 lines):

diff -r bcd158679985 -r 20683fe3bfcb scripts/cleanup_datasets/cleanup_datasets.py

--- a/scripts/cleanup_datasets/cleanup_datasets.py	Mon Nov 24 11:46:45 2008 -0500
+++ b/scripts/cleanup_datasets/cleanup_datasets.py	Sun Nov 30 08:03:21 2008 -0500
@@ -9,12 +9,7 @@
 new_path.extend( sys.path[1:] ) # remove scripts/ from the path
 sys.path = new_path
 
-from galaxy import eggs
-import galaxy.model.mapping
-import pkg_resources
-        
-pkg_resources.require( "SQLAlchemy >= 0.4" )
-from sqlalchemy.orm import eagerload
+from galaxy.model.orm import *
 
 assert sys.version_info[:2] >= ( 2, 4 )
 
@@ -79,8 +74,10 @@
     # Provide info about the histories and datasets that will be affected if the delete_userless_histories function is executed.
     history_count = 0
     dataset_count = 0
-    where = ( h.table.c.user_id==None ) & ( h.table.c.deleted==False ) & ( h.table.c.update_time < cutoff_time )
-    histories = h.query().filter( where ).options( eagerload( 'active_datasets' ) ).all()
+    histories = h.filter( and_( h.table.c.user_id==None,
+                                h.table.c.deleted==False,
+                                h.table.c.update_time < cutoff_time ) ) \
+                 .options( eagerload( 'active_datasets' ) ).all()
 
     print '# The following datasets and associated userless histories will be deleted'
     start = time.clock()
@@ -102,17 +99,18 @@
     # The datasets associated with each history are also deleted.  Nothing is removed from disk.
     history_count = 0
     dataset_count = 0
-    h_where = ( h.table.c.user_id==None ) & ( h.table.c.deleted==False ) & ( h.table.c.update_time < cutoff_time )
 
     print '# The following datasets and associated userless histories have been deleted'
     start = time.clock()
-    histories = h.query().filter( h_where ).options( eagerload( 'active_datasets' ) ).all()
+    histories = h.filter( and_( h.table.c.user_id==None,
+                                h.table.c.deleted==False,
+                                h.table.c.update_time < cutoff_time ) ) \
+                 .options( eagerload( 'active_datasets' ) ).all()
     for history in histories:
         for dataset_assoc in history.active_datasets:
             if not dataset_assoc.deleted:
                 # Mark all datasets as deleted
-                d_where = ( d.table.c.id==dataset_assoc.dataset_id )
-                datasets = d.query().filter( d_where ).all()
+                datasets = d.filter( d.table.c.id==dataset_assoc.dataset_id ).all()
                 for dataset in datasets:
                     if not dataset.deleted:
                         dataset.deleted = True
@@ -136,17 +134,17 @@
     history_count = 0
     dataset_count = 0
     disk_space = 0
-    h_where = ( h.table.c.deleted==True ) & ( h.table.c.purged==False ) & ( h.table.c.update_time < cutoff_time )
-
     print '# The following datasets and associated deleted histories will be purged'
     start = time.clock()
-    histories = h.query().filter( h_where ).options( eagerload( 'datasets' ) ).all()
+    histories = h.filter( and_( h.table.c.deleted==True,
+                                h.table.c.purged==False,
+                                h.table.c.update_time < cutoff_time ) ) \
+                 .options( eagerload( 'datasets' ) ).all()
     for history in histories:
         for dataset_assoc in history.datasets:
             # Datasets can only be purged if their HistoryDatasetAssociation has been deleted.
             if dataset_assoc.deleted:
-                d_where = ( d.table.c.id==dataset_assoc.dataset_id )
-                datasets = d.query().filter( d_where ).all()
+                datasets = d.filter( d.table.c.id==dataset_assoc.dataset_id ).all()
                 for dataset in datasets:
                     if dataset.purgable and not dataset.purged:
                         print "%s" % dataset.file_name
@@ -169,17 +167,17 @@
     disk_space = 0
     file_size = 0
     errors = False
-    h_where = ( h.table.c.deleted==True ) & ( h.table.c.purged==False ) & ( h.table.c.update_time < cutoff_time )
-
     print '# The following datasets and associated deleted histories have been purged'
     start = time.clock()
-    histories = h.query().filter( h_where ).options( eagerload( 'datasets' ) ).all()    
+    histories = h.filter( and_( h.table.c.deleted==True,
+                                h.table.c.purged==False,
+                                h.table.c.update_time < cutoff_time ) ) \
+                 .options( eagerload( 'datasets' ) ).all()    
     for history in histories:
         errors = False
         for dataset_assoc in history.datasets:
             if dataset_assoc.deleted:
-                d_where = ( d.table.c.id==dataset_assoc.dataset_id )
-                datasets = d.query().filter( d_where ).all()
+                datasets = d.filter( d.table.c.id==dataset_assoc.dataset_id ).all()
                 for dataset in datasets:
                     if dataset.purgable and not dataset.purged:
                         file_size = dataset.file_size
@@ -221,11 +219,12 @@
     # Provide info about the datasets that will be affected if the purge_datasets function is executed.
     dataset_count = 0
     disk_space = 0
-    where = ( d.table.c.deleted==True ) & ( d.table.c.purgable==True ) & ( d.table.c.purged==False ) & ( d.table.c.update_time < cutoff_time )
-
     print '# The following deleted datasets will be purged'    
     start = time.clock()
-    datasets = d.query().filter( where ).all()
+    datasets = d.filter( and_( d.table.c.deleted==True,
+                               d.table.c.purgable==True,
+                               d.table.c.purged==False,
+                               d.table.c.update_time < cutoff_time ) ).all()
     for dataset in datasets:
         print "%s" % dataset.file_name
         dataset_count += 1
@@ -243,11 +242,12 @@
     dataset_count = 0
     disk_space = 0
     file_size = 0
-    where = ( d.table.c.deleted==True ) & ( d.table.c.purgable==True ) & ( d.table.c.purged==False ) & ( d.table.c.update_time < cutoff_time )
-
     print '# The following deleted datasets have been purged'
     start = time.clock()
-    datasets = d.query().filter( where ).all()
+    datasets = d.filter( and_( d.table.c.deleted==True,
+                               d.table.c.purgable==True,
+                               d.table.c.purged==False,
+                               d.table.c.update_time < cutoff_time ) ).all()
     for dataset in datasets:
         file_size = dataset.file_size
         if remove_from_disk:
@@ -289,7 +289,8 @@
             # See if the dataset has been shared
             if dataset.external_filename:
                 # This check handles the pre-history_dataset_association approach to sharing.
-                shared_data = d.filter( and_( d.table.c.external_filename==dataset.external_filename, d.table.c.deleted==False ) ).all()
+                shared_data = d.filter( and_( d.table.c.external_filename==dataset.external_filename,
+                                              d.table.c.deleted==False ) ).all()
                 if shared_data:
                     purgable = False
             if purgable:

    

Nate Coraor

tags

participants (1)