details: http://www.bx.psu.edu/hg/galaxy/rev/315ac197ff33
changeset: 2512:315ac197ff33
user: Dan Blankenberg <dan@bx.psu.edu>
date: Thu Jul 30 12:56:16 2009 -0400
description:
Add a new flag -f/--force_retry to cleanup_datasets.py. This flag causes the script to attempt to perform the requested action on objects regardless of whether it has been performed before. This is useful, e.g., if purge_datasets was called without --remove_from_disk, but it is later decided that those files should be removed: in that case the purge_datasets script should be called with both -r and -f. (An example invocation and a sketch of the selection logic follow the diff below.)

1 file(s) affected in this change:

scripts/cleanup_datasets/cleanup_datasets.py

diffs (162 lines):

diff -r ec59d6bcf827 -r 315ac197ff33 scripts/cleanup_datasets/cleanup_datasets.py
--- a/scripts/cleanup_datasets/cleanup_datasets.py	Thu Jul 30 12:12:26 2009 -0400
+++ b/scripts/cleanup_datasets/cleanup_datasets.py	Thu Jul 30 12:56:16 2009 -0400
@@ -24,6 +24,7 @@
     parser.add_option( "-d", "--days", dest="days", action="store", type="int", help="number of days (60)", default=60 )
     parser.add_option( "-r", "--remove_from_disk", action="store_true", dest="remove_from_disk", help="remove datasets from disk when purged", default=False )
     parser.add_option( "-i", "--info_only", action="store_true", dest="info_only", help="info about the requested action", default=False )
+    parser.add_option( "-f", "--force_retry", action="store_true", dest="force_retry", help="performs the requested actions, but ignores whether it might have been done before. Useful when -r wasn't used, but should have been", default=False )
     parser.add_option( "-1", "--delete_userless_histories", action="store_true", dest="delete_userless_histories", default=False,
                        help="delete userless histories and datasets" )
@@ -73,28 +74,32 @@
         print "# Datasets will NOT be removed from disk.\n"
     if options.delete_userless_histories:
-        delete_userless_histories( app, cutoff_time, info_only = options.info_only )
+        delete_userless_histories( app, cutoff_time, info_only = options.info_only, force_retry = options.force_retry )
     elif options.purge_histories:
-        purge_histories( app, cutoff_time, options.remove_from_disk, info_only = options.info_only )
+        purge_histories( app, cutoff_time, options.remove_from_disk, info_only = options.info_only, force_retry = options.force_retry )
     elif options.purge_datasets:
-        purge_datasets( app, cutoff_time, options.remove_from_disk, info_only = options.info_only )
+        purge_datasets( app, cutoff_time, options.remove_from_disk, info_only = options.info_only, force_retry = options.force_retry )
     elif options.purge_libraries:
-        purge_libraries( app, cutoff_time, options.remove_from_disk, info_only = options.info_only )
+        purge_libraries( app, cutoff_time, options.remove_from_disk, info_only = options.info_only, force_retry = options.force_retry )
     elif options.purge_folders:
-        purge_folders( app, cutoff_time, options.remove_from_disk, info_only = options.info_only )
+        purge_folders( app, cutoff_time, options.remove_from_disk, info_only = options.info_only, force_retry = options.force_retry )
     sys.exit(0)

-def delete_userless_histories( app, cutoff_time, info_only = False ):
+def delete_userless_histories( app, cutoff_time, info_only = False, force_retry = False ):
     # Deletes userless histories whose update_time value is older than the cutoff_time.
     # The purge history script will handle marking DatasetInstances as deleted.
     # Nothing is removed from disk yet.
     history_count = 0
     print '# The following datasets and associated userless histories have been deleted'
     start = time.clock()
-    histories = app.model.History.filter( and_( app.model.History.table.c.user_id==None,
-                                                app.model.History.table.c.deleted==False,
-                                                app.model.History.table.c.update_time < cutoff_time ) ).all()# \
+    if force_retry:
+        histories = app.model.History.filter( and_( app.model.History.table.c.user_id==None,
+                                                    app.model.History.table.c.update_time < cutoff_time ) ).all()
+    else:
+        histories = app.model.History.filter( and_( app.model.History.table.c.user_id==None,
+                                                    app.model.History.table.c.deleted==False,
+                                                    app.model.History.table.c.update_time < cutoff_time ) ).all()
     for history in histories:
         if not info_only:
             history.deleted = True
@@ -106,7 +111,7 @@
     print "Elapsed time: ", stop - start, "\n"

-def purge_histories( app, cutoff_time, remove_from_disk, info_only = False ):
+def purge_histories( app, cutoff_time, remove_from_disk, info_only = False, force_retry = False ):
     # Purges deleted histories whose update_time is older than the cutoff_time.
     # The dataset associations of each history are also marked as deleted.
     # The Purge Dataset method will purge each Dataset as necessary
@@ -115,10 +120,15 @@
     history_count = 0
     print '# The following datasets and associated deleted histories have been purged'
     start = time.clock()
-    histories = app.model.History.filter( and_( app.model.History.table.c.deleted==True,
-                                                app.model.History.table.c.purged==False,
-                                                app.model.History.table.c.update_time < cutoff_time ) ) \
-                                  .options( eagerload( 'datasets' ) ).all()
+    if force_retry:
+        histories = app.model.History.filter( and_( app.model.History.table.c.deleted==True,
+                                                    app.model.History.table.c.update_time < cutoff_time ) ) \
+                                      .options( eagerload( 'datasets' ) ).all()
+    else:
+        histories = app.model.History.filter( and_( app.model.History.table.c.deleted==True,
+                                                    app.model.History.table.c.purged==False,
+                                                    app.model.History.table.c.update_time < cutoff_time ) ) \
+                                      .options( eagerload( 'datasets' ) ).all()
     for history in histories:
         for dataset_assoc in history.datasets:
             _purge_dataset_instance( dataset_assoc, app, remove_from_disk, info_only = info_only ) #mark a DatasetInstance as deleted, clear associated files, and mark the Dataset as deleted if it is deletable
@@ -136,7 +146,7 @@
     print '# Purged %d histories.' % ( history_count ), '\n'
     print "Elapsed time: ", stop - start, "\n"

-def purge_libraries( app, cutoff_time, remove_from_disk, info_only = False ):
+def purge_libraries( app, cutoff_time, remove_from_disk, info_only = False, force_retry = False ):
     # Purges deleted libraries whose update_time is older than the cutoff_time.
     # The dataset associations of each library are also marked as deleted.
     # The Purge Dataset method will purge each Dataset as necessary
@@ -145,9 +155,13 @@
     library_count = 0
     print '# The following libraries and associated folders have been purged'
     start = time.clock()
-    libraries = app.model.Library.filter( and_( app.model.Library.table.c.deleted==True,
-                                                app.model.Library.table.c.purged==False,
-                                                app.model.Library.table.c.update_time < cutoff_time ) ).all()
+    if force_retry:
+        libraries = app.model.Library.filter( and_( app.model.Library.table.c.deleted==True,
+                                                    app.model.Library.table.c.update_time < cutoff_time ) ).all()
+    else:
+        libraries = app.model.Library.filter( and_( app.model.Library.table.c.deleted==True,
+                                                    app.model.Library.table.c.purged==False,
+                                                    app.model.Library.table.c.update_time < cutoff_time ) ).all()
     for library in libraries:
         _purge_folder( library.root_folder, app, remove_from_disk, info_only = info_only )
         if not info_only:
@@ -159,7 +173,7 @@
     print '# Purged %d libraries .' % ( library_count ), '\n'
     print "Elapsed time: ", stop - start, "\n"

-def purge_folders( app, cutoff_time, remove_from_disk, info_only = False ):
+def purge_folders( app, cutoff_time, remove_from_disk, info_only = False, force_retry = False ):
     # Purges deleted folders whose update_time is older than the cutoff_time.
     # The dataset associations of each folder are also marked as deleted.
     # The Purge Dataset method will purge each Dataset as necessary
@@ -168,9 +182,13 @@
     folder_count = 0
     print '# The following folders have been purged'
     start = time.clock()
-    folders = app.model.LibraryFolder.filter( and_( app.model.LibraryFolder.table.c.deleted==True,
-                                                    app.model.LibraryFolder.table.c.purged==False,
-                                                    app.model.LibraryFolder.table.c.update_time < cutoff_time ) ).all()
+    if force_retry:
+        folders = app.model.LibraryFolder.filter( and_( app.model.LibraryFolder.table.c.deleted==True,
+                                                        app.model.LibraryFolder.table.c.update_time < cutoff_time ) ).all()
+    else:
+        folders = app.model.LibraryFolder.filter( and_( app.model.LibraryFolder.table.c.deleted==True,
+                                                        app.model.LibraryFolder.table.c.purged==False,
+                                                        app.model.LibraryFolder.table.c.update_time < cutoff_time ) ).all()
     for folder in folders:
         _purge_folder( folder, app, remove_from_disk, info_only = info_only )
         print "%d" % folder.id
@@ -179,17 +197,22 @@
     print '# Purged %d folders.' % ( folder_count ), '\n'
     print "Elapsed time: ", stop - start, "\n"

-def purge_datasets( app, cutoff_time, remove_from_disk, info_only = False ):
+def purge_datasets( app, cutoff_time, remove_from_disk, info_only = False, repurge = False, force_retry = False ):
     # Purges deleted datasets whose update_time is older than cutoff_time. Files may or may
     # not be removed from disk.
     dataset_count = 0
     disk_space = 0
     print '# The following deleted datasets have been purged'
     start = time.clock()
-    datasets = app.model.Dataset.filter( and_( app.model.Dataset.table.c.deleted==True,
-                                               app.model.Dataset.table.c.purgable==True,
-                                               app.model.Dataset.table.c.purged==False,
-                                               app.model.Dataset.table.c.update_time < cutoff_time ) ).all()
+    if force_retry:
+        datasets = app.model.Dataset.filter( and_( app.model.Dataset.table.c.deleted==True,
+                                                   app.model.Dataset.table.c.purgable==True,
+                                                   app.model.Dataset.table.c.update_time < cutoff_time ) ).all()
+    else:
+        datasets = app.model.Dataset.filter( and_( app.model.Dataset.table.c.deleted==True,
+                                                   app.model.Dataset.table.c.purgable==True,
+                                                   app.model.Dataset.table.c.purged==False,
+                                                   app.model.Dataset.table.c.update_time < cutoff_time ) ).all()
     for dataset in datasets:
         file_size = dataset.file_size
         _purge_dataset( dataset, remove_from_disk, info_only = info_only )
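Example usage: if purge_datasets was originally run without -r, re-running it with both -r and -f retries the purge on the already-purged datasets and removes their files from disk. A hypothetical invocation; the positional config-file argument and the --purge_datasets long option are assumptions not shown in this diff (only options.purge_datasets appears above):

    python ./scripts/cleanup_datasets/cleanup_datasets.py universe_wsgi.ini -d 60 --purge_datasets -r -f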
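The change applies the same pattern to every query above: when force_retry is set, the purged==False (or deleted==False) clause is dropped, so rows already processed by an earlier run are selected again. A minimal standalone sketch of that selection logic, using a plain Python stand-in object rather than Galaxy's SQLAlchemy model layer (all names here are illustrative, not Galaxy's API):

    import datetime

    class FakeDataset(object):
        # Illustrative stand-in for the Dataset model, not the real class.
        def __init__(self, id, deleted, purged, update_time):
            self.id = id
            self.deleted = deleted
            self.purged = purged
            self.update_time = update_time

    def select_purge_candidates(datasets, cutoff_time, force_retry=False):
        # Always require deleted rows older than the cutoff; skip the
        # purged check only when force_retry is set, so already-purged
        # rows are picked up again (e.g. to remove files left on disk
        # by an earlier run that omitted -r).
        selected = []
        for d in datasets:
            if not d.deleted or d.update_time >= cutoff_time:
                continue
            if d.purged and not force_retry:
                continue
            selected.append(d)
        return selected

    if __name__ == '__main__':
        old = datetime.datetime(2009, 1, 1)
        cutoff = datetime.datetime(2009, 6, 1)
        rows = [FakeDataset(1, True, True, old),   # purged before; seen only with force_retry
                FakeDataset(2, True, False, old)]  # normal candidate
        print [d.id for d in select_purge_candidates(rows, cutoff)]                    # [2]
        print [d.id for d in select_purge_candidates(rows, cutoff, force_retry=True)]  # [1, 2]

Note that in the real script the filtering happens in the database query itself rather than in a Python loop; the sketch only mirrors the boolean logic of the two query variants.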