commit/galaxy-central: 3 new changesets
3 new commits in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/changeset/aa2d3c5f577b/
changeset:   aa2d3c5f577b
user:        jgoecks
date:        2012-01-09 15:48:42
summary:     Trackster: organization and variable renaming in slotter code.
affected #:  1 file

diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r aa2d3c5f577b9b26ef6765a9f7928c1ee948ce92 static/scripts/trackster.js
--- a/static/scripts/trackster.js
+++ b/static/scripts/trackster.js
@@ -3871,7 +3871,7 @@
         this.show_labels_scale = 0.001;
         this.showing_details = false;
         this.summary_draw_height = 30;
-        this.inc_slots = {};
+        this.slotters = {};
         this.start_end_dct = {};
         this.left_offset = 200;
@@ -3988,10 +3988,8 @@
     },
     /**
      * Place features in slots for drawing (i.e. pack features).
-     * this.inc_slots[level] is created in this method. this.inc_slots[level]
-     * is a dictionary of slotted features; key is feature uid, value is a dictionary
-     * with keys 'slot' and 'text'.
-     * Returns the number of slots used to pack features.
+     * this.slotters[level] is created in this method. this.slotters[level]
+     * is a Slotter object. Returns the number of slots used to pack features.
      */
     incremental_slots: function(level, features, mode) {
@@ -3999,14 +3997,13 @@
         // need to create new slots.
         var dummy_context = this.view.canvas_manager.dummy_context,
-            inc_slots = this.inc_slots[level];
-        if (!inc_slots || (inc_slots.mode !== mode)) {
-            inc_slots = new (slotting.FeatureSlotter)( level, mode === "Pack", MAX_FEATURE_DEPTH, function ( x ) { return dummy_context.measureText( x ) } );
-            inc_slots.mode = mode;
-            this.inc_slots[level] = inc_slots;
+            slotter = this.slotters[level];
+        if (!slotter || (slotter.mode !== mode)) {
+            slotter = new (slotting.FeatureSlotter)( level, mode === "Pack", MAX_FEATURE_DEPTH, function ( x ) { return dummy_context.measureText( x ) } );
+            this.slotters[level] = slotter;
         }
-        return inc_slots.slot_features( features );
+        return slotter.slot_features( features );
     },
     /**
      * Given feature data, returns summary tree data. Feature data must be sorted by start
@@ -4225,7 +4222,7 @@
         if (result.data) {
             // Draw features.
-            slots = this.inc_slots[w_scale].slots;
+            slots = this.slotters[w_scale].slots;
             feature_mapper = painter.draw(ctx, canvas.width, canvas.height, w_scale, slots);
             feature_mapper.translation = -left_offset;
         }
@@ -4341,7 +4338,7 @@
  * This implementation is incremental, any feature assigned a slot will be
  * retained for slotting future features.
  */
-exports.FeatureSlotter = function ( w_scale, include_label, max_rows, measureText ) {
+exports.FeatureSlotter = function (w_scale, include_label, max_rows, measureText) {
     this.slots = {};
     this.start_end_dct = {};
    this.w_scale = w_scale;
@@ -4356,8 +4353,12 @@
  */
extend( exports.FeatureSlotter.prototype, {
    slot_features: function( features ) {
-        var w_scale = this.w_scale, inc_slots = this.slots, start_end_dct = this.start_end_dct,
-            undone = [], slotted = [], highest_slot = 0, max_rows = this.max_rows;
+        var w_scale = this.w_scale,
+            start_end_dct = this.start_end_dct,
+            undone = [],
+            slotted = [],
+            highest_slot = 0,
+            max_rows = this.max_rows;

        // If feature already exists in slots (from previously seen tiles), use the same slot,
        // otherwise if not seen, add to "undone" list for slot calculation.
@@ -4368,9 +4369,9 @@
        for (var i = 0, len = features.length; i < len; i++) {
            var feature = features[i], feature_uid = feature[0];
-            if (inc_slots[feature_uid] !== undefined) {
-                highest_slot = Math.max(highest_slot, inc_slots[feature_uid]);
-                slotted.push(inc_slots[feature_uid]);
+            if (this.slots[feature_uid] !== undefined) {
+                highest_slot = Math.max(highest_slot, this.slots[feature_uid]);
+                slotted.push(this.slots[feature_uid]);
            } else {
                undone.push(i);
            }
@@ -4462,7 +4463,7 @@
                    start_end_dct[slot_num] = [];
                }
                start_end_dct[slot_num].push([f_start, f_end]);
-                inc_slots[feature_uid] = slot_num;
+                this.slots[feature_uid] = slot_num;
                highest_slot = Math.max(highest_slot, slot_num);
            } else {

https://bitbucket.org/galaxy/galaxy-central/changeset/38c1df19946e/
changeset:   38c1df19946e
user:        jgoecks
date:        2012-01-09 16:10:17
summary:     Trackster: add tipsy for icons used to get more data.
affected #:  1 file

diff -r aa2d3c5f577b9b26ef6765a9f7928c1ee948ce92 -r 38c1df19946ece9670d6cfd8df6baef73655dded static/scripts/trackster.js
--- a/static/scripts/trackster.js
+++ b/static/scripts/trackster.js
@@ -2518,11 +2518,13 @@
        if (this.message) {
            var canvas = this.html_elt.children()[0],
-                message_div = $("<div/>").addClass("tile-message").text(this.message).
+                message_div = $("<div/>").addClass("tile-message").text(this.message)
                                // -1 to account for border.
-                                css({'height': ERROR_PADDING-1, 'width': canvas.width}).prependTo(this.html_elt),
-                more_down_icon = $("<a href='javascript:void(0);'/>").addClass("icon more-down").appendTo(message_div),
-                more_across_icon = $("<a href='javascript:void(0);'/>").addClass("icon more-across").appendTo(message_div);
+                                .css({'height': ERROR_PADDING-1, 'width': canvas.width}).prependTo(this.html_elt),
+                more_down_icon = $("<a href='javascript:void(0);'/>").addClass("icon more-down")
+                                 .attr("title", "Get more data including depth").tipsy( {gravity: 's'} ).appendTo(message_div),
+                more_across_icon = $("<a href='javascript:void(0);'/>").addClass("icon more-across")
+                                 .attr("title", "Get more data excluding depth").tipsy( {gravity: 's'} ).appendTo(message_div);

            // Set up actions for icons.
            var tile = this;
@@ -2530,6 +2532,7 @@
                // Mark tile as stale, request more data, and redraw track.
                tile.stale = true;
                track.data_manager.get_more_data(tile.low, tile.high, track.mode, tile.resolution, {}, track.data_manager.DEEP_DATA_REQ);
+                $(".tipsy").hide();
                track.request_draw();
            }).dblclick(function(e) {
                // Do not propogate as this would normally zoom in.
@@ -2540,6 +2543,7 @@
                // Mark tile as stale, request more data, and redraw track.
                tile.stale = true;
                track.data_manager.get_more_data(tile.low, tile.high, track.mode, tile.resolution, {}, track.data_manager.BROAD_DATA_REQ);
+                $(".tipsy").hide();
                track.request_draw();
            }).dblclick(function(e) {
                // Do not propogate as this would normally zoom in.

https://bitbucket.org/galaxy/galaxy-central/changeset/4a9f7ff4e2c0/
changeset:   4a9f7ff4e2c0
user:        jgoecks
date:        2012-01-09 16:13:42
summary:     Merge.
affected #: 14 files diff -r 38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e distributed_object_store_conf.xml.sample --- a/distributed_object_store_conf.xml.sample +++ b/distributed_object_store_conf.xml.sample @@ -1,11 +1,11 @@ <?xml version="1.0"?><backends> - <backend name="files1" type="disk" weight="1"> + <backend id="files1" type="disk" weight="1"><files_dir path="database/files1"/><extra_dir type="temp" path="database/tmp1"/><extra_dir type="job_work" path="database/job_working_directory1"/></backend> - <backend name="files2" type="disk" weight="1"> + <backend id="files2" type="disk" weight="1"><files_dir path="database/files2"/><extra_dir type="temp" path="database/tmp2"/><extra_dir type="job_work" path="database/job_working_directory2"/> diff -r 38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -439,7 +439,7 @@ if mf is None: mf = self.new_file( dataset = parent, **value.kwds ) # Ensure the metadata file gets updated with content - parent.dataset.object_store.update_from_file( parent.dataset.id, file_name=value.file_name, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name=os.path.basename(mf.file_name) ) + parent.dataset.object_store.update_from_file( parent.dataset, file_name=value.file_name, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name=os.path.basename(mf.file_name) ) os.unlink( value.file_name ) value = mf.id return value diff -r 38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e lib/galaxy/exceptions/__init__.py --- a/lib/galaxy/exceptions/__init__.py +++ b/lib/galaxy/exceptions/__init__.py @@ -22,3 +22,7 @@ class ObjectNotFound( Exception ): """ Accessed object was not found """ pass + +class ObjectInvalid( Exception ): + """ Accessed object store ID is invalid """ + pass diff -r 38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -320,19 +320,9 @@ # With job outputs in the working directory, we need the working # directory to be set before prepare is run, or else premature deletion # and job recovery fail. 
- # Attempt to put the working directory in the same store as the output dataset(s) - store_name = None - da = None - if job.output_datasets: - da = job.output_datasets[0] - elif job.output_library_datasets: - da = job.output_library_datasets[0] - if da is not None: - store_name = self.app.object_store.store_name(da.dataset.id) # Create the working dir if necessary - if not self.app.object_store.exists(self.job_id, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id)): - self.app.object_store.create(self.job_id, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id), store_name=store_name) - self.working_directory = self.app.object_store.get_filename(self.job_id, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id)) + self.app.object_store.create(job, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id)) + self.working_directory = self.app.object_store.get_filename(job, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id)) log.debug('(%s) Working directory for job is: %s' % (self.job_id, self.working_directory)) self.output_paths = None self.output_dataset_paths = None @@ -482,7 +472,7 @@ dataset.extension = 'data' # Update (non-library) job output datasets through the object store if dataset not in job.output_library_datasets: - self.app.object_store.update_from_file(dataset.id, create=True) + self.app.object_store.update_from_file(dataset.dataset, create=True) self.sa_session.add( dataset ) self.sa_session.flush() job.state = job.states.ERROR @@ -606,7 +596,7 @@ dataset.set_size() # Update (non-library) job output datasets through the object store if dataset not in job.output_library_datasets: - self.app.object_store.update_from_file(dataset.id, create=True) + self.app.object_store.update_from_file(dataset.dataset, create=True) if context['stderr']: dataset.blurb = "error" elif dataset.has_data(): @@ -719,8 +709,7 @@ try: for fname in self.extra_filenames: os.remove( fname ) - if self.working_directory is not None and os.path.isdir( self.working_directory ): - shutil.rmtree( self.working_directory ) + self.app.object_store.delete(self.get_job(), base_dir='job_work', dir_only=True, extra_dir=str(self.job_id)) if self.app.config.set_metadata_externally: self.external_output_metadata.cleanup_external_metadata( self.sa_session ) galaxy.tools.imp_exp.JobExportHistoryArchiveWrapper( self.job_id ).cleanup_after_job( self.sa_session ) diff -r 38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -16,7 +16,6 @@ from galaxy.util.hash_util import * from galaxy.web.form_builder import * from galaxy.model.item_attrs import UsesAnnotations, APIItem -from galaxy.exceptions import ObjectNotFound from sqlalchemy.orm import object_session from sqlalchemy.sql.expression import func import os.path, os, errno, codecs, operator, socket, pexpect, logging, time, shutil @@ -650,12 +649,7 @@ if not self.external_filename: assert self.id is not None, "ID must be set before filename used (commit the object)" assert self.object_store is not None, "Object Store has not been initialized for dataset %s" % self.id - try: - filename = self.object_store.get_filename( self.id ) - except ObjectNotFound, e: - # Create file if it does not exist - self.object_store.create( self.id ) - filename = self.object_store.get_filename( self.id ) + filename = self.object_store.get_filename( self ) return filename else: filename = self.external_filename @@ 
-669,7 +663,7 @@ file_name = property( get_file_name, set_file_name ) @property def extra_files_path( self ): - return self.object_store.get_filename( self.id, dir_only=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id) + return self.object_store.get_filename( self, dir_only=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id ) def get_size( self, nice_size=False ): """Returns the size of the data on disk""" if self.file_size: @@ -679,16 +673,16 @@ return self.file_size else: if nice_size: - return galaxy.datatypes.data.nice_size( self.object_store.size(self.id) ) + return galaxy.datatypes.data.nice_size( self.object_store.size(self) ) else: - return self.object_store.size(self.id) + return self.object_store.size(self) def set_size( self ): """Returns the size of the data on disk""" if not self.file_size: if self.external_filename: self.file_size = os.path.getsize(self.external_filename) else: - self.file_size = self.object_store.size(self.id) + self.file_size = self.object_store.size(self) def get_total_size( self ): if self.total_size is not None: return self.total_size @@ -703,7 +697,7 @@ if self.file_size is None: self.set_size() self.total_size = self.file_size or 0 - if self.object_store.exists(self.id, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True): + if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True): for root, dirs, files in os.walk( self.extra_files_path ): self.total_size += sum( [ os.path.getsize( os.path.join( root, file ) ) for file in files ] ) def has_data( self ): @@ -721,7 +715,7 @@ # FIXME: sqlalchemy will replace this def _delete(self): """Remove the file that corresponds to this data""" - self.object_store.delete(self.id) + self.object_store.delete(self) @property def user_can_purge( self ): return self.purged == False \ @@ -730,9 +724,9 @@ def full_delete( self ): """Remove the file and extra files, marks deleted and purged""" # os.unlink( self.file_name ) - self.object_store.delete(self.id) - if self.object_store.exists(self.id, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True): - self.object_store.delete(self.id, entire_dir=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True) + self.object_store.delete(self) + if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True): + self.object_store.delete(self, entire_dir=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True) # if os.path.exists( self.extra_files_path ): # shutil.rmtree( self.extra_files_path ) # TODO: purge metadata files @@ -1798,8 +1792,11 @@ assert self.id is not None, "ID must be set before filename used (commit the object)" # Ensure the directory structure and the metadata file object exist try: - self.history_dataset.dataset.object_store.create( self.id, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name="metadata_%d.dat" % self.id ) - path = self.history_dataset.dataset.object_store.get_filename( self.id, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name="metadata_%d.dat" % self.id ) + da = self.history_dataset or self.library_dataset + if self.object_store_id is None and da is not None: + self.object_store_id = da.dataset.object_store_id + da.dataset.object_store.create( self, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name="metadata_%d.dat" % self.id ) + path = 
da.dataset.object_store.get_filename( self, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name="metadata_%d.dat" % self.id ) return path except AttributeError: # In case we're not working with the history_dataset diff -r 38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e lib/galaxy/model/mapping.py --- a/lib/galaxy/model/mapping.py +++ b/lib/galaxy/model/mapping.py @@ -130,6 +130,7 @@ Column( "deleted", Boolean, index=True, default=False ), Column( "purged", Boolean, index=True, default=False ), Column( "purgable", Boolean, default=True ), + Column( "object_store_id", TrimmedString( 255 ), index=True ), Column( "external_filename" , TEXT ), Column( "_extra_files_path", TEXT ), Column( 'file_size', Numeric( 15, 0 ) ), @@ -410,6 +411,7 @@ Column( "user_id", Integer, ForeignKey( "galaxy_user.id" ), index=True, nullable=True ), Column( "job_runner_name", String( 255 ) ), Column( "job_runner_external_id", String( 255 ) ), + Column( "object_store_id", TrimmedString( 255 ), index=True ), Column( "imported", Boolean, default=False, index=True ) ) JobParameter.table = Table( "job_parameter", metadata, @@ -641,6 +643,7 @@ Column( "lda_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), index=True, nullable=True ), Column( "create_time", DateTime, default=now ), Column( "update_time", DateTime, index=True, default=now, onupdate=now ), + Column( "object_store_id", TrimmedString( 255 ), index=True ), Column( "deleted", Boolean, index=True, default=False ), Column( "purged", Boolean, index=True, default=False ) ) diff -r 38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e lib/galaxy/model/migrate/versions/0089_add_object_store_id_columns.py --- /dev/null +++ b/lib/galaxy/model/migrate/versions/0089_add_object_store_id_columns.py @@ -0,0 +1,38 @@ +""" +Migration script to add 'object_store_id' column to various tables +""" + +from sqlalchemy import * +from sqlalchemy.orm import * +from migrate import * +from migrate.changeset import * + +import logging +log = logging.getLogger( __name__ ) +from galaxy.model.custom_types import TrimmedString + +metadata = MetaData( migrate_engine ) +db_session = scoped_session( sessionmaker( bind=migrate_engine, autoflush=False, autocommit=True ) ) + +def upgrade(): + print __doc__ + metadata.reflect() + for t_name in ( 'dataset', 'job', 'metadata_file' ): + t = Table( t_name, metadata, autoload=True ) + c = Column( "object_store_id", TrimmedString( 255 ), index=True ) + try: + c.create( t ) + assert c is t.c.object_store_id + except Exception, e: + print "Adding object_store_id column to %s table failed: %s" % ( t_name, str( e ) ) + log.debug( "Adding object_store_id column to %s table failed: %s" % ( t_name, str( e ) ) ) + +def downgrade(): + metadata.reflect() + for t_name in ( 'dataset', 'job', 'metadata_file' ): + t = Table( t_name, metadata, autoload=True ) + try: + t.c.object_store_id.drop() + except Exception, e: + print "Dropping object_store_id column from %s table failed: %s" % ( t_name, str( e ) ) + log.debug( "Dropping object_store_id column from %s table failed: %s" % ( t_name, str( e ) ) ) diff -r 38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e lib/galaxy/objectstore/__init__.py --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -18,7 +18,9 @@ from galaxy.jobs import Sleeper from galaxy.model import directory_hash_id from galaxy.objectstore.s3_multipart_upload import multipart_upload -from 
galaxy.exceptions import ObjectNotFound +from galaxy.exceptions import ObjectNotFound, ObjectInvalid + +from sqlalchemy.orm import object_session from boto.s3.key import Key from boto.s3.connection import S3Connection @@ -40,14 +42,15 @@ self.running = False self.extra_dirs = {} - def exists(self, dataset_id, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def exists(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ - Returns True if the object identified by `dataset_id` exists in this - file store, False otherwise. + Returns True if the object identified by `obj` exists in this file + store, False otherwise. FIELD DESCRIPTIONS (these apply to all the methods in this class): - :type dataset_id: int - :param dataset_id: Galaxy-assigned database ID of the dataset to be checked. + :type obj: object + :param obj: A Galaxy object with an assigned database ID accessible via + the .id attribute. :type base_dir: string :param base_dir: A key in self.extra_dirs corresponding to the base @@ -56,21 +59,21 @@ :type dir_only: bool :param dir_only: If True, check only the path where the file - identified by `dataset_id` should be located, not the - dataset itself. This option applies to `extra_dir` - argument as well. + identified by `obj` should be located, not the dataset + itself. This option applies to `extra_dir` argument as + well. :type extra_dir: string :param extra_dir: Append `extra_dir` to the directory structure where - the dataset identified by `dataset_id` should be located. - (e.g., 000/extra_dir/dataset_id) + the dataset identified by `obj` should be located. + (e.g., 000/extra_dir/obj.id) :type extra_dir_at_root: bool :param extra_dir_at_root: Applicable only if `extra_dir` is set. If True, the `extra_dir` argument is placed at root of the created directory structure rather - than at the end (e.g., extra_dir/000/dataset_id - vs. 000/extra_dir/dataset_id) + than at the end (e.g., extra_dir/000/obj.id + vs. 000/extra_dir/obj.id) :type alt_name: string :param alt_name: Use this name as the alternative name for the created @@ -78,53 +81,39 @@ """ raise NotImplementedError() - def store_name(self, dataset_id, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): - """ - Returns the name of the store in which the object identified by - `dataset_id` exists, or None if it does not exist or the store is the - default store. - """ - return None - - def file_ready(self, dataset_id, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def file_ready(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ A helper method that checks if a file corresponding to a dataset is ready and available to be used. Return True if so, False otherwise.""" return True - def create(self, dataset_id, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, store_name=None): + def create(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ - Mark the object identified by `dataset_id` as existing in the store, but - with no content. This method will create a proper directory structure for + Mark the object identified by `obj` as existing in the store, but with + no content. This method will create a proper directory structure for the file if the directory does not already exist. 
See `exists` method for the description of other fields. - - :type store_name: string - :param store_name: Backend store in which to create the dataset, if - this store contains more than one backend. If the - named backend does not exist, a backend will be - chosen by the store. """ raise NotImplementedError() - - def empty(self, dataset_id, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): + + def empty(self, obj, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ - Test if the object identified by `dataset_id` has content. + Test if the object identified by `obj` has content. If the object does not exist raises `ObjectNotFound`. See `exists` method for the description of the fields. """ raise NotImplementedError() - def size(self, dataset_id, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def size(self, obj, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ - Return size of the object identified by `dataset_id`. + Return size of the object identified by `obj`. If the object does not exist, return 0. See `exists` method for the description of the fields. """ raise NotImplementedError() - def delete(self, dataset_id, entire_dir=False, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def delete(self, obj, entire_dir=False, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ - Deletes the object identified by `dataset_id`. + Deletes the object identified by `obj`. See `exists` method for the description of other fields. :type entire_dir: bool :param entire_dir: If True, delete the entire directory pointed to by @@ -133,10 +122,10 @@ """ raise NotImplementedError() - def get_data(self, dataset_id, start=0, count=-1, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def get_data(self, obj, start=0, count=-1, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ Fetch `count` bytes of data starting at offset `start` from the - object identified uniquely by `dataset_id`. + object identified uniquely by `obj`. If the object does not exist raises `ObjectNotFound`. See `exists` method for the description of other fields. @@ -148,15 +137,15 @@ """ raise NotImplementedError() - def get_filename(self, dataset_id, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def get_filename(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ Get the expected filename (including the absolute path) which can be used - to access the contents of the object uniquely identified by `dataset_id`. + to access the contents of the object uniquely identified by `obj`. See `exists` method for the description of the fields. """ raise NotImplementedError() - def update_from_file(self, dataset_id, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, file_name=None, create=False): + def update_from_file(self, obj, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, file_name=None, create=False): """ Inform the store that the file associated with the object has been updated. If `file_name` is provided, update from that file instead @@ -166,14 +155,14 @@ :type file_name: string :param file_name: Use file pointed to by `file_name` as the source for - updating the dataset identified by `dataset_id` + updating the dataset identified by `obj` :type create: bool :param create: If True and the default dataset does not exist, create it first. 
""" raise NotImplementedError() - def get_object_url(self, dataset_id, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def get_object_url(self, obj, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ If the store supports direct URL access, return a URL. Otherwise return None. @@ -210,22 +199,24 @@ super(DiskObjectStore, self).__init__() self.file_path = file_path or config.file_path self.config = config + self.extra_dirs['job_work'] = config.job_working_directory + self.extra_dirs['temp'] = config.new_file_path if extra_dirs is not None: - self.extra_dirs = extra_dirs + self.extra_dirs.update( extra_dirs ) - def _get_filename(self, dataset_id, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def _get_filename(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): """Class method that returns the absolute path for the file corresponding - to the `dataset_id` regardless of whether the file exists. + to the `obj`.id regardless of whether the file exists. """ - path = self._construct_path(dataset_id, dir_only=dir_only, extra_dir=extra_dir, extra_dir_at_root=extra_dir_at_root, alt_name=alt_name, old_style=True) + path = self._construct_path(obj, base_dir=base_dir, dir_only=dir_only, extra_dir=extra_dir, extra_dir_at_root=extra_dir_at_root, alt_name=alt_name, old_style=True) # For backward compatibility, check the old style root path first; otherwise, # construct hashed path if not os.path.exists(path): - return self._construct_path(dataset_id, dir_only=dir_only, extra_dir=extra_dir, extra_dir_at_root=extra_dir_at_root, alt_name=alt_name) + return self._construct_path(obj, base_dir=base_dir, dir_only=dir_only, extra_dir=extra_dir, extra_dir_at_root=extra_dir_at_root, alt_name=alt_name) - def _construct_path(self, dataset_id, old_style=False, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def _construct_path(self, obj, old_style=False, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ Construct the expected absolute path for accessing the object - identified by `dataset_id`. + identified by `obj`.id. :type base_dir: string :param base_dir: A key in self.extra_dirs corresponding to the base @@ -234,13 +225,13 @@ :type dir_only: bool :param dir_only: If True, check only the path where the file - identified by `dataset_id` should be located, not the + identified by `obj` should be located, not the dataset itself. This option applies to `extra_dir` argument as well. :type extra_dir: string :param extra_dir: Append the value of this parameter to the expected path - used to access the object identified by `dataset_id` + used to access the object identified by `obj` (e.g., /files/000/<extra_dir>/dataset_10.dat). :type alt_name: string @@ -252,16 +243,16 @@ the composed directory structure does not include a hash id (e.g., /files/dataset_10.dat (old) vs. 
/files/000/dataset_10.dat (new)) """ - base = self.file_path - if base_dir in self.extra_dirs: - base = self.extra_dirs.get(base_dir) + base = self.extra_dirs.get(base_dir, self.file_path) if old_style: if extra_dir is not None: path = os.path.join(base, extra_dir) else: path = base else: - rel_path = os.path.join(*directory_hash_id(dataset_id)) + # Construct hashed path + rel_path = os.path.join(*directory_hash_id(obj.id)) + # Optionally append extra_dir if extra_dir is not None: if extra_dir_at_root: rel_path = os.path.join(extra_dir, rel_path) @@ -269,103 +260,86 @@ rel_path = os.path.join(rel_path, extra_dir) path = os.path.join(base, rel_path) if not dir_only: - path = os.path.join(path, alt_name if alt_name else "dataset_%s.dat" % dataset_id) + path = os.path.join(path, alt_name if alt_name else "dataset_%s.dat" % obj.id) return os.path.abspath(path) - def exists(self, dataset_id, **kwargs): - path = self._construct_path(dataset_id, old_style=True, **kwargs) + def exists(self, obj, **kwargs): + path = self._construct_path(obj, old_style=True, **kwargs) # For backward compatibility, check root path first; otherwise, construct # and check hashed path - if not os.path.exists(path): - path = self._construct_path(dataset_id, **kwargs) - return os.path.exists(path) + if os.path.exists(path): + return True + else: + path = self._construct_path(obj, **kwargs) + return os.path.exists(path) - def create(self, dataset_id, **kwargs): - kwargs.pop('store_name', None) - if not self.exists(dataset_id, **kwargs): - # Pull out locally used fields - extra_dir = kwargs.get('extra_dir', None) - extra_dir_at_root = kwargs.get('extra_dir_at_root', False) + def create(self, obj, **kwargs): + if not self.exists(obj, **kwargs): + path = self._construct_path(obj, **kwargs) dir_only = kwargs.get('dir_only', False) - alt_name = kwargs.get('alt_name', None) - base_dir_key = kwargs.get('base_dir', None) - # Construct hashed path - path = os.path.join(*directory_hash_id(dataset_id)) - # Optionally append extra_dir - if extra_dir is not None: - if extra_dir_at_root: - path = os.path.join(extra_dir, path) - else: - path = os.path.join(path, extra_dir) - # Combine the constructted path with the root dir for all files - base_dir = self.extra_dirs.get(base_dir_key, self.file_path) - path = os.path.join(base_dir, path) # Create directory if it does not exist - if not os.path.exists(path): - os.makedirs(path) + dir = path if dir_only else os.path.dirname(path) + if not os.path.exists(dir): + os.makedirs(dir) + # Create the file if it does not exist if not dir_only: - path = os.path.join(path, alt_name if alt_name else "dataset_%s.dat" % dataset_id) open(path, 'w').close() - util.umask_fix_perms( path, self.config.umask, 0666 ) + util.umask_fix_perms(path, self.config.umask, 0666) - def empty(self, dataset_id, **kwargs): - return os.path.getsize(self.get_filename(dataset_id, **kwargs)) > 0 + def empty(self, obj, **kwargs): + return os.path.getsize(self.get_filename(obj, **kwargs)) > 0 - def size(self, dataset_id, **kwargs): - if self.exists(dataset_id, **kwargs): + def size(self, obj, **kwargs): + if self.exists(obj, **kwargs): try: - return os.path.getsize(self.get_filename(dataset_id, **kwargs)) + return os.path.getsize(self.get_filename(obj, **kwargs)) except OSError: return 0 else: return 0 - def delete(self, dataset_id, entire_dir=False, **kwargs): - path = self.get_filename(dataset_id, **kwargs) + def delete(self, obj, entire_dir=False, **kwargs): + path = self.get_filename(obj, **kwargs) extra_dir = 
kwargs.get('extra_dir', None) try: if entire_dir and extra_dir: shutil.rmtree(path) return True - if self.exists(dataset_id, **kwargs): + if self.exists(obj, **kwargs): os.remove(path) return True except OSError, ex: - log.critical('%s delete error %s' % (self._get_filename(dataset_id, **kwargs), ex)) + log.critical('%s delete error %s' % (self._get_filename(obj, **kwargs), ex)) return False - def get_data(self, dataset_id, start=0, count=-1, **kwargs): - data_file = open(self.get_filename(dataset_id, **kwargs), 'r') + def get_data(self, obj, start=0, count=-1, **kwargs): + data_file = open(self.get_filename(obj, **kwargs), 'r') data_file.seek(start) content = data_file.read(count) data_file.close() return content - def get_filename(self, dataset_id, **kwargs): - path = self._construct_path(dataset_id, old_style=True, **kwargs) + def get_filename(self, obj, **kwargs): + path = self._construct_path(obj, old_style=True, **kwargs) # For backward compatibility, check root path first; otherwise, construct - # and check hashed path + # and return hashed path if os.path.exists(path): return path else: - path = self._construct_path(dataset_id, **kwargs) - if os.path.exists(path): - return path - else: - raise ObjectNotFound() + return self._construct_path(obj, **kwargs) - def update_from_file(self, dataset_id, file_name=None, create=False, **kwargs): + def update_from_file(self, obj, file_name=None, create=False, **kwargs): """ `create` parameter is not used in this implementation """ if create: - self.create(dataset_id, **kwargs) - if file_name and self.exists(dataset_id, **kwargs): + self.create(obj, **kwargs) + if file_name and self.exists(obj, **kwargs): try: - shutil.copy(file_name, self.get_filename(dataset_id, **kwargs)) + shutil.copy(file_name, self.get_filename(obj, **kwargs)) except IOError, ex: log.critical('Error copying %s to %s: %s' % (file_name, - self._get_filename(dataset_id, **kwargs), ex)) + self._get_filename(obj, **kwargs), ex)) - def get_object_url(self, dataset_id, **kwargs): + def get_object_url(self, obj, **kwargs): return None @@ -494,8 +468,8 @@ continue util.umask_fix_perms( path, self.config.umask, 0666, self.config.gid ) - def _construct_path(self, dataset_id, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): - rel_path = os.path.join(*directory_hash_id(dataset_id)) + def _construct_path(self, obj, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): + rel_path = os.path.join(*directory_hash_id(obj.id)) if extra_dir is not None: if extra_dir_at_root: rel_path = os.path.join(extra_dir, rel_path) @@ -504,7 +478,7 @@ # S3 folders are marked by having trailing '/' so add it now rel_path = '%s/' % rel_path if not dir_only: - rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % dataset_id) + rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id) return rel_path def _get_cache_path(self, rel_path): @@ -665,18 +639,18 @@ log.error("Trouble pushing S3 key '%s' from file '%s': %s" % (rel_path, source_file, ex)) return False - def file_ready(self, dataset_id, **kwargs): + def file_ready(self, obj, **kwargs): """ A helper method that checks if a file corresponding to a dataset is ready and available to be used. 
Return True if so, False otherwise.""" - rel_path = self._construct_path(dataset_id, **kwargs) + rel_path = self._construct_path(obj, **kwargs) # Make sure the size in cache is available in its entirety if self._in_cache(rel_path) and os.path.getsize(self._get_cache_path(rel_path)) == self._get_size_in_s3(rel_path): return True return False - def exists(self, dataset_id, **kwargs): + def exists(self, obj, **kwargs): in_cache = in_s3 = False - rel_path = self._construct_path(dataset_id, **kwargs) + rel_path = self._construct_path(obj, **kwargs) # Check cache if self._in_cache(rel_path): in_cache = True @@ -699,9 +673,8 @@ else: return False - def create(self, dataset_id, **kwargs): - kwargs.pop('store_name', None) - if not self.exists(dataset_id, **kwargs): + def create(self, obj, **kwargs): + if not self.exists(obj, **kwargs): #print "S3 OS creating a dataset with ID %s" % dataset_id # Pull out locally used fields extra_dir = kwargs.get('extra_dir', None) @@ -710,7 +683,7 @@ alt_name = kwargs.get('alt_name', None) # print "---- Processing: %s; %s" % (alt_name, locals()) # Construct hashed path - rel_path = os.path.join(*directory_hash_id(dataset_id)) + rel_path = os.path.join(*directory_hash_id(obj)) # Optionally append extra_dir if extra_dir is not None: if extra_dir_at_root: @@ -728,30 +701,30 @@ # self._push_to_s3(s3_dir, from_string='') # If instructed, create the dataset in cache & in S3 if not dir_only: - rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % dataset_id) + rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id) open(os.path.join(self.staging_path, rel_path), 'w').close() self._push_to_s3(rel_path, from_string='') - def empty(self, dataset_id, **kwargs): - if self.exists(dataset_id, **kwargs): - return bool(self.size(dataset_id, **kwargs) > 0) + def empty(self, obj, **kwargs): + if self.exists(obj, **kwargs): + return bool(self.size(obj, **kwargs) > 0) else: raise ObjectNotFound() - def size(self, dataset_id, **kwargs): - rel_path = self._construct_path(dataset_id, **kwargs) + def size(self, obj, **kwargs): + rel_path = self._construct_path(obj, **kwargs) if self._in_cache(rel_path): try: return os.path.getsize(self._get_cache_path(rel_path)) except OSError, ex: log.info("Could not get size of file '%s' in local cache, will try S3. 
Error: %s" % (rel_path, ex)) - elif self.exists(dataset_id, **kwargs): + elif self.exists(obj, **kwargs): return self._get_size_in_s3(rel_path) log.warning("Did not find dataset '%s', returning 0 for size" % rel_path) return 0 - def delete(self, dataset_id, entire_dir=False, **kwargs): - rel_path = self._construct_path(dataset_id, **kwargs) + def delete(self, obj, entire_dir=False, **kwargs): + rel_path = self._construct_path(obj, **kwargs) extra_dir = kwargs.get('extra_dir', None) try: # For the case of extra_files, because we don't have a reference to @@ -777,11 +750,11 @@ except S3ResponseError, ex: log.error("Could not delete key '%s' from S3: %s" % (rel_path, ex)) except OSError, ex: - log.error('%s delete error %s' % (self._get_filename(dataset_id, **kwargs), ex)) + log.error('%s delete error %s' % (self._get_filename(obj, **kwargs), ex)) return False - def get_data(self, dataset_id, start=0, count=-1, **kwargs): - rel_path = self._construct_path(dataset_id, **kwargs) + def get_data(self, obj, start=0, count=-1, **kwargs): + rel_path = self._construct_path(obj, **kwargs) # Check cache first and get file if not there if not self._in_cache(rel_path): self._pull_into_cache(rel_path) @@ -794,10 +767,10 @@ data_file.close() return content - def get_filename(self, dataset_id, **kwargs): + def get_filename(self, obj, **kwargs): #print "S3 get_filename for dataset: %s" % dataset_id dir_only = kwargs.get('dir_only', False) - rel_path = self._construct_path(dataset_id, **kwargs) + rel_path = self._construct_path(obj, **kwargs) cache_path = self._get_cache_path(rel_path) # S3 does not recognize directories as files so cannot check if those exist. # So, if checking dir only, ensure given dir exists in cache and return @@ -811,7 +784,7 @@ if self._in_cache(rel_path): return cache_path # Check if the file exists in persistent storage and, if it does, pull it into cache - elif self.exists(dataset_id, **kwargs): + elif self.exists(obj, **kwargs): if dir_only: # Directories do not get pulled into cache return cache_path else: @@ -824,11 +797,11 @@ raise ObjectNotFound() # return cache_path # Until the upload tool does not explicitly create the dataset, return expected path - def update_from_file(self, dataset_id, file_name=None, create=False, **kwargs): + def update_from_file(self, obj, file_name=None, create=False, **kwargs): if create: - self.create(dataset_id, **kwargs) - if self.exists(dataset_id, **kwargs): - rel_path = self._construct_path(dataset_id, **kwargs) + self.create(obj, **kwargs) + if self.exists(obj, **kwargs): + rel_path = self._construct_path(obj, **kwargs) # Chose whether to use the dataset file itself or an alternate file if file_name: source_file = os.path.abspath(file_name) @@ -848,9 +821,9 @@ else: raise ObjectNotFound() - def get_object_url(self, dataset_id, **kwargs): - if self.exists(dataset_id, **kwargs): - rel_path = self._construct_path(dataset_id, **kwargs) + def get_object_url(self, obj, **kwargs): + if self.exists(obj, **kwargs): + rel_path = self._construct_path(obj, **kwargs) try: key = Key(self.bucket, rel_path) return key.generate_url(expires_in = 86400) # 24hrs @@ -873,7 +846,7 @@ "requires a config file, please set one in " \ "'distributed_object_store_config_file')" self.backends = {} - self.weighted_backend_names = [] + self.weighted_backend_ids = [] random.seed() @@ -884,7 +857,7 @@ root = tree.getroot() log.debug('Loading backends for distributed object store from %s' % self.distributed_config) for elem in [ e for e in root if e.tag == 'backend' ]: - name = 
elem.get('name') + id = elem.get('id') weight = int(elem.get('weight', 1)) if elem.get('type', 'disk'): path = None @@ -895,94 +868,90 @@ elif sub.tag == 'extra_dir': type = sub.get('type') extra_dirs[type] = sub.get('path') - self.backends[name] = DiskObjectStore(config, file_path=path, extra_dirs=extra_dirs) - log.debug("Loaded disk backend '%s' with weight %s and file_path: %s" % (name, weight, path)) + self.backends[id] = DiskObjectStore(config, file_path=path, extra_dirs=extra_dirs) + log.debug("Loaded disk backend '%s' with weight %s and file_path: %s" % (id, weight, path)) if extra_dirs: log.debug(" Extra directories:") for type, dir in extra_dirs.items(): log.debug(" %s: %s" % (type, dir)) for i in range(0, weight): - # The simplest way to do weighting: add backend names to a + # The simplest way to do weighting: add backend ids to a # sequence the number of times equalling weight, then randomly # choose a backend from that sequence at creation - self.weighted_backend_names.append(name) + self.weighted_backend_ids.append(id) - def exists(self, dataset_id, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - return store is not None + def exists(self, obj, **kwargs): + return self.__call_method('exists', obj, False, False, **kwargs) - def store_name(self, dataset_id, **kwargs): - for name, store in self.backends.items(): - if store.exists(dataset_id, **kwargs): - return name - return None + #def store_id(self, obj, **kwargs): + # return self.__get_store_id_for(obj, **kwargs)[0] - def file_ready(self, dataset_id, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.file_ready(dataset_id, **kwargs) - return False + def file_ready(self, obj, **kwargs): + return self.__call_method('file_ready', obj, False, False, **kwargs) - def create(self, dataset_id, **kwargs): - store_name = kwargs.pop('store_name', None) - if not self.exists(dataset_id, **kwargs): - if store_name is None or store_name not in self.backends: - store_name = random.choice(self.weighted_backend_names) - log.debug("Selected backend '%s' for creation of dataset %s" % (store_name, dataset_id)) + def create(self, obj, **kwargs): + """ + create() is the only method in which obj.object_store_id may be None + """ + if obj.object_store_id is None or not self.exists(obj, **kwargs): + if obj.object_store_id is None or obj.object_store_id not in self.backends: + obj.object_store_id = random.choice(self.weighted_backend_ids) + object_session( obj ).add( obj ) + object_session( obj ).flush() + log.debug("Selected backend '%s' for creation of %s %s" % (obj.object_store_id, obj.__class__.__name__, obj.id)) else: - log.debug("Using preferred backend '%s' for creation of dataset %s" % (store_name, dataset_id)) - return self.backends[store_name].create(dataset_id, **kwargs) + log.debug("Using preferred backend '%s' for creation of %s %s" % (obj.object_store_id, obj.__class__.__name__, obj.id)) + self.backends[obj.object_store_id].create(obj, **kwargs) - def empty(self, dataset_id, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.empty(dataset_id, **kwargs) - return True + def empty(self, obj, **kwargs): + return self.__call_method('empty', obj, True, False, **kwargs) - def size(self, dataset_id, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.size(dataset_id, **kwargs) - return 0 + def size(self, obj, **kwargs): + return self.__call_method('size', obj, 0, False, 
**kwargs) - def delete(self, dataset_id, entire_dir=False, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.delete(dataset_id, entire_dir=entire_dir, **kwargs) - return False + def delete(self, obj, entire_dir=False, **kwargs): + return self.__call_method('delete', obj, False, False, **kwargs) - def get_data(self, dataset_id, start=0, count=-1, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.get_data(dataset_id, start=0, count=-1, **kwargs) - raise ObjectNotFound() + def get_data(self, obj, start=0, count=-1, **kwargs): + return self.__call_method('get_data', obj, ObjectNotFound, True, **kwargs) - def get_filename(self, dataset_id, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.get_filename(dataset_id, **kwargs) - raise ObjectNotFound() + def get_filename(self, obj, **kwargs): + return self.__call_method('get_filename', obj, ObjectNotFound, True, **kwargs) - def update_from_file(self, dataset_id, file_name=None, create=False, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.update_from_file(dataset_id, file_name=file_name, create=create, **kwargs) - if create: - store_name = random.choice(self.weighted_backend_names) - return self.backends[store_name].update_from_file(dataset_id, file_name=file_name, create=create, **kwargs) - raise ObjectNotFound() + def update_from_file(self, obj, file_name=None, create=False, **kwargs): + # can raise ObjectLocationMismatch + # TODO: handling create=True here? probably not since create() is called from w/in, so a store will be selected there + #if create and not self.exists(obj, **kwargs): + # store_id = random.choice(self.weighted_backend_names) + return self.__call_method('update_from_file', obj, ObjectNotFound, True, **kwargs) - def get_object_url(self, dataset_id, **kwargs): - # FIXME: dir_only - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.get_object_url(dataset_id, **kwargs) - return None + def get_object_url(self, obj, **kwargs): + return self.__call_method('get_object_url', obj, None, False, **kwargs) - def __get_store_for(self, dataset_id, **kwargs): - for store in self.backends.values(): - if store.exists(dataset_id, **kwargs): - return store + def __call_method(self, method, obj, default, default_is_exception, **kwargs): + object_store_id = self.__get_store_id_for(obj, **kwargs) + if object_store_id is not None: + return self.backends[object_store_id].__getattribute__(method)(obj, **kwargs) + if default_is_exception: + raise default() + else: + return default + + def __get_store_id_for(self, obj, **kwargs): + if obj.object_store_id is not None and obj.object_store_id in self.backends: + return obj.object_store_id + else: + # if this instance has been switched from a non-distributed to a + # distributed object store, or if the object's store id is invalid, + # try to locate the object + log.warning('The backend object store ID (%s) for %s object with ID %s is invalid' % (obj.object_store_id, obj.__class__.__name__, obj.id)) + for id, store in self.backends.items(): + if store.exists(obj, **kwargs): + log.warning('%s object with ID %s found in backend object store with ID %s' % (obj.__class__.__name__, obj.id, id)) + obj.object_store_id = id + object_session( obj ).add( obj ) + object_session( obj ).flush() + return id return None class HierarchicalObjectStore(ObjectStore): diff -r 
38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -1859,7 +1859,7 @@ a_files = os.listdir( temp_file_path ) if len( a_files ) > 0: for f in a_files: - self.app.object_store.update_from_file(hda.dataset.id, + self.app.object_store.update_from_file(hda.dataset, extra_dir="dataset_%d_files" % hda.dataset.id, alt_name = f, file_name = os.path.join(temp_file_path, f), @@ -1899,7 +1899,7 @@ sa_session=self.sa_session ) self.app.security_agent.copy_dataset_permissions( outdata.dataset, child_dataset.dataset ) # Move data from temp location to dataset location - self.app.object_store.update_from_file(child_dataset.dataset.id, filename, create=True) + self.app.object_store.update_from_file(child_dataset.dataset, filename, create=True) self.sa_session.add( child_dataset ) self.sa_session.flush() child_dataset.set_size() @@ -1967,7 +1967,7 @@ self.sa_session.add( primary_data ) self.sa_session.flush() # Move data from temp location to dataset location - self.app.object_store.update_from_file(primary_data.dataset.id, filename, create=True) + self.app.object_store.update_from_file(primary_data.dataset, filename, create=True) primary_data.set_size() primary_data.name = "%s (%s)" % ( outdata.name, designation ) primary_data.info = outdata.info diff -r 38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -228,8 +228,7 @@ # datasets first, then create the associations parent_to_child_pairs = [] child_dataset_names = set() - store_name = None - store_name_set = False # this is needed since None is a valid value for store_name + object_store_id = None for name, output in tool.outputs.items(): for filter in output.filters: try: @@ -292,12 +291,12 @@ trans.sa_session.add( data ) trans.sa_session.flush() trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions ) - # Create an empty file immediately - trans.app.object_store.create( data.id, store_name=store_name ) - if not store_name_set: - # Ensure all other datasets in this job are created in the same store - store_name = trans.app.object_store.store_name( data.id ) - store_name_set = True + # Create an empty file immediately. The first dataset will be + # created in the "default" store, all others will be created in + # the same store as the first. + data.dataset.object_store_id = object_store_id + trans.app.object_store.create( data.dataset ) + object_store_id = data.dataset.object_store_id # these will be the same thing after the first output # This may not be neccesary with the new parent/child associations data.designation = name # Copy metadata from one of the inputs if requested. @@ -382,6 +381,7 @@ job.add_input_dataset( name, None ) for name, dataset in out_data.iteritems(): job.add_output_dataset( name, dataset ) + job.object_store_id = object_store_id trans.sa_session.add( job ) trans.sa_session.flush() # Some tools are not really executable, but jobs are still created for them ( for record keeping ). 
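The tool-action changes in the merge above replace the old store_name bookkeeping with an object_store_id recorded on the dataset (and on the job): the first output dataset is created in whichever backend the object store selects, and every later output reuses that id. A minimal sketch of that pinning pattern follows; Dataset and MockDistributedStore are illustrative stand-ins, not Galaxy's actual classes.

import random

class Dataset(object):
    """Stand-in for galaxy.model.Dataset with only the field we care about."""
    def __init__(self):
        self.object_store_id = None

class MockDistributedStore(object):
    """Illustrative store: picks a random backend unless the object is already pinned."""
    def __init__(self, weighted_backend_ids):
        self.weighted_backend_ids = weighted_backend_ids

    def create(self, obj):
        # Only choose a backend when the object has not been assigned one yet.
        if obj.object_store_id is None:
            obj.object_store_id = random.choice(self.weighted_backend_ids)

def create_job_outputs(store, datasets):
    """Create all output datasets of one job in the same backend."""
    object_store_id = None
    for data in datasets:
        # Reuse the backend chosen for the first output (None on the first pass).
        data.object_store_id = object_store_id
        store.create(data)
        object_store_id = data.object_store_id
    return object_store_id  # the real code records this on the job as well

if __name__ == "__main__":
    store = MockDistributedStore(["files1", "files2"])
    outputs = [Dataset() for _ in range(3)]
    job_store_id = create_job_outputs(store, outputs)
    assert all(d.object_store_id == job_store_id for d in outputs)
    print("all outputs pinned to backend: %s" % job_store_id)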
diff -r 38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e lib/galaxy/tools/actions/upload_common.py --- a/lib/galaxy/tools/actions/upload_common.py +++ b/lib/galaxy/tools/actions/upload_common.py @@ -319,8 +319,7 @@ for name, value in tool.params_to_strings( params, trans.app ).iteritems(): job.add_parameter( name, value ) job.add_parameter( 'paramfile', to_json_string( json_file_path ) ) - store_name = None - store_name_set = False # this is needed since None is a valid value for store_name + object_store_id = None for i, dataset in enumerate( data_list ): if folder: job.add_output_library_dataset( 'output%i' % i, dataset ) @@ -328,11 +327,12 @@ job.add_output_dataset( 'output%i' % i, dataset ) # Create an empty file immediately if not dataset.dataset.external_filename: - trans.app.object_store.create( dataset.dataset.id, store_name=store_name ) + dataset.dataset.object_store_id = object_store_id + trans.app.object_store.create( dataset.dataset ) + object_store_id = dataset.dataset.object_store_id + trans.sa_session.add( dataset ) # open( dataset.file_name, "w" ).close() - if not store_name_set: - store_name = trans.app.object_store.store_name( dataset.dataset.id ) - store_name_set = True + job.object_store_id = object_store_id job.state = job.states.NEW trans.sa_session.add( job ) trans.sa_session.flush() diff -r 38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e lib/galaxy/web/controllers/dataset.py --- a/lib/galaxy/web/controllers/dataset.py +++ b/lib/galaxy/web/controllers/dataset.py @@ -360,16 +360,16 @@ data = self._check_dataset(trans, dataset_id) if isinstance( data, basestring ): return data - log.debug( "dataset.py -> transfer_status: Checking transfer status for dataset %s..." % data.id ) + log.debug( "Checking transfer status for dataset %s..." % data.dataset.id ) # Pulling files in extra_files_path into cache is not handled via this # method but that's primarily because those files are typically linked to # through tool's output page anyhow so tying a JavaScript event that will # call this method does not seem doable? - if trans.app.object_store.file_ready(data.id): + if data.dataset.external_filename: return True else: - return False + return trans.app.object_store.file_ready(data.dataset) @web.expose def display(self, trans, dataset_id=None, preview=False, filename=None, to_ext=None, **kwd): @@ -382,7 +382,7 @@ if filename and filename != "index": # For files in extra_files_path - file_path = trans.app.object_store.get_filename(data.dataset.id, extra_dir='dataset_%s_files' % data.dataset.id, alt_name=filename) + file_path = trans.app.object_store.get_filename(data.dataset, extra_dir='dataset_%s_files' % data.dataset.id, alt_name=filename) if os.path.exists( file_path ): if os.path.isdir( file_path ): return trans.show_error_message( "Directory listing is not allowed." ) #TODO: Reconsider allowing listing of directories? 
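The dataset controller above now hands the object store the dataset object itself and resolves files inside a dataset's extra_files_path via extra_dir/alt_name. Below is a rough sketch of how such a path is composed from the object id, an optional extra_dir, and an alt_name override; directory_hash and the 'database/files' root are simplified placeholders rather than Galaxy's exact implementation.

import os

def directory_hash(obj_id):
    """Simplified stand-in for galaxy.model.directory_hash_id: ids are grouped
    roughly 1000 per directory, e.g. id 10 -> ['000'], id 12345 -> ['012']."""
    padded = "%06d" % int(obj_id)
    chunks = [padded[i:i + 3] for i in range(0, len(padded), 3)]
    return chunks[:-1]  # drop the last three digits

def construct_path(obj_id, file_path="database/files", dir_only=False,
                   extra_dir=None, extra_dir_at_root=False, alt_name=None):
    """Build the expected on-disk path for a dataset-like object."""
    rel_path = os.path.join(*directory_hash(obj_id))
    if extra_dir is not None:
        # extra_dir/000/... when placed at the root, 000/extra_dir/... otherwise
        rel_path = (os.path.join(extra_dir, rel_path) if extra_dir_at_root
                    else os.path.join(rel_path, extra_dir))
    path = os.path.join(file_path, rel_path)
    if not dir_only:
        path = os.path.join(path, alt_name or "dataset_%s.dat" % obj_id)
    return path

# Primary dataset file vs. a named file in its extra_files_path, mirroring the
# get_filename() calls in the controller code above.
print(construct_path(10))
print(construct_path(10, extra_dir="dataset_10_files", alt_name="index.html"))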
diff -r 38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e lib/galaxy/web/controllers/history.py
--- a/lib/galaxy/web/controllers/history.py
+++ b/lib/galaxy/web/controllers/history.py
@@ -663,7 +663,7 @@
            trans.response.set_content_type( 'application/x-gzip' )
        else:
            trans.response.set_content_type( 'application/x-tar' )
-        return trans.app.object_store.get_data(jeha.dataset.id)
+        return trans.app.object_store.get_data(jeha.dataset)
    elif jeha.job.state in [ model.Job.states.RUNNING, model.Job.states.QUEUED, model.Job.states.WAITING ]:
        return trans.show_message( "Still exporting history %(n)s; please check back soon. Link: <a href='%(s)s'>%(s)s</a>" \
            % ( { 'n' : history.name, 's' : url_for( action="export_archive", id=id, qualified=True ) } ) )

diff -r 38c1df19946ece9670d6cfd8df6baef73655dded -r 4a9f7ff4e2c02bf87d33590061e09ef4e70fb84e lib/galaxy/web/controllers/library_common.py
--- a/lib/galaxy/web/controllers/library_common.py
+++ b/lib/galaxy/web/controllers/library_common.py
@@ -1647,6 +1647,7 @@
        for ldda_id in ldda_ids:
            try:
                ldda = trans.sa_session.query( trans.app.model.LibraryDatasetDatasetAssociation ).get( trans.security.decode_id( ldda_id ) )
+                assert not ldda.dataset.purged
                lddas.append( ldda )
            except:
                ldda = None

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this
because you have the service enabled, addressing the recipient of this email.
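As an appendix to the DistributedObjectStore changes merged above: backends are weighted by repeating each backend id in a list according to its configured weight and then picking one at random when an object is first created. A small sketch of that selection scheme, assuming the backend (id, weight) pairs have already been parsed out of distributed_object_store_conf.xml:

import random
from collections import Counter

def build_weighted_ids(backends):
    """backends: iterable of (backend_id, weight) pairs. Each id is repeated
    'weight' times so random.choice() picks backends in proportion to weight."""
    weighted = []
    for backend_id, weight in backends:
        weighted.extend([backend_id] * weight)
    return weighted

if __name__ == "__main__":
    weighted_ids = build_weighted_ids([("files1", 1), ("files2", 3)])
    picks = Counter(random.choice(weighted_ids) for _ in range(10000))
    # files2 should be chosen roughly three times as often as files1
    print(picks)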