commit/galaxy-central: natefoo: Object Store: Store the backend object store's ID in the database when using the distributed object store.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/ff576e7f989c/ changeset: ff576e7f989c user: natefoo date: 2012-01-09 16:06:23 summary: Object Store: Store the backend object store's ID in the database when using the distributed object store. affected #: 14 files diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf distributed_object_store_conf.xml.sample --- a/distributed_object_store_conf.xml.sample +++ b/distributed_object_store_conf.xml.sample @@ -1,11 +1,11 @@ <?xml version="1.0"?><backends> - <backend name="files1" type="disk" weight="1"> + <backend id="files1" type="disk" weight="1"><files_dir path="database/files1"/><extra_dir type="temp" path="database/tmp1"/><extra_dir type="job_work" path="database/job_working_directory1"/></backend> - <backend name="files2" type="disk" weight="1"> + <backend id="files2" type="disk" weight="1"><files_dir path="database/files2"/><extra_dir type="temp" path="database/tmp2"/><extra_dir type="job_work" path="database/job_working_directory2"/> diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -439,7 +439,7 @@ if mf is None: mf = self.new_file( dataset = parent, **value.kwds ) # Ensure the metadata file gets updated with content - parent.dataset.object_store.update_from_file( parent.dataset.id, file_name=value.file_name, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name=os.path.basename(mf.file_name) ) + parent.dataset.object_store.update_from_file( parent.dataset, file_name=value.file_name, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name=os.path.basename(mf.file_name) ) os.unlink( value.file_name ) value = mf.id return value diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf lib/galaxy/exceptions/__init__.py --- a/lib/galaxy/exceptions/__init__.py +++ b/lib/galaxy/exceptions/__init__.py @@ -22,3 +22,7 @@ class ObjectNotFound( Exception ): """ Accessed object was not found """ pass + +class ObjectInvalid( Exception ): + """ Accessed object store ID is invalid """ + pass diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -320,19 +320,9 @@ # With job outputs in the working directory, we need the working # directory to be set before prepare is run, or else premature deletion # and job recovery fail. 
- # Attempt to put the working directory in the same store as the output dataset(s) - store_name = None - da = None - if job.output_datasets: - da = job.output_datasets[0] - elif job.output_library_datasets: - da = job.output_library_datasets[0] - if da is not None: - store_name = self.app.object_store.store_name(da.dataset.id) # Create the working dir if necessary - if not self.app.object_store.exists(self.job_id, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id)): - self.app.object_store.create(self.job_id, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id), store_name=store_name) - self.working_directory = self.app.object_store.get_filename(self.job_id, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id)) + self.app.object_store.create(job, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id)) + self.working_directory = self.app.object_store.get_filename(job, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id)) log.debug('(%s) Working directory for job is: %s' % (self.job_id, self.working_directory)) self.output_paths = None self.output_dataset_paths = None @@ -482,7 +472,7 @@ dataset.extension = 'data' # Update (non-library) job output datasets through the object store if dataset not in job.output_library_datasets: - self.app.object_store.update_from_file(dataset.id, create=True) + self.app.object_store.update_from_file(dataset.dataset, create=True) self.sa_session.add( dataset ) self.sa_session.flush() job.state = job.states.ERROR @@ -606,7 +596,7 @@ dataset.set_size() # Update (non-library) job output datasets through the object store if dataset not in job.output_library_datasets: - self.app.object_store.update_from_file(dataset.id, create=True) + self.app.object_store.update_from_file(dataset.dataset, create=True) if context['stderr']: dataset.blurb = "error" elif dataset.has_data(): @@ -719,8 +709,7 @@ try: for fname in self.extra_filenames: os.remove( fname ) - if self.working_directory is not None and os.path.isdir( self.working_directory ): - shutil.rmtree( self.working_directory ) + self.app.object_store.delete(self.get_job(), base_dir='job_work', dir_only=True, extra_dir=str(self.job_id)) if self.app.config.set_metadata_externally: self.external_output_metadata.cleanup_external_metadata( self.sa_session ) galaxy.tools.imp_exp.JobExportHistoryArchiveWrapper( self.job_id ).cleanup_after_job( self.sa_session ) diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -16,7 +16,6 @@ from galaxy.util.hash_util import * from galaxy.web.form_builder import * from galaxy.model.item_attrs import UsesAnnotations, APIItem -from galaxy.exceptions import ObjectNotFound from sqlalchemy.orm import object_session from sqlalchemy.sql.expression import func import os.path, os, errno, codecs, operator, socket, pexpect, logging, time, shutil @@ -650,12 +649,7 @@ if not self.external_filename: assert self.id is not None, "ID must be set before filename used (commit the object)" assert self.object_store is not None, "Object Store has not been initialized for dataset %s" % self.id - try: - filename = self.object_store.get_filename( self.id ) - except ObjectNotFound, e: - # Create file if it does not exist - self.object_store.create( self.id ) - filename = self.object_store.get_filename( self.id ) + filename = self.object_store.get_filename( self ) return filename else: filename = self.external_filename @@ 
-669,7 +663,7 @@ file_name = property( get_file_name, set_file_name ) @property def extra_files_path( self ): - return self.object_store.get_filename( self.id, dir_only=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id) + return self.object_store.get_filename( self, dir_only=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id ) def get_size( self, nice_size=False ): """Returns the size of the data on disk""" if self.file_size: @@ -679,16 +673,16 @@ return self.file_size else: if nice_size: - return galaxy.datatypes.data.nice_size( self.object_store.size(self.id) ) + return galaxy.datatypes.data.nice_size( self.object_store.size(self) ) else: - return self.object_store.size(self.id) + return self.object_store.size(self) def set_size( self ): """Returns the size of the data on disk""" if not self.file_size: if self.external_filename: self.file_size = os.path.getsize(self.external_filename) else: - self.file_size = self.object_store.size(self.id) + self.file_size = self.object_store.size(self) def get_total_size( self ): if self.total_size is not None: return self.total_size @@ -703,7 +697,7 @@ if self.file_size is None: self.set_size() self.total_size = self.file_size or 0 - if self.object_store.exists(self.id, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True): + if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True): for root, dirs, files in os.walk( self.extra_files_path ): self.total_size += sum( [ os.path.getsize( os.path.join( root, file ) ) for file in files ] ) def has_data( self ): @@ -721,7 +715,7 @@ # FIXME: sqlalchemy will replace this def _delete(self): """Remove the file that corresponds to this data""" - self.object_store.delete(self.id) + self.object_store.delete(self) @property def user_can_purge( self ): return self.purged == False \ @@ -730,9 +724,9 @@ def full_delete( self ): """Remove the file and extra files, marks deleted and purged""" # os.unlink( self.file_name ) - self.object_store.delete(self.id) - if self.object_store.exists(self.id, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True): - self.object_store.delete(self.id, entire_dir=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True) + self.object_store.delete(self) + if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True): + self.object_store.delete(self, entire_dir=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True) # if os.path.exists( self.extra_files_path ): # shutil.rmtree( self.extra_files_path ) # TODO: purge metadata files @@ -1798,8 +1792,11 @@ assert self.id is not None, "ID must be set before filename used (commit the object)" # Ensure the directory structure and the metadata file object exist try: - self.history_dataset.dataset.object_store.create( self.id, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name="metadata_%d.dat" % self.id ) - path = self.history_dataset.dataset.object_store.get_filename( self.id, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name="metadata_%d.dat" % self.id ) + da = self.history_dataset or self.library_dataset + if self.object_store_id is None and da is not None: + self.object_store_id = da.dataset.object_store_id + da.dataset.object_store.create( self, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name="metadata_%d.dat" % self.id ) + path = 
da.dataset.object_store.get_filename( self, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name="metadata_%d.dat" % self.id ) return path except AttributeError: # In case we're not working with the history_dataset diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf lib/galaxy/model/mapping.py --- a/lib/galaxy/model/mapping.py +++ b/lib/galaxy/model/mapping.py @@ -130,6 +130,7 @@ Column( "deleted", Boolean, index=True, default=False ), Column( "purged", Boolean, index=True, default=False ), Column( "purgable", Boolean, default=True ), + Column( "object_store_id", TrimmedString( 255 ), index=True ), Column( "external_filename" , TEXT ), Column( "_extra_files_path", TEXT ), Column( 'file_size', Numeric( 15, 0 ) ), @@ -410,6 +411,7 @@ Column( "user_id", Integer, ForeignKey( "galaxy_user.id" ), index=True, nullable=True ), Column( "job_runner_name", String( 255 ) ), Column( "job_runner_external_id", String( 255 ) ), + Column( "object_store_id", TrimmedString( 255 ), index=True ), Column( "imported", Boolean, default=False, index=True ) ) JobParameter.table = Table( "job_parameter", metadata, @@ -641,6 +643,7 @@ Column( "lda_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), index=True, nullable=True ), Column( "create_time", DateTime, default=now ), Column( "update_time", DateTime, index=True, default=now, onupdate=now ), + Column( "object_store_id", TrimmedString( 255 ), index=True ), Column( "deleted", Boolean, index=True, default=False ), Column( "purged", Boolean, index=True, default=False ) ) diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf lib/galaxy/model/migrate/versions/0089_add_object_store_id_columns.py --- /dev/null +++ b/lib/galaxy/model/migrate/versions/0089_add_object_store_id_columns.py @@ -0,0 +1,38 @@ +""" +Migration script to add 'object_store_id' column to various tables +""" + +from sqlalchemy import * +from sqlalchemy.orm import * +from migrate import * +from migrate.changeset import * + +import logging +log = logging.getLogger( __name__ ) +from galaxy.model.custom_types import TrimmedString + +metadata = MetaData( migrate_engine ) +db_session = scoped_session( sessionmaker( bind=migrate_engine, autoflush=False, autocommit=True ) ) + +def upgrade(): + print __doc__ + metadata.reflect() + for t_name in ( 'dataset', 'job', 'metadata_file' ): + t = Table( t_name, metadata, autoload=True ) + c = Column( "object_store_id", TrimmedString( 255 ), index=True ) + try: + c.create( t ) + assert c is t.c.object_store_id + except Exception, e: + print "Adding object_store_id column to %s table failed: %s" % ( t_name, str( e ) ) + log.debug( "Adding object_store_id column to %s table failed: %s" % ( t_name, str( e ) ) ) + +def downgrade(): + metadata.reflect() + for t_name in ( 'dataset', 'job', 'metadata_file' ): + t = Table( t_name, metadata, autoload=True ) + try: + t.c.object_store_id.drop() + except Exception, e: + print "Dropping object_store_id column from %s table failed: %s" % ( t_name, str( e ) ) + log.debug( "Dropping object_store_id column from %s table failed: %s" % ( t_name, str( e ) ) ) diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf lib/galaxy/objectstore/__init__.py --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -18,7 +18,9 @@ from galaxy.jobs import Sleeper from galaxy.model import directory_hash_id from galaxy.objectstore.s3_multipart_upload import multipart_upload -from 
galaxy.exceptions import ObjectNotFound +from galaxy.exceptions import ObjectNotFound, ObjectInvalid + +from sqlalchemy.orm import object_session from boto.s3.key import Key from boto.s3.connection import S3Connection @@ -40,14 +42,15 @@ self.running = False self.extra_dirs = {} - def exists(self, dataset_id, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def exists(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ - Returns True if the object identified by `dataset_id` exists in this - file store, False otherwise. + Returns True if the object identified by `obj` exists in this file + store, False otherwise. FIELD DESCRIPTIONS (these apply to all the methods in this class): - :type dataset_id: int - :param dataset_id: Galaxy-assigned database ID of the dataset to be checked. + :type obj: object + :param obj: A Galaxy object with an assigned database ID accessible via + the .id attribute. :type base_dir: string :param base_dir: A key in self.extra_dirs corresponding to the base @@ -56,21 +59,21 @@ :type dir_only: bool :param dir_only: If True, check only the path where the file - identified by `dataset_id` should be located, not the - dataset itself. This option applies to `extra_dir` - argument as well. + identified by `obj` should be located, not the dataset + itself. This option applies to `extra_dir` argument as + well. :type extra_dir: string :param extra_dir: Append `extra_dir` to the directory structure where - the dataset identified by `dataset_id` should be located. - (e.g., 000/extra_dir/dataset_id) + the dataset identified by `obj` should be located. + (e.g., 000/extra_dir/obj.id) :type extra_dir_at_root: bool :param extra_dir_at_root: Applicable only if `extra_dir` is set. If True, the `extra_dir` argument is placed at root of the created directory structure rather - than at the end (e.g., extra_dir/000/dataset_id - vs. 000/extra_dir/dataset_id) + than at the end (e.g., extra_dir/000/obj.id + vs. 000/extra_dir/obj.id) :type alt_name: string :param alt_name: Use this name as the alternative name for the created @@ -78,53 +81,39 @@ """ raise NotImplementedError() - def store_name(self, dataset_id, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): - """ - Returns the name of the store in which the object identified by - `dataset_id` exists, or None if it does not exist or the store is the - default store. - """ - return None - - def file_ready(self, dataset_id, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def file_ready(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ A helper method that checks if a file corresponding to a dataset is ready and available to be used. Return True if so, False otherwise.""" return True - def create(self, dataset_id, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, store_name=None): + def create(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ - Mark the object identified by `dataset_id` as existing in the store, but - with no content. This method will create a proper directory structure for + Mark the object identified by `obj` as existing in the store, but with + no content. This method will create a proper directory structure for the file if the directory does not already exist. 
See `exists` method for the description of other fields. - - :type store_name: string - :param store_name: Backend store in which to create the dataset, if - this store contains more than one backend. If the - named backend does not exist, a backend will be - chosen by the store. """ raise NotImplementedError() - - def empty(self, dataset_id, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): + + def empty(self, obj, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ - Test if the object identified by `dataset_id` has content. + Test if the object identified by `obj` has content. If the object does not exist raises `ObjectNotFound`. See `exists` method for the description of the fields. """ raise NotImplementedError() - def size(self, dataset_id, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def size(self, obj, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ - Return size of the object identified by `dataset_id`. + Return size of the object identified by `obj`. If the object does not exist, return 0. See `exists` method for the description of the fields. """ raise NotImplementedError() - def delete(self, dataset_id, entire_dir=False, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def delete(self, obj, entire_dir=False, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ - Deletes the object identified by `dataset_id`. + Deletes the object identified by `obj`. See `exists` method for the description of other fields. :type entire_dir: bool :param entire_dir: If True, delete the entire directory pointed to by @@ -133,10 +122,10 @@ """ raise NotImplementedError() - def get_data(self, dataset_id, start=0, count=-1, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def get_data(self, obj, start=0, count=-1, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ Fetch `count` bytes of data starting at offset `start` from the - object identified uniquely by `dataset_id`. + object identified uniquely by `obj`. If the object does not exist raises `ObjectNotFound`. See `exists` method for the description of other fields. @@ -148,15 +137,15 @@ """ raise NotImplementedError() - def get_filename(self, dataset_id, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def get_filename(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ Get the expected filename (including the absolute path) which can be used - to access the contents of the object uniquely identified by `dataset_id`. + to access the contents of the object uniquely identified by `obj`. See `exists` method for the description of the fields. """ raise NotImplementedError() - def update_from_file(self, dataset_id, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, file_name=None, create=False): + def update_from_file(self, obj, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, file_name=None, create=False): """ Inform the store that the file associated with the object has been updated. If `file_name` is provided, update from that file instead @@ -166,14 +155,14 @@ :type file_name: string :param file_name: Use file pointed to by `file_name` as the source for - updating the dataset identified by `dataset_id` + updating the dataset identified by `obj` :type create: bool :param create: If True and the default dataset does not exist, create it first. 
""" raise NotImplementedError() - def get_object_url(self, dataset_id, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def get_object_url(self, obj, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ If the store supports direct URL access, return a URL. Otherwise return None. @@ -210,22 +199,24 @@ super(DiskObjectStore, self).__init__() self.file_path = file_path or config.file_path self.config = config + self.extra_dirs['job_work'] = config.job_working_directory + self.extra_dirs['temp'] = config.new_file_path if extra_dirs is not None: - self.extra_dirs = extra_dirs + self.extra_dirs.update( extra_dirs ) - def _get_filename(self, dataset_id, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def _get_filename(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): """Class method that returns the absolute path for the file corresponding - to the `dataset_id` regardless of whether the file exists. + to the `obj`.id regardless of whether the file exists. """ - path = self._construct_path(dataset_id, dir_only=dir_only, extra_dir=extra_dir, extra_dir_at_root=extra_dir_at_root, alt_name=alt_name, old_style=True) + path = self._construct_path(obj, base_dir=base_dir, dir_only=dir_only, extra_dir=extra_dir, extra_dir_at_root=extra_dir_at_root, alt_name=alt_name, old_style=True) # For backward compatibility, check the old style root path first; otherwise, # construct hashed path if not os.path.exists(path): - return self._construct_path(dataset_id, dir_only=dir_only, extra_dir=extra_dir, extra_dir_at_root=extra_dir_at_root, alt_name=alt_name) + return self._construct_path(obj, base_dir=base_dir, dir_only=dir_only, extra_dir=extra_dir, extra_dir_at_root=extra_dir_at_root, alt_name=alt_name) - def _construct_path(self, dataset_id, old_style=False, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): + def _construct_path(self, obj, old_style=False, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): """ Construct the expected absolute path for accessing the object - identified by `dataset_id`. + identified by `obj`.id. :type base_dir: string :param base_dir: A key in self.extra_dirs corresponding to the base @@ -234,13 +225,13 @@ :type dir_only: bool :param dir_only: If True, check only the path where the file - identified by `dataset_id` should be located, not the + identified by `obj` should be located, not the dataset itself. This option applies to `extra_dir` argument as well. :type extra_dir: string :param extra_dir: Append the value of this parameter to the expected path - used to access the object identified by `dataset_id` + used to access the object identified by `obj` (e.g., /files/000/<extra_dir>/dataset_10.dat). :type alt_name: string @@ -252,16 +243,16 @@ the composed directory structure does not include a hash id (e.g., /files/dataset_10.dat (old) vs. 
/files/000/dataset_10.dat (new)) """ - base = self.file_path - if base_dir in self.extra_dirs: - base = self.extra_dirs.get(base_dir) + base = self.extra_dirs.get(base_dir, self.file_path) if old_style: if extra_dir is not None: path = os.path.join(base, extra_dir) else: path = base else: - rel_path = os.path.join(*directory_hash_id(dataset_id)) + # Construct hashed path + rel_path = os.path.join(*directory_hash_id(obj.id)) + # Optionally append extra_dir if extra_dir is not None: if extra_dir_at_root: rel_path = os.path.join(extra_dir, rel_path) @@ -269,103 +260,86 @@ rel_path = os.path.join(rel_path, extra_dir) path = os.path.join(base, rel_path) if not dir_only: - path = os.path.join(path, alt_name if alt_name else "dataset_%s.dat" % dataset_id) + path = os.path.join(path, alt_name if alt_name else "dataset_%s.dat" % obj.id) return os.path.abspath(path) - def exists(self, dataset_id, **kwargs): - path = self._construct_path(dataset_id, old_style=True, **kwargs) + def exists(self, obj, **kwargs): + path = self._construct_path(obj, old_style=True, **kwargs) # For backward compatibility, check root path first; otherwise, construct # and check hashed path - if not os.path.exists(path): - path = self._construct_path(dataset_id, **kwargs) - return os.path.exists(path) + if os.path.exists(path): + return True + else: + path = self._construct_path(obj, **kwargs) + return os.path.exists(path) - def create(self, dataset_id, **kwargs): - kwargs.pop('store_name', None) - if not self.exists(dataset_id, **kwargs): - # Pull out locally used fields - extra_dir = kwargs.get('extra_dir', None) - extra_dir_at_root = kwargs.get('extra_dir_at_root', False) + def create(self, obj, **kwargs): + if not self.exists(obj, **kwargs): + path = self._construct_path(obj, **kwargs) dir_only = kwargs.get('dir_only', False) - alt_name = kwargs.get('alt_name', None) - base_dir_key = kwargs.get('base_dir', None) - # Construct hashed path - path = os.path.join(*directory_hash_id(dataset_id)) - # Optionally append extra_dir - if extra_dir is not None: - if extra_dir_at_root: - path = os.path.join(extra_dir, path) - else: - path = os.path.join(path, extra_dir) - # Combine the constructted path with the root dir for all files - base_dir = self.extra_dirs.get(base_dir_key, self.file_path) - path = os.path.join(base_dir, path) # Create directory if it does not exist - if not os.path.exists(path): - os.makedirs(path) + dir = path if dir_only else os.path.dirname(path) + if not os.path.exists(dir): + os.makedirs(dir) + # Create the file if it does not exist if not dir_only: - path = os.path.join(path, alt_name if alt_name else "dataset_%s.dat" % dataset_id) open(path, 'w').close() - util.umask_fix_perms( path, self.config.umask, 0666 ) + util.umask_fix_perms(path, self.config.umask, 0666) - def empty(self, dataset_id, **kwargs): - return os.path.getsize(self.get_filename(dataset_id, **kwargs)) > 0 + def empty(self, obj, **kwargs): + return os.path.getsize(self.get_filename(obj, **kwargs)) > 0 - def size(self, dataset_id, **kwargs): - if self.exists(dataset_id, **kwargs): + def size(self, obj, **kwargs): + if self.exists(obj, **kwargs): try: - return os.path.getsize(self.get_filename(dataset_id, **kwargs)) + return os.path.getsize(self.get_filename(obj, **kwargs)) except OSError: return 0 else: return 0 - def delete(self, dataset_id, entire_dir=False, **kwargs): - path = self.get_filename(dataset_id, **kwargs) + def delete(self, obj, entire_dir=False, **kwargs): + path = self.get_filename(obj, **kwargs) extra_dir = 
kwargs.get('extra_dir', None) try: if entire_dir and extra_dir: shutil.rmtree(path) return True - if self.exists(dataset_id, **kwargs): + if self.exists(obj, **kwargs): os.remove(path) return True except OSError, ex: - log.critical('%s delete error %s' % (self._get_filename(dataset_id, **kwargs), ex)) + log.critical('%s delete error %s' % (self._get_filename(obj, **kwargs), ex)) return False - def get_data(self, dataset_id, start=0, count=-1, **kwargs): - data_file = open(self.get_filename(dataset_id, **kwargs), 'r') + def get_data(self, obj, start=0, count=-1, **kwargs): + data_file = open(self.get_filename(obj, **kwargs), 'r') data_file.seek(start) content = data_file.read(count) data_file.close() return content - def get_filename(self, dataset_id, **kwargs): - path = self._construct_path(dataset_id, old_style=True, **kwargs) + def get_filename(self, obj, **kwargs): + path = self._construct_path(obj, old_style=True, **kwargs) # For backward compatibility, check root path first; otherwise, construct - # and check hashed path + # and return hashed path if os.path.exists(path): return path else: - path = self._construct_path(dataset_id, **kwargs) - if os.path.exists(path): - return path - else: - raise ObjectNotFound() + return self._construct_path(obj, **kwargs) - def update_from_file(self, dataset_id, file_name=None, create=False, **kwargs): + def update_from_file(self, obj, file_name=None, create=False, **kwargs): """ `create` parameter is not used in this implementation """ if create: - self.create(dataset_id, **kwargs) - if file_name and self.exists(dataset_id, **kwargs): + self.create(obj, **kwargs) + if file_name and self.exists(obj, **kwargs): try: - shutil.copy(file_name, self.get_filename(dataset_id, **kwargs)) + shutil.copy(file_name, self.get_filename(obj, **kwargs)) except IOError, ex: log.critical('Error copying %s to %s: %s' % (file_name, - self._get_filename(dataset_id, **kwargs), ex)) + self._get_filename(obj, **kwargs), ex)) - def get_object_url(self, dataset_id, **kwargs): + def get_object_url(self, obj, **kwargs): return None @@ -494,8 +468,8 @@ continue util.umask_fix_perms( path, self.config.umask, 0666, self.config.gid ) - def _construct_path(self, dataset_id, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): - rel_path = os.path.join(*directory_hash_id(dataset_id)) + def _construct_path(self, obj, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None): + rel_path = os.path.join(*directory_hash_id(obj.id)) if extra_dir is not None: if extra_dir_at_root: rel_path = os.path.join(extra_dir, rel_path) @@ -504,7 +478,7 @@ # S3 folders are marked by having trailing '/' so add it now rel_path = '%s/' % rel_path if not dir_only: - rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % dataset_id) + rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id) return rel_path def _get_cache_path(self, rel_path): @@ -665,18 +639,18 @@ log.error("Trouble pushing S3 key '%s' from file '%s': %s" % (rel_path, source_file, ex)) return False - def file_ready(self, dataset_id, **kwargs): + def file_ready(self, obj, **kwargs): """ A helper method that checks if a file corresponding to a dataset is ready and available to be used. 
Return True if so, False otherwise.""" - rel_path = self._construct_path(dataset_id, **kwargs) + rel_path = self._construct_path(obj, **kwargs) # Make sure the size in cache is available in its entirety if self._in_cache(rel_path) and os.path.getsize(self._get_cache_path(rel_path)) == self._get_size_in_s3(rel_path): return True return False - def exists(self, dataset_id, **kwargs): + def exists(self, obj, **kwargs): in_cache = in_s3 = False - rel_path = self._construct_path(dataset_id, **kwargs) + rel_path = self._construct_path(obj, **kwargs) # Check cache if self._in_cache(rel_path): in_cache = True @@ -699,9 +673,8 @@ else: return False - def create(self, dataset_id, **kwargs): - kwargs.pop('store_name', None) - if not self.exists(dataset_id, **kwargs): + def create(self, obj, **kwargs): + if not self.exists(obj, **kwargs): #print "S3 OS creating a dataset with ID %s" % dataset_id # Pull out locally used fields extra_dir = kwargs.get('extra_dir', None) @@ -710,7 +683,7 @@ alt_name = kwargs.get('alt_name', None) # print "---- Processing: %s; %s" % (alt_name, locals()) # Construct hashed path - rel_path = os.path.join(*directory_hash_id(dataset_id)) + rel_path = os.path.join(*directory_hash_id(obj)) # Optionally append extra_dir if extra_dir is not None: if extra_dir_at_root: @@ -728,30 +701,30 @@ # self._push_to_s3(s3_dir, from_string='') # If instructed, create the dataset in cache & in S3 if not dir_only: - rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % dataset_id) + rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id) open(os.path.join(self.staging_path, rel_path), 'w').close() self._push_to_s3(rel_path, from_string='') - def empty(self, dataset_id, **kwargs): - if self.exists(dataset_id, **kwargs): - return bool(self.size(dataset_id, **kwargs) > 0) + def empty(self, obj, **kwargs): + if self.exists(obj, **kwargs): + return bool(self.size(obj, **kwargs) > 0) else: raise ObjectNotFound() - def size(self, dataset_id, **kwargs): - rel_path = self._construct_path(dataset_id, **kwargs) + def size(self, obj, **kwargs): + rel_path = self._construct_path(obj, **kwargs) if self._in_cache(rel_path): try: return os.path.getsize(self._get_cache_path(rel_path)) except OSError, ex: log.info("Could not get size of file '%s' in local cache, will try S3. 
Error: %s" % (rel_path, ex)) - elif self.exists(dataset_id, **kwargs): + elif self.exists(obj, **kwargs): return self._get_size_in_s3(rel_path) log.warning("Did not find dataset '%s', returning 0 for size" % rel_path) return 0 - def delete(self, dataset_id, entire_dir=False, **kwargs): - rel_path = self._construct_path(dataset_id, **kwargs) + def delete(self, obj, entire_dir=False, **kwargs): + rel_path = self._construct_path(obj, **kwargs) extra_dir = kwargs.get('extra_dir', None) try: # For the case of extra_files, because we don't have a reference to @@ -777,11 +750,11 @@ except S3ResponseError, ex: log.error("Could not delete key '%s' from S3: %s" % (rel_path, ex)) except OSError, ex: - log.error('%s delete error %s' % (self._get_filename(dataset_id, **kwargs), ex)) + log.error('%s delete error %s' % (self._get_filename(obj, **kwargs), ex)) return False - def get_data(self, dataset_id, start=0, count=-1, **kwargs): - rel_path = self._construct_path(dataset_id, **kwargs) + def get_data(self, obj, start=0, count=-1, **kwargs): + rel_path = self._construct_path(obj, **kwargs) # Check cache first and get file if not there if not self._in_cache(rel_path): self._pull_into_cache(rel_path) @@ -794,10 +767,10 @@ data_file.close() return content - def get_filename(self, dataset_id, **kwargs): + def get_filename(self, obj, **kwargs): #print "S3 get_filename for dataset: %s" % dataset_id dir_only = kwargs.get('dir_only', False) - rel_path = self._construct_path(dataset_id, **kwargs) + rel_path = self._construct_path(obj, **kwargs) cache_path = self._get_cache_path(rel_path) # S3 does not recognize directories as files so cannot check if those exist. # So, if checking dir only, ensure given dir exists in cache and return @@ -811,7 +784,7 @@ if self._in_cache(rel_path): return cache_path # Check if the file exists in persistent storage and, if it does, pull it into cache - elif self.exists(dataset_id, **kwargs): + elif self.exists(obj, **kwargs): if dir_only: # Directories do not get pulled into cache return cache_path else: @@ -824,11 +797,11 @@ raise ObjectNotFound() # return cache_path # Until the upload tool does not explicitly create the dataset, return expected path - def update_from_file(self, dataset_id, file_name=None, create=False, **kwargs): + def update_from_file(self, obj, file_name=None, create=False, **kwargs): if create: - self.create(dataset_id, **kwargs) - if self.exists(dataset_id, **kwargs): - rel_path = self._construct_path(dataset_id, **kwargs) + self.create(obj, **kwargs) + if self.exists(obj, **kwargs): + rel_path = self._construct_path(obj, **kwargs) # Chose whether to use the dataset file itself or an alternate file if file_name: source_file = os.path.abspath(file_name) @@ -848,9 +821,9 @@ else: raise ObjectNotFound() - def get_object_url(self, dataset_id, **kwargs): - if self.exists(dataset_id, **kwargs): - rel_path = self._construct_path(dataset_id, **kwargs) + def get_object_url(self, obj, **kwargs): + if self.exists(obj, **kwargs): + rel_path = self._construct_path(obj, **kwargs) try: key = Key(self.bucket, rel_path) return key.generate_url(expires_in = 86400) # 24hrs @@ -873,7 +846,7 @@ "requires a config file, please set one in " \ "'distributed_object_store_config_file')" self.backends = {} - self.weighted_backend_names = [] + self.weighted_backend_ids = [] random.seed() @@ -884,7 +857,7 @@ root = tree.getroot() log.debug('Loading backends for distributed object store from %s' % self.distributed_config) for elem in [ e for e in root if e.tag == 'backend' ]: - name = 
elem.get('name') + id = elem.get('id') weight = int(elem.get('weight', 1)) if elem.get('type', 'disk'): path = None @@ -895,94 +868,90 @@ elif sub.tag == 'extra_dir': type = sub.get('type') extra_dirs[type] = sub.get('path') - self.backends[name] = DiskObjectStore(config, file_path=path, extra_dirs=extra_dirs) - log.debug("Loaded disk backend '%s' with weight %s and file_path: %s" % (name, weight, path)) + self.backends[id] = DiskObjectStore(config, file_path=path, extra_dirs=extra_dirs) + log.debug("Loaded disk backend '%s' with weight %s and file_path: %s" % (id, weight, path)) if extra_dirs: log.debug(" Extra directories:") for type, dir in extra_dirs.items(): log.debug(" %s: %s" % (type, dir)) for i in range(0, weight): - # The simplest way to do weighting: add backend names to a + # The simplest way to do weighting: add backend ids to a # sequence the number of times equalling weight, then randomly # choose a backend from that sequence at creation - self.weighted_backend_names.append(name) + self.weighted_backend_ids.append(id) - def exists(self, dataset_id, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - return store is not None + def exists(self, obj, **kwargs): + return self.__call_method('exists', obj, False, False, **kwargs) - def store_name(self, dataset_id, **kwargs): - for name, store in self.backends.items(): - if store.exists(dataset_id, **kwargs): - return name - return None + #def store_id(self, obj, **kwargs): + # return self.__get_store_id_for(obj, **kwargs)[0] - def file_ready(self, dataset_id, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.file_ready(dataset_id, **kwargs) - return False + def file_ready(self, obj, **kwargs): + return self.__call_method('file_ready', obj, False, False, **kwargs) - def create(self, dataset_id, **kwargs): - store_name = kwargs.pop('store_name', None) - if not self.exists(dataset_id, **kwargs): - if store_name is None or store_name not in self.backends: - store_name = random.choice(self.weighted_backend_names) - log.debug("Selected backend '%s' for creation of dataset %s" % (store_name, dataset_id)) + def create(self, obj, **kwargs): + """ + create() is the only method in which obj.object_store_id may be None + """ + if obj.object_store_id is None or not self.exists(obj, **kwargs): + if obj.object_store_id is None or obj.object_store_id not in self.backends: + obj.object_store_id = random.choice(self.weighted_backend_ids) + object_session( obj ).add( obj ) + object_session( obj ).flush() + log.debug("Selected backend '%s' for creation of %s %s" % (obj.object_store_id, obj.__class__.__name__, obj.id)) else: - log.debug("Using preferred backend '%s' for creation of dataset %s" % (store_name, dataset_id)) - return self.backends[store_name].create(dataset_id, **kwargs) + log.debug("Using preferred backend '%s' for creation of %s %s" % (obj.object_store_id, obj.__class__.__name__, obj.id)) + self.backends[obj.object_store_id].create(obj, **kwargs) - def empty(self, dataset_id, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.empty(dataset_id, **kwargs) - return True + def empty(self, obj, **kwargs): + return self.__call_method('empty', obj, True, False, **kwargs) - def size(self, dataset_id, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.size(dataset_id, **kwargs) - return 0 + def size(self, obj, **kwargs): + return self.__call_method('size', obj, 0, False, 
**kwargs) - def delete(self, dataset_id, entire_dir=False, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.delete(dataset_id, entire_dir=entire_dir, **kwargs) - return False + def delete(self, obj, entire_dir=False, **kwargs): + return self.__call_method('delete', obj, False, False, **kwargs) - def get_data(self, dataset_id, start=0, count=-1, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.get_data(dataset_id, start=0, count=-1, **kwargs) - raise ObjectNotFound() + def get_data(self, obj, start=0, count=-1, **kwargs): + return self.__call_method('get_data', obj, ObjectNotFound, True, **kwargs) - def get_filename(self, dataset_id, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.get_filename(dataset_id, **kwargs) - raise ObjectNotFound() + def get_filename(self, obj, **kwargs): + return self.__call_method('get_filename', obj, ObjectNotFound, True, **kwargs) - def update_from_file(self, dataset_id, file_name=None, create=False, **kwargs): - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.update_from_file(dataset_id, file_name=file_name, create=create, **kwargs) - if create: - store_name = random.choice(self.weighted_backend_names) - return self.backends[store_name].update_from_file(dataset_id, file_name=file_name, create=create, **kwargs) - raise ObjectNotFound() + def update_from_file(self, obj, file_name=None, create=False, **kwargs): + # can raise ObjectLocationMismatch + # TODO: handling create=True here? probably not since create() is called from w/in, so a store will be selected there + #if create and not self.exists(obj, **kwargs): + # store_id = random.choice(self.weighted_backend_names) + return self.__call_method('update_from_file', obj, ObjectNotFound, True, **kwargs) - def get_object_url(self, dataset_id, **kwargs): - # FIXME: dir_only - store = self.__get_store_for(dataset_id, **kwargs) - if store is not None: - return store.get_object_url(dataset_id, **kwargs) - return None + def get_object_url(self, obj, **kwargs): + return self.__call_method('get_object_url', obj, None, False, **kwargs) - def __get_store_for(self, dataset_id, **kwargs): - for store in self.backends.values(): - if store.exists(dataset_id, **kwargs): - return store + def __call_method(self, method, obj, default, default_is_exception, **kwargs): + object_store_id = self.__get_store_id_for(obj, **kwargs) + if object_store_id is not None: + return self.backends[object_store_id].__getattribute__(method)(obj, **kwargs) + if default_is_exception: + raise default() + else: + return default + + def __get_store_id_for(self, obj, **kwargs): + if obj.object_store_id is not None and obj.object_store_id in self.backends: + return obj.object_store_id + else: + # if this instance has been switched from a non-distributed to a + # distributed object store, or if the object's store id is invalid, + # try to locate the object + log.warning('The backend object store ID (%s) for %s object with ID %s is invalid' % (obj.object_store_id, obj.__class__.__name__, obj.id)) + for id, store in self.backends.items(): + if store.exists(obj, **kwargs): + log.warning('%s object with ID %s found in backend object store with ID %s' % (obj.__class__.__name__, obj.id, id)) + obj.object_store_id = id + object_session( obj ).add( obj ) + object_session( obj ).flush() + return id return None class HierarchicalObjectStore(ObjectStore): diff -r 
94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -1859,7 +1859,7 @@ a_files = os.listdir( temp_file_path ) if len( a_files ) > 0: for f in a_files: - self.app.object_store.update_from_file(hda.dataset.id, + self.app.object_store.update_from_file(hda.dataset, extra_dir="dataset_%d_files" % hda.dataset.id, alt_name = f, file_name = os.path.join(temp_file_path, f), @@ -1899,7 +1899,7 @@ sa_session=self.sa_session ) self.app.security_agent.copy_dataset_permissions( outdata.dataset, child_dataset.dataset ) # Move data from temp location to dataset location - self.app.object_store.update_from_file(child_dataset.dataset.id, filename, create=True) + self.app.object_store.update_from_file(child_dataset.dataset, filename, create=True) self.sa_session.add( child_dataset ) self.sa_session.flush() child_dataset.set_size() @@ -1967,7 +1967,7 @@ self.sa_session.add( primary_data ) self.sa_session.flush() # Move data from temp location to dataset location - self.app.object_store.update_from_file(primary_data.dataset.id, filename, create=True) + self.app.object_store.update_from_file(primary_data.dataset, filename, create=True) primary_data.set_size() primary_data.name = "%s (%s)" % ( outdata.name, designation ) primary_data.info = outdata.info diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -228,8 +228,7 @@ # datasets first, then create the associations parent_to_child_pairs = [] child_dataset_names = set() - store_name = None - store_name_set = False # this is needed since None is a valid value for store_name + object_store_id = None for name, output in tool.outputs.items(): for filter in output.filters: try: @@ -292,12 +291,12 @@ trans.sa_session.add( data ) trans.sa_session.flush() trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions ) - # Create an empty file immediately - trans.app.object_store.create( data.id, store_name=store_name ) - if not store_name_set: - # Ensure all other datasets in this job are created in the same store - store_name = trans.app.object_store.store_name( data.id ) - store_name_set = True + # Create an empty file immediately. The first dataset will be + # created in the "default" store, all others will be created in + # the same store as the first. + data.dataset.object_store_id = object_store_id + trans.app.object_store.create( data.dataset ) + object_store_id = data.dataset.object_store_id # these will be the same thing after the first output # This may not be neccesary with the new parent/child associations data.designation = name # Copy metadata from one of the inputs if requested. @@ -382,6 +381,7 @@ job.add_input_dataset( name, None ) for name, dataset in out_data.iteritems(): job.add_output_dataset( name, dataset ) + job.object_store_id = object_store_id trans.sa_session.add( job ) trans.sa_session.flush() # Some tools are not really executable, but jobs are still created for them ( for record keeping ). 
diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf lib/galaxy/tools/actions/upload_common.py --- a/lib/galaxy/tools/actions/upload_common.py +++ b/lib/galaxy/tools/actions/upload_common.py @@ -319,8 +319,7 @@ for name, value in tool.params_to_strings( params, trans.app ).iteritems(): job.add_parameter( name, value ) job.add_parameter( 'paramfile', to_json_string( json_file_path ) ) - store_name = None - store_name_set = False # this is needed since None is a valid value for store_name + object_store_id = None for i, dataset in enumerate( data_list ): if folder: job.add_output_library_dataset( 'output%i' % i, dataset ) @@ -328,11 +327,12 @@ job.add_output_dataset( 'output%i' % i, dataset ) # Create an empty file immediately if not dataset.dataset.external_filename: - trans.app.object_store.create( dataset.dataset.id, store_name=store_name ) + dataset.dataset.object_store_id = object_store_id + trans.app.object_store.create( dataset.dataset ) + object_store_id = dataset.dataset.object_store_id + trans.sa_session.add( dataset ) # open( dataset.file_name, "w" ).close() - if not store_name_set: - store_name = trans.app.object_store.store_name( dataset.dataset.id ) - store_name_set = True + job.object_store_id = object_store_id job.state = job.states.NEW trans.sa_session.add( job ) trans.sa_session.flush() diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf lib/galaxy/web/controllers/dataset.py --- a/lib/galaxy/web/controllers/dataset.py +++ b/lib/galaxy/web/controllers/dataset.py @@ -360,16 +360,16 @@ data = self._check_dataset(trans, dataset_id) if isinstance( data, basestring ): return data - log.debug( "dataset.py -> transfer_status: Checking transfer status for dataset %s..." % data.id ) + log.debug( "Checking transfer status for dataset %s..." % data.dataset.id ) # Pulling files in extra_files_path into cache is not handled via this # method but that's primarily because those files are typically linked to # through tool's output page anyhow so tying a JavaScript event that will # call this method does not seem doable? - if trans.app.object_store.file_ready(data.id): + if data.dataset.external_filename: return True else: - return False + return trans.app.object_store.file_ready(data.dataset) @web.expose def display(self, trans, dataset_id=None, preview=False, filename=None, to_ext=None, **kwd): @@ -382,7 +382,7 @@ if filename and filename != "index": # For files in extra_files_path - file_path = trans.app.object_store.get_filename(data.dataset.id, extra_dir='dataset_%s_files' % data.dataset.id, alt_name=filename) + file_path = trans.app.object_store.get_filename(data.dataset, extra_dir='dataset_%s_files' % data.dataset.id, alt_name=filename) if os.path.exists( file_path ): if os.path.isdir( file_path ): return trans.show_error_message( "Directory listing is not allowed." ) #TODO: Reconsider allowing listing of directories? 
diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf lib/galaxy/web/controllers/history.py --- a/lib/galaxy/web/controllers/history.py +++ b/lib/galaxy/web/controllers/history.py @@ -663,7 +663,7 @@ trans.response.set_content_type( 'application/x-gzip' ) else: trans.response.set_content_type( 'application/x-tar' ) - return trans.app.object_store.get_data(jeha.dataset.id) + return trans.app.object_store.get_data(jeha.dataset) elif jeha.job.state in [ model.Job.states.RUNNING, model.Job.states.QUEUED, model.Job.states.WAITING ]: return trans.show_message( "Still exporting history %(n)s; please check back soon. Link: <a href='%(s)s'>%(s)s</a>" \ % ( { 'n' : history.name, 's' : url_for( action="export_archive", id=id, qualified=True ) } ) ) diff -r 94d54ff122b0dead007901d1a0fcaebf5c329d0f -r ff576e7f989c186dcec3c6af76ffa37aad4994bf lib/galaxy/web/controllers/library_common.py --- a/lib/galaxy/web/controllers/library_common.py +++ b/lib/galaxy/web/controllers/library_common.py @@ -1647,6 +1647,7 @@ for ldda_id in ldda_ids: try: ldda = trans.sa_session.query( trans.app.model.LibraryDatasetDatasetAssociation ).get( trans.security.decode_id( ldda_id ) ) + assert not ldda.dataset.purged lddas.append( ldda ) except: ldda = None Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
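Editor's note on what the changeset does operationally: backends in distributed_object_store_conf.xml.sample are now keyed by an `id` attribute instead of `name`, the object store API takes the model object itself (`obj`) rather than a bare `dataset_id`, and the chosen backend's id is persisted in new `object_store_id` columns on the `dataset`, `job`, and `metadata_file` tables (migration 0089), so the distributed store no longer has to probe every backend to locate an object. Below is a minimal, hypothetical sketch of that selection-and-persistence behaviour; it is not Galaxy code, the names (`DistributedStoreSketch`, `FakeDataset`) are invented for illustration, and the SQLAlchemy session flush plus delegation to the concrete backend are reduced to comments.

    import random

    class FakeDataset(object):
        """Stand-in for a Galaxy model object: an integer id plus the new
        object_store_id attribute that migration 0089 adds as a column."""
        def __init__(self, object_id):
            self.id = object_id
            self.object_store_id = None

    class DistributedStoreSketch(object):
        """Sketch of the weighted backend selection this changeset introduces."""
        def __init__(self, backend_weights):
            # backend_weights: {backend_id: weight}, mirroring the
            # <backend id="..." weight="..."> elements in the sample config.
            self.backends = set(backend_weights)
            # Simplest possible weighting: repeat each id 'weight' times,
            # then choose randomly from the resulting sequence.
            self.weighted_backend_ids = [
                backend_id
                for backend_id, weight in backend_weights.items()
                for _ in range(weight)
            ]

        def create(self, obj):
            # create() is the only call where obj.object_store_id may still be None.
            if obj.object_store_id is None or obj.object_store_id not in self.backends:
                obj.object_store_id = random.choice(self.weighted_backend_ids)
            # The real method then flushes obj so the chosen id is written to the
            # object_store_id column, and delegates to
            # self.backends[obj.object_store_id].create(obj, ...).
            return obj.object_store_id

    dataset = FakeDataset(1)
    store = DistributedStoreSketch({'files1': 1, 'files2': 1})
    chosen = store.create(dataset)
    print('dataset 1 assigned to backend %s' % chosen)

Subsequent calls read the stored id directly; the changeset's `__get_store_id_for()` covers the fallback case where a stored id is missing or stale by probing the backends, logging a warning, and writing the rediscovered id back to the object.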