commit/galaxy-central: dannon: NestedObjectStore/HierarchicalObjectStore. Object store configuration now possible through object_store_conf.xml (which overrides all universe objectstore settings).
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/751e7989d076/ Changeset: 751e7989d076 User: dannon Date: 2013-09-25 20:41:51 Summary: NestedObjectStore/HierarchicalObjectStore. Object store configuration now possible through object_store_conf.xml (which overrides all universe objectstore settings). Affected #: 6 files diff -r e5301506fcbe9dc5b1ae92d347b200254a3c63fa -r 751e7989d0766d101df5c3175e6164afc86ba7c8 lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -242,6 +242,9 @@ self.os_is_secure = string_as_bool( kwargs.get( 'os_is_secure', True ) ) self.os_conn_path = kwargs.get( 'os_conn_path', '/' ) self.object_store_cache_size = float(kwargs.get( 'object_store_cache_size', -1 )) + self.object_store_config_file = kwargs.get( 'object_store_config_file', None ) + if self.object_store_config_file is not None: + self.object_store_config_file = resolve_path( self.object_store_config_file, self.root ) self.distributed_object_store_config_file = kwargs.get( 'distributed_object_store_config_file', None ) if self.distributed_object_store_config_file is not None: self.distributed_object_store_config_file = resolve_path( self.distributed_object_store_config_file, self.root ) diff -r e5301506fcbe9dc5b1ae92d347b200254a3c63fa -r 751e7989d0766d101df5c3175e6164afc86ba7c8 lib/galaxy/objectstore/__init__.py --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -5,7 +5,6 @@ """ import os -import time import random import shutil import logging @@ -25,7 +24,7 @@ """ ObjectStore abstract interface """ - def __init__(self, config, **kwargs): + def __init__(self, config, config_xml=None, **kwargs): self.running = True self.extra_dirs = {} @@ -194,15 +193,25 @@ True >>> assert s.get_filename(obj) == file_path + '/000/dataset_1.dat' """ - def __init__(self, config, file_path=None, extra_dirs=None): - super(DiskObjectStore, self).__init__(config, file_path=file_path, extra_dirs=extra_dirs) + def 
__init__(self, config, config_xml=None, file_path=None, extra_dirs=None): + super(DiskObjectStore, self).__init__(config, config_xml=None, file_path=file_path, extra_dirs=extra_dirs) self.file_path = file_path or config.file_path self.config = config self.check_old_style = config.object_store_check_old_style self.extra_dirs['job_work'] = config.job_working_directory self.extra_dirs['temp'] = config.new_file_path + #The new config_xml overrides universe settings. + if config_xml: + for e in config_xml: + if e.tag == 'files_dir': + self.file_path = e.get('path') + else: + self.extra_dirs[e.tag] = e.get('path') if extra_dirs is not None: self.extra_dirs.update( extra_dirs ) + print "DISK OBJECT STORE:" + print self.file_path + print self.extra_dirs def _get_filename(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None): """Class method that returns the absolute path for the file corresponding @@ -364,6 +373,69 @@ super(CachingObjectStore, self).__init__(self, path, backend) +class NestedObjectStore(ObjectStore): + """ + Base for ObjectStores that use other ObjectStores + (DistributedObjectStore, HierarchicalObjectStore) + """ + + def __init__(self, config, config_xml=None): + super(NestedObjectStore, self).__init__(config, config_xml=config_xml) + self.backends = {} + + def shutdown(self): + [b.shutdown() for b in self.backends.values()] + if self.sleeper is not None: + self.sleeper.wake() + super(NestedObjectStore, self).shutdown() + + def exists(self, obj, **kwargs): + return self.__call_method('exists', obj, False, False, **kwargs) + + def file_ready(self, obj, **kwargs): + return self.__call_method('file_ready', obj, False, False, **kwargs) + + def create(self, obj, **kwargs): + random.choice(self.backends.values()).create(obj, **kwargs) + + def empty(self, obj, **kwargs): + return self.__call_method('empty', obj, True, False, **kwargs) + + def size(self, obj, **kwargs): + return self.__call_method('size', obj, 0, False, 
**kwargs) + + def delete(self, obj, **kwargs): + return self.__call_method('delete', obj, False, False, **kwargs) + + def get_data(self, obj, **kwargs): + return self.__call_method('get_data', obj, ObjectNotFound, True, **kwargs) + + def get_filename(self, obj, **kwargs): + return self.__call_method('get_filename', obj, ObjectNotFound, True, **kwargs) + + def update_from_file(self, obj, **kwargs): + if kwargs.get('create', False): + self.create(obj, **kwargs) + kwargs['create'] = False + return self.__call_method('update_from_file', obj, ObjectNotFound, True, **kwargs) + + def get_object_url(self, obj, **kwargs): + return self.__call_method('get_object_url', obj, None, False, **kwargs) + + def __call_method(self, method, obj, default, default_is_exception, **kwargs): + """ + Check all children object stores for the first one with the dataset + """ + for key, store in self.backends.items(): + if store.exists(obj, **kwargs): + return store.__getattribute__(method)(obj, **kwargs) + if default_is_exception: + raise default( 'objectstore, __call_method failed: %s on %s, kwargs: %s' + %( method, str( obj ), str( kwargs ) ) ) + else: + return default + + class DistributedObjectStore(ObjectStore): """ ObjectStore that defers to a list of backends, for getting objects the @@ -371,12 +443,13 @@ store selected randomly, but with weighting. 
""" - def __init__(self, config, fsmon=False): - super(DistributedObjectStore, self).__init__(config) - self.distributed_config = config.distributed_object_store_config_file - assert self.distributed_config is not None, "distributed object store ('object_store = distributed') " \ - "requires a config file, please set one in " \ - "'distributed_object_store_config_file')" + def __init__(self, config, config_xml=None, fsmon=False): + super(DistributedObjectStore, self).__init__(config, config_xml=config_xml) + if not config_xml: + self.distributed_config = config.distributed_object_store_config_file + assert self.distributed_config is not None, "distributed object store ('object_store = distributed') " \ + "requires a config file, please set one in " \ + "'distributed_object_store_config_file')" self.backends = {} self.weighted_backend_ids = [] self.original_weighted_backend_ids = [] @@ -385,7 +458,7 @@ random.seed() - self.__parse_distributed_config(config) + self.__parse_distributed_config(config, config_xml) self.sleeper = None if fsmon and ( self.global_max_percent_full or filter( lambda x: x != 0.0, self.max_percent_full.values() ) ): @@ -395,10 +468,14 @@ self.filesystem_monitor_thread.start() log.info("Filesystem space monitor started") - def __parse_distributed_config(self, config): - tree = util.parse_xml(self.distributed_config) - root = tree.getroot() - log.debug('Loading backends for distributed object store from %s' % self.distributed_config) + def __parse_distributed_config(self, config, config_xml = None): + if not config_xml: + tree = util.parse_xml(self.distributed_config) + root = tree.getroot() + log.debug('Loading backends for distributed object store from %s' % self.distributed_config) + else: + root = config_xml.find('backends') + log.debug('Loading backends for distributed object store from %s' % config_xml.get('id')) self.global_max_percent_full = float(root.get('maxpctfull', 0)) for elem in [ e for e in root if e.tag == 'backend' ]: id = 
elem.get('id') @@ -438,17 +515,6 @@ self.weighted_backend_ids = new_weighted_backend_ids self.sleeper.sleep(120) # Test free space every 2 minutes - def shutdown(self): - super(DistributedObjectStore, self).shutdown() - if self.sleeper is not None: - self.sleeper.wake() - - def exists(self, obj, **kwargs): - return self.__call_method('exists', obj, False, False, **kwargs) - - def file_ready(self, obj, **kwargs): - return self.__call_method('file_ready', obj, False, False, **kwargs) - def create(self, obj, **kwargs): """ create() is the only method in which obj.object_store_id may be None @@ -467,30 +533,6 @@ log.debug("Using preferred backend '%s' for creation of %s %s" % (obj.object_store_id, obj.__class__.__name__, obj.id)) self.backends[obj.object_store_id].create(obj, **kwargs) - def empty(self, obj, **kwargs): - return self.__call_method('empty', obj, True, False, **kwargs) - - def size(self, obj, **kwargs): - return self.__call_method('size', obj, 0, False, **kwargs) - - def delete(self, obj, **kwargs): - return self.__call_method('delete', obj, False, False, **kwargs) - - def get_data(self, obj, **kwargs): - return self.__call_method('get_data', obj, ObjectNotFound, True, **kwargs) - - def get_filename(self, obj, **kwargs): - return self.__call_method('get_filename', obj, ObjectNotFound, True, **kwargs) - - def update_from_file(self, obj, **kwargs): - if kwargs.get('create', False): - self.create(obj, **kwargs) - kwargs['create'] = False - return self.__call_method('update_from_file', obj, ObjectNotFound, True, **kwargs) - - def get_object_url(self, obj, **kwargs): - return self.__call_method('get_object_url', obj, None, False, **kwargs) - def __call_method(self, method, obj, default, default_is_exception, **kwargs): object_store_id = self.__get_store_id_for(obj, **kwargs) if object_store_id is not None: @@ -519,33 +561,70 @@ return None -class HierarchicalObjectStore(ObjectStore): +class HierarchicalObjectStore(NestedObjectStore): """ ObjectStore that 
defers to a list of backends, for getting objects the first store where the object exists is used, objects are always created in the first store. """ - def __init__(self, backends=[]): - super(HierarchicalObjectStore, self).__init__() + def __init__(self, config, config_xml=None, fsmon=False): + super(HierarchicalObjectStore, self).__init__(config, config_xml=config_xml) + for b in config_xml.find('backends'): + self.backends[int(b.get('order'))] = build_object_store_from_config(config, fsmon=fsmon, config_xml=b) + def shutdown(self): + for store in self.backends.values(): + store.shutdown() + super(HierarchicalObjectStore, self).shutdown() -def build_object_store_from_config(config, fsmon=False): - """ Depending on the configuration setting, invoke the appropriate object store + def exists(self, obj, **kwargs): + """ + Exists must check all child object stores + """ + for store in self.backends.values(): + if store.exists(obj, **kwargs): + return True + return False + + def create(self, obj, **kwargs): + """ + Create will always be called by the primary object_store + """ + self.backends[0].create(obj, **kwargs) + + +def build_object_store_from_config(config, fsmon=False, config_xml=None): """ - store = config.object_store + Depending on the configuration setting, invoke the appropriate object store + """ + + if not config_xml and config.object_store_config_file: + # This is a top level invocation of build_object_store_from_config, and + # we have an object_store_conf.xml -- read the .xml and build + # accordingly + tree = util.parse_xml(config.object_store_config_file) + root = tree.getroot() + store = root.get('type') + config_xml = root + elif config_xml: + store = config_xml.get('type') + else: + store = config.object_store + if store == 'disk': - return DiskObjectStore(config=config) + print "BUILDING DISK OBJECT STORE" + return DiskObjectStore(config=config, config_xml=config_xml) elif store == 's3' or store == 'swift': from galaxy.objectstore.s3 import 
S3ObjectStore - return S3ObjectStore(config=config) + return S3ObjectStore(config=config, config_xml=config_xml) elif store == 'distributed': - return DistributedObjectStore(config=config, fsmon=fsmon) + return DistributedObjectStore(config=config, fsmon=fsmon, config_xml=config_xml) elif store == 'hierarchical': - return HierarchicalObjectStore() + return HierarchicalObjectStore(config=config, config_xml=config_xml) elif store == 'irods': from galaxy.objectstore.rods import IRODSObjectStore - return IRODSObjectStore(config=config) + return IRODSObjectStore(config=config, config_xml=config_xml) else: log.error("Unrecognized object store definition: {0}".format(store)) diff -r e5301506fcbe9dc5b1ae92d347b200254a3c63fa -r 751e7989d0766d101df5c3175e6164afc86ba7c8 lib/galaxy/objectstore/s3.py --- a/lib/galaxy/objectstore/s3.py +++ b/lib/galaxy/objectstore/s3.py @@ -33,7 +33,7 @@ cache exists that is used as an intermediate location for files between Galaxy and S3. """ - def __init__(self, config): + def __init__(self, config, config_xml=None): super(S3ObjectStore, self).__init__() self.config = config self.staging_path = self.config.file_path diff -r e5301506fcbe9dc5b1ae92d347b200254a3c63fa -r 751e7989d0766d101df5c3175e6164afc86ba7c8 object_store_conf.xml.sample --- /dev/null +++ b/object_store_conf.xml.sample @@ -0,0 +1,24 @@ +<?xml version="1.0"?> +<object_store type="hierarchical"> + <backends> + <object_store type="distributed" id="primary" order="0"> + <backends> + <backend id="files1" type="disk" weight="1"> + <files_dir path="database/files1"/> + <extra_dir type="temp" path="database/tmp1"/> + <extra_dir type="job_work" path="database/job_working_directory1"/> + </backend> + <backend id="files2" type="disk" weight="1"> + <files_dir path="database/files2"/> + <extra_dir type="temp" path="database/tmp2"/> + <extra_dir type="job_work" path="database/job_working_directory2"/> + </backend> + </backends> + </object_store> + <object_store type="disk" id="secondary" 
order="1"> + <files_dir path="database/files3"/> + <extra_dir type="temp" path="database/tmp3"/> + <extra_dir type="job_work" path="database/job_working_directory3"/> + </object_store> + </backends> +</object_store> diff -r e5301506fcbe9dc5b1ae92d347b200254a3c63fa -r 751e7989d0766d101df5c3175e6164afc86ba7c8 templates/webapps/galaxy/history/display.mako --- a/templates/webapps/galaxy/history/display.mako +++ b/templates/webapps/galaxy/history/display.mako @@ -81,4 +81,4 @@ %endif ${_("This history is empty.")} </div> -</%def> \ No newline at end of file +</%def> diff -r e5301506fcbe9dc5b1ae92d347b200254a3c63fa -r 751e7989d0766d101df5c3175e6164afc86ba7c8 universe_wsgi.ini.sample --- a/universe_wsgi.ini.sample +++ b/universe_wsgi.ini.sample @@ -204,6 +204,10 @@ #collect_outputs_from = new_file_path,job_working_directory # -- Data Storage (Object Store) +# +# Configuration file for the object store +# If this is set and exists, it overrides any other objectstore settings. +# object_store_config_file = object_store_conf.xml # Object store backend module (valid options are: disk, s3, swift, irods, # distributed, hierarchical) Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
Participants (1): commits-noreply@bitbucket.org