1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/ab90893a7cf5/ changeset: ab90893a7cf5 user: greg date: 2011-11-23 22:16:15 summary: Re-engineer the datatypes registry so that it is initialized once when the Galaxy server is started, but data types can continue to be loaded throughout the Galaxy server's session (hopefully this doesn't break anything). Add support for a single "imported_module" to be passed to the new load_datatypes() method in the datatypes registry. This provides the ability to load a single class module from an installed tool shed repository along with a datatypes_conf.xml file included in the installed repository and pass them to the new load_datatypes() method. In the future, multiple imported modules may be allowed. The datatypes_conf.xml file included in the repository must conform to a slightly different definition than the same named file that comes with the distribution. This new definition will be documented in the Galaxy tool shed wiki. We now have the ability to load new data types into the Galaxy server from an installed tool shed repository without restarting the Galaxy server. 
affected #: 12 files diff -r 856aac70b018de6a748cf49355677d4dbe27ddf6 -r ab90893a7cf51e272cafa97b4702d32edb44223f lib/galaxy/app.py --- a/lib/galaxy/app.py +++ b/lib/galaxy/app.py @@ -22,7 +22,8 @@ self.config.check() config.configure_logging( self.config ) # Set up datatypes registry - self.datatypes_registry = galaxy.datatypes.registry.Registry( self.config.root, self.config.datatypes_config ) + self.datatypes_registry = galaxy.datatypes.registry.Registry() + self.datatypes_registry.load_datatypes( self.config.root, self.config.datatypes_config ) galaxy.model.set_datatypes_registry( self.datatypes_registry ) # Set up the tool sheds registry if os.path.isfile( self.config.tool_sheds_config ): diff -r 856aac70b018de6a748cf49355677d4dbe27ddf6 -r ab90893a7cf51e272cafa97b4702d32edb44223f lib/galaxy/datatypes/registry.py --- a/lib/galaxy/datatypes/registry.py +++ b/lib/galaxy/datatypes/registry.py @@ -12,7 +12,7 @@ pass class Registry( object ): - def __init__( self, root_dir=None, config=None ): + def __init__( self ): self.log = logging.getLogger(__name__) self.log.addHandler( logging.NullHandler() ) self.datatypes_by_extension = {} @@ -27,21 +27,33 @@ self.sniff_order = [] self.upload_file_formats = [] self.display_applications = odict() #map a display application id to a display application - inherit_display_application_by_class = [] + self.datatype_converters_path = None + self.datatype_indexers_path = None + self.display_applications_path = None + def load_datatypes( self, root_dir=None, config=None, imported_module=None ): if root_dir and config: + inherit_display_application_by_class = [] # Parse datatypes_conf.xml tree = galaxy.util.parse_xml( config ) root = tree.getroot() # Load datatypes and converters from config self.log.debug( 'Loading datatypes from %s' % config ) registration = root.find( 'registration' ) - self.datatype_converters_path = os.path.join( root_dir, registration.get( 'converters_path', 'lib/galaxy/datatypes/converters' ) ) - 
self.datatype_indexers_path = os.path.join( root_dir, registration.get( 'indexers_path', 'lib/galaxy/datatypes/indexers' ) ) - self.display_applications_path = os.path.join( root_dir, registration.get( 'display_path', 'display_applications' ) ) - if not os.path.isdir( self.datatype_converters_path ): - raise ConfigurationError( "Directory does not exist: %s" % self.datatype_converters_path ) - if not os.path.isdir( self.datatype_indexers_path ): - raise ConfigurationError( "Directory does not exist: %s" % self.datatype_indexers_path ) + # The following implementation implies that only the first datatypes_conf.xml parsed will + # define the various paths. This is probably ok, since we can justifiably require that the + # local datatypes_conf.xml file sets the standard, and all additional datatypes_conf.xml + # files installed with repositories from tool sheds must use the same paths. However, we + # may discover at some future time that allowing for multiple paths is more optimal. + if not self.datatype_converters_path: + self.datatype_converters_path = os.path.join( root_dir, registration.get( 'converters_path', 'lib/galaxy/datatypes/converters' ) ) + if not os.path.isdir( self.datatype_converters_path ): + raise ConfigurationError( "Directory does not exist: %s" % self.datatype_converters_path ) + if not self.datatype_indexers_path: + self.datatype_indexers_path = os.path.join( root_dir, registration.get( 'indexers_path', 'lib/galaxy/datatypes/indexers' ) ) + if not os.path.isdir( self.datatype_indexers_path ): + raise ConfigurationError( "Directory does not exist: %s" % self.datatype_indexers_path ) + if not self.display_applications_path: + self.display_applications_path = os.path.join( root_dir, registration.get( 'display_path', 'display_applications' ) ) for elem in registration.findall( 'datatype' ): try: extension = elem.get( 'extension', None ) @@ -55,11 +67,14 @@ fields = dtype.split( ':' ) datatype_module = fields[0] datatype_class_name = fields[1] - 
fields = datatype_module.split( '.' ) - module = __import__( fields.pop(0) ) - for mod in fields: - module = getattr( module, mod ) - datatype_class = getattr( module, datatype_class_name ) + if imported_module: + datatype_class = getattr( imported_module, datatype_class_name ) + else: + fields = datatype_module.split( '.' ) + module = __import__( fields.pop(0) ) + for mod in fields: + module = getattr( module, mod ) + datatype_class = getattr( module, datatype_class_name ) elif type_extension: datatype_class = self.datatypes_by_extension[type_extension].__class__ if make_subclass: @@ -252,10 +267,8 @@ if not included: self.sniff_order.append(datatype) append_to_sniff_order() - def get_available_tracks(self): return self.available_tracks - def get_mimetype_by_extension(self, ext, default = 'application/octet-stream' ): """Returns a mimetype based on an extension""" try: @@ -265,7 +278,6 @@ mimetype = default self.log.warning('unknown mimetype in data factory %s' % ext) return mimetype - def get_datatype_by_extension(self, ext ): """Returns a datatype based on an extension""" try: @@ -273,7 +285,6 @@ except KeyError: builder = data.Text() return builder - def change_datatype(self, data, ext, set_meta = True ): data.extension = ext # call init_meta and copy metadata from itself. 
The datatype @@ -287,7 +298,6 @@ data.set_meta( overwrite = False ) data.set_peek() return data - def old_change_datatype(self, data, ext): """Creates and returns a new datatype based on an existing data and an extension""" newdata = factory(ext)(id=data.id) @@ -295,7 +305,6 @@ setattr(newdata, key, value) newdata.ext = ext return newdata - def load_datatype_converters( self, toolbox ): """Adds datatype converters from self.converters to the calling app's toolbox""" for elem in self.converters: @@ -312,7 +321,6 @@ self.log.debug( "Loaded converter: %s", converter.id ) except: self.log.exception( "error reading converter from path: %s" % converter_path ) - def load_external_metadata_tool( self, toolbox ): """Adds a tool which is used to set external metadata""" #we need to be able to add a job to the queue to set metadata. The queue will currently only accept jobs with an associated tool. @@ -337,7 +345,6 @@ toolbox.tools_by_id[ set_meta_tool.id ] = set_meta_tool self.set_external_metadata_tool = set_meta_tool self.log.debug( "Loaded external metadata tool: %s", self.set_external_metadata_tool.id ) - def load_datatype_indexers( self, toolbox ): """Adds indexers from self.indexers to the toolbox from app""" for elem in self.indexers: @@ -347,7 +354,6 @@ toolbox.tools_by_id[indexer.id] = indexer self.datatype_indexers[datatype] = indexer self.log.debug( "Loaded indexer: %s", indexer.id ) - def get_converters_by_datatype(self, ext): """Returns available converters by source type""" converters = odict() @@ -360,7 +366,6 @@ if ext in self.datatype_converters.keys(): converters.update(self.datatype_converters[ext]) return converters - def get_indexers_by_datatype( self, ext ): """Returns indexers based on datatype""" class_chain = list() @@ -373,14 +378,12 @@ ext2type = lambda x: self.get_datatype_by_extension(x) class_chain = sorted(class_chain, lambda x,y: issubclass(ext2type(x),ext2type(y)) and -1 or 1) return [self.datatype_indexers[x] for x in class_chain] - def 
get_converter_by_target_type(self, source_ext, target_ext): """Returns a converter based on source and target datatypes""" converters = self.get_converters_by_datatype(source_ext) if target_ext in converters.keys(): return converters[target_ext] return None - def find_conversion_destination_for_dataset_by_extensions( self, dataset, accepted_formats, converter_safe = True ): """Returns ( target_ext, existing converted dataset )""" for convert_ext in self.get_converters_by_datatype( dataset.ext ): @@ -394,10 +397,8 @@ ret_data = None return ( convert_ext, ret_data ) return ( None, None ) - def get_composite_extensions( self ): return [ ext for ( ext, d_type ) in self.datatypes_by_extension.iteritems() if d_type.composite_type is not None ] - def get_upload_metadata_params( self, context, group, tool ): """Returns dict of case value:inputs for metadata conditional for upload tool""" rval = {} @@ -413,4 +414,3 @@ if 'auto' not in rval and 'txt' in rval: #need to manually add 'auto' datatype rval[ 'auto' ] = rval[ 'txt' ] return rval - diff -r 856aac70b018de6a748cf49355677d4dbe27ddf6 -r ab90893a7cf51e272cafa97b4702d32edb44223f lib/galaxy/datatypes/sniff.py --- a/lib/galaxy/datatypes/sniff.py +++ b/lib/galaxy/datatypes/sniff.py @@ -280,6 +280,7 @@ """ if sniff_order is None: datatypes_registry = registry.Registry() + datatypes_registry.load_datatypes() sniff_order = datatypes_registry.sniff_order for datatype in sniff_order: """ diff -r 856aac70b018de6a748cf49355677d4dbe27ddf6 -r ab90893a7cf51e272cafa97b4702d32edb44223f lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -25,7 +25,9 @@ log = logging.getLogger( __name__ ) -datatypes_registry = galaxy.datatypes.registry.Registry() #Default Value Required for unit tests +datatypes_registry = galaxy.datatypes.registry.Registry() +# Default Value Required for unit tests +datatypes_registry.load_datatypes() class NoConverterException(Exception): def __init__(self, value): diff 
-r 856aac70b018de6a748cf49355677d4dbe27ddf6 -r ab90893a7cf51e272cafa97b4702d32edb44223f lib/galaxy/tools/parameters/basic.py --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -1307,7 +1307,9 @@ if tool is None: #This occurs for things such as unit tests import galaxy.datatypes.registry - formats.append( galaxy.datatypes.registry.Registry().get_datatype_by_extension( extension.lower() ).__class__ ) + datatypes_registry = galaxy.datatypes.registry.Registry() + datatypes_registry.load_datatypes() + formats.append( datatypes_registry.get_datatype_by_extension( extension.lower() ).__class__ ) else: formats.append( tool.app.datatypes_registry.get_datatype_by_extension( extension.lower() ).__class__ ) self.formats = tuple( formats ) diff -r 856aac70b018de6a748cf49355677d4dbe27ddf6 -r ab90893a7cf51e272cafa97b4702d32edb44223f lib/galaxy/util/none_like.py --- a/lib/galaxy/util/none_like.py +++ b/lib/galaxy/util/none_like.py @@ -20,7 +20,9 @@ def __init__( self, datatypes_registry = None, ext = 'data', dbkey = '?' 
): self.ext = self.extension = ext self.dbkey = dbkey - if datatypes_registry is None: datatypes_registry = Registry() + if datatypes_registry is None: + datatypes_registry = Registry() + datatypes_registry.load_datatypes() self.datatype = datatypes_registry.get_datatype_by_extension( ext ) self._metadata = None self.metadata = MetadataCollection( self ) diff -r 856aac70b018de6a748cf49355677d4dbe27ddf6 -r ab90893a7cf51e272cafa97b4702d32edb44223f lib/galaxy/web/controllers/admin.py --- a/lib/galaxy/web/controllers/admin.py +++ b/lib/galaxy/web/controllers/admin.py @@ -4,7 +4,7 @@ from galaxy.web.framework.helpers import time_ago, iff, grids from galaxy.tools.search import ToolBoxSearch from galaxy.tools import ToolSection, json_fix -from galaxy.util import inflector +from galaxy.util import parse_xml, inflector import logging log = logging.getLogger( __name__ ) @@ -874,8 +874,7 @@ tmp_stderr.close() if returncode == 0: # Load data types required by tools. - # TODO: uncomment the following when we're ready... - #self.__load_datatypes( trans, repo_files_dir ) + self.__load_datatypes( trans, repo_files_dir ) # Load tools and tool data files required by them. sample_files, repository_tools_tups = self.__get_repository_tools_and_sample_files( trans, tool_path, repo_files_dir ) if repository_tools_tups: @@ -898,9 +897,8 @@ persisted_new_tool_section.write( new_tool_section ) persisted_new_tool_section.close() # Parse the persisted tool panel section - tree = ElementTree.parse( tmp_name ) + tree = parse_xml( tmp_name ) root = tree.getroot() - ElementInclude.include( root ) # Load the tools in the section into the tool panel. trans.app.toolbox.load_section_tag_set( root, trans.app.toolbox.tool_panel, tool_path ) # Remove the temporary file @@ -1186,35 +1184,49 @@ datatypes_config = os.path.abspath( os.path.join( root, name ) ) break if datatypes_config: + imported_module = None # Parse datatypes_config. 
- tree = ElementTree.parse( datatypes_config ) - root = tree.getroot() - ElementInclude.include( root ) - datatype_files = root.find( 'datatype_files' ) + tree = parse_xml( datatypes_config ) + datatypes_config_root = tree.getroot() + relative_path_to_datatype_file_name = None + datatype_files = datatypes_config_root.find( 'datatype_files' ) + # Currently only a single datatype_file is supported. For example: + # <datatype_files> + # <datatype_file name="gmap.py"/> + # </datatype_files> for elem in datatype_files.findall( 'datatype_file' ): datatype_file_name = elem.get( 'name', None ) if datatype_file_name: # Find the file in the installed repository. - relative_path = None for root, dirs, files in os.walk( repo_files_dir ): if root.find( '.hg' ) < 0: for name in files: if name == datatype_file_name: - relative_path = os.path.join( root, name ) + relative_path_to_datatype_file_name = os.path.join( root, name ) break - relative_head, relative_tail = os.path.split( relative_path ) - # TODO: get the import_module by parsing the <registration><datatype> tags - if datatype_file_name.find( '.' ) > 0: - import_module = datatype_file_name.split( '.' )[ 0 ] - else: - import_module = datatype_file_name - try: - sys.path.insert( 0, relative_head ) - module = __import__( import_module ) - sys.path.pop( 0 ) - except Exception, e: - log.debug( "Exception importing datatypes code file included in installed repository: %s" % str( e ) ) - trans.app.datatypes_registry = galaxy.datatypes.registry.Registry( trans.app.config.root, datatypes_config ) + break + if relative_path_to_datatype_file_name: + relative_head, relative_tail = os.path.split( relative_path_to_datatype_file_name ) + registration = datatypes_config_root.find( 'registration' ) + # Get the module by parsing the <datatype> tag. + for elem in registration.findall( 'datatype' ): + # A 'type' attribute is currently required. The attribute + # should be something like: type="gmap:GmapDB". 
+ dtype = elem.get( 'type', None ) + if dtype: + fields = dtype.split( ':' ) + datatype_module = fields[0] + datatype_class_name = fields[1] + # Since we currently support only a single datatype_file, + # we have what we need. + break + try: + sys.path.insert( 0, relative_head ) + imported_module = __import__( datatype_module ) + sys.path.pop( 0 ) + except Exception, e: + log.debug( "Exception importing datatypes code file included in installed repository: %s" % str( e ) ) + trans.app.datatypes_registry.load_datatypes( root_dir=trans.app.config.root, config=datatypes_config, imported_module=imported_module ) def __get_repository_tools_and_sample_files( self, trans, tool_path, repo_files_dir ): # The sample_files list contains all files whose name ends in .sample sample_files = [] diff -r 856aac70b018de6a748cf49355677d4dbe27ddf6 -r ab90893a7cf51e272cafa97b4702d32edb44223f lib/galaxy/webapps/community/app.py --- a/lib/galaxy/webapps/community/app.py +++ b/lib/galaxy/webapps/community/app.py @@ -15,7 +15,8 @@ self.config.check() config.configure_logging( self.config ) # Set up datatypes registry - self.datatypes_registry = galaxy.datatypes.registry.Registry( self.config.root, self.config.datatypes_config ) + self.datatypes_registry = galaxy.datatypes.registry.Registry() + self.datatypes_registry.load_datatypes( self.config.root, self.config.datatypes_config ) # Determine the database url if self.config.database_connection: db_url = self.config.database_connection diff -r 856aac70b018de6a748cf49355677d4dbe27ddf6 -r ab90893a7cf51e272cafa97b4702d32edb44223f scripts/functional_tests.py --- a/scripts/functional_tests.py +++ b/scripts/functional_tests.py @@ -244,7 +244,9 @@ else: # FIXME: This doesn't work at all now that toolbox requires an 'app' instance # (to get at datatypes, might just pass a datatype registry directly) - my_app = bunch.Bunch( datatypes_registry = galaxy.datatypes.registry.Registry() ) + datatypes_registry = galaxy.datatypes.registry.Registry() + 
datatypes_registry.load_datatypes() + my_app = bunch.Bunch( datatypes_registry ) test_toolbox.toolbox = tools.ToolBox( 'tool_conf.xml.test', 'tools', my_app ) # ---- Find tests --------------------------------------------------------- diff -r 856aac70b018de6a748cf49355677d4dbe27ddf6 -r ab90893a7cf51e272cafa97b4702d32edb44223f scripts/set_metadata.py --- a/scripts/set_metadata.py +++ b/scripts/set_metadata.py @@ -37,7 +37,9 @@ # Set up datatypes registry config_root = sys.argv.pop( 1 ) datatypes_config = sys.argv.pop( 1 ) - galaxy.model.set_datatypes_registry( galaxy.datatypes.registry.Registry( config_root, datatypes_config ) ) + datatypes_registry = galaxy.datatypes.registry.Registry() + datatypes_registry.load_datatypes( root_dir=config_root, config=datatypes_config ) + galaxy.model.set_datatypes_registry( datatypes_registry ) job_metadata = sys.argv.pop( 1 ) ext_override = dict() diff -r 856aac70b018de6a748cf49355677d4dbe27ddf6 -r ab90893a7cf51e272cafa97b4702d32edb44223f tools/data_source/data_source.py --- a/tools/data_source/data_source.py +++ b/tools/data_source/data_source.py @@ -57,7 +57,8 @@ enhanced_handling = True json_file = open( job_params[ 'job_config' ][ 'TOOL_PROVIDED_JOB_METADATA_FILE' ], 'w' ) #specially named file for output junk to pass onto set metadata - datatypes_registry = Registry( root_dir = job_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = job_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] ) + datatypes_registry = Registry() + datatypes_registry.load_datatypes( root_dir = job_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = job_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] ) URL = params.get( 'URL', None ) #using exactly URL indicates that only one dataset is being downloaded URL_method = params.get( 'URL_method', None ) diff -r 856aac70b018de6a748cf49355677d4dbe27ddf6 -r ab90893a7cf51e272cafa97b4702d32edb44223f tools/data_source/upload.py --- a/tools/data_source/upload.py +++ b/tools/data_source/upload.py 
@@ -369,7 +369,8 @@ output_paths = parse_outputs( sys.argv[4:] ) json_file = open( 'galaxy.json', 'w' ) - registry = Registry( sys.argv[1], sys.argv[2] ) + registry = Registry() + registry.load_datatypes( root_dir=sys.argv[1], config=sys.argv[2] ) for line in open( sys.argv[3], 'r' ): dataset = from_json_string( line ) Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.