commit/galaxy-central: jmchilton: Refactoring: Start abstracting XML processing out of Tool and param classes.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/2dc383cbb700/ Changeset: 2dc383cbb700 User: jmchilton Date: 2014-12-11 05:26:13+00:00 Summary: Refactoring: Start abstracting XML processing out of Tool and param classes. Long term this could allow Galaxy to support - multiple tooling formats (Galaxy-like YAML, CWL http://bit.ly/cwltooldesc, etc...). But I think it is also important from a purely design perspective - this is a core logic class integrating different components - they should not also be doing XML parsing. To verify the interface for parsing tools is expressive enough to allow multiple useful implementations, I built a test YAML tool description that implements many of the same features as Galaxy but smooths out rough edges (uses exit codes for job failure by default for instance). Loading these tools is disabled by default and it is not documented how to enable them because they are not intended to be part of Galaxy's public API. Affected #: 22 files diff -r b9c5b028e389678c1a073f11c9aba5104ae17e89 -r 2dc383cbb700f9d8d6a30eb976b70e8dcf1832db lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -200,6 +200,9 @@ # workflows built using these modules may not function in the # future. self.enable_beta_workflow_modules = string_as_bool( kwargs.get( 'enable_beta_workflow_modules', 'False' ) ) + # These are not even beta - just experiments - don't use them unless + # you want your tools to be broken in the future. 
+ self.enable_beta_tool_formats = string_as_bool( kwargs.get( 'enable_beta_tool_formats', 'False' ) ) # Certain modules such as the pause module will automatically cause # workflows to be scheduled in job handlers the way all workflows will diff -r b9c5b028e389678c1a073f11c9aba5104ae17e89 -r 2dc383cbb700f9d8d6a30eb976b70e8dcf1832db lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -44,7 +44,6 @@ from galaxy.tools.actions.data_source import DataSourceToolAction from galaxy.tools.actions.data_manager import DataManagerToolAction from galaxy.tools.deps import build_dependency_manager -from galaxy.tools.deps.requirements import parse_requirements_from_xml from galaxy.tools.parameters import check_param, params_from_strings, params_to_strings from galaxy.tools.parameters import output_collect from galaxy.tools.parameters.basic import (BaseURLToolParameter, @@ -56,7 +55,9 @@ from galaxy.tools.parameters.output import ToolOutputActionGroup from galaxy.tools.parameters.validation import LateValidationError from galaxy.tools.filters import FilterFactory -from galaxy.tools.test import parse_tests_elem +from galaxy.tools.test import parse_tests +from galaxy.tools.parser import get_tool_source +from galaxy.tools.parser.xml import XmlPageSource from galaxy.util import listify, parse_xml, rst_to_html, string_as_bool, string_to_object, xml_text, xml_to_string from galaxy.tools.parameters.meta import expand_meta_parameters from galaxy.util.bunch import Bunch @@ -742,21 +743,23 @@ def load_tool( self, config_file, guid=None, repository_id=None, **kwds ): """Load a single tool from the file named by `config_file` and return an instance of `Tool`.""" # Parse XML configuration file and get the root element - tree = load_tool( config_file ) - root = tree.getroot() + tool_source = get_tool_source( config_file, getattr( self.app.config, "enable_beta_tool_formats", False ) ) # Allow specifying a different tool subclass to instantiate - 
if root.find( "type" ) is not None: - type_elem = root.find( "type" ) - module = type_elem.get( 'module', 'galaxy.tools' ) - cls = type_elem.get( 'class' ) + tool_module = tool_source.parse_tool_module() + if tool_module is not None: + module, cls = tool_module mod = __import__( module, globals(), locals(), [cls] ) ToolClass = getattr( mod, cls ) - elif root.get( 'tool_type', None ) is not None: - ToolClass = tool_types.get( root.get( 'tool_type' ) ) + elif tool_source.parse_tool_type(): + tool_type = tool_source.parse_tool_type() + ToolClass = tool_types.get( tool_type ) else: # Normal tool - only insert dynamic resource parameters for these # tools. - if hasattr( self.app, "job_config" ): # toolshed may not have job_config? + root = getattr( tool_source, "root", None ) + # TODO: mucking with the XML directly like this is terrible, + # modify inputs directly post load if possible. + if root and hasattr( self.app, "job_config" ): # toolshed may not have job_config? tool_id = root.get( 'id' ) if root else None parameters = self.app.job_config.get_tool_resource_parameters( tool_id ) if parameters: @@ -773,7 +776,7 @@ inputs.append( conditional_element ) ToolClass = Tool - tool = ToolClass( config_file, root, self.app, guid=guid, repository_id=repository_id, **kwds ) + tool = ToolClass( config_file, tool_source, self.app, guid=guid, repository_id=repository_id, **kwds ) tool_id = tool.id if not tool_id.startswith("__"): # do not monitor special tools written to tmp directory - no reason @@ -1238,7 +1241,7 @@ dict_collection_visible_keys = ( 'id', 'name', 'version', 'description' ) default_template = 'tool_form.mako' - def __init__( self, config_file, root, app, guid=None, repository_id=None ): + def __init__( self, config_file, tool_source, app, guid=None, repository_id=None ): """Load a tool from the config named by `config_file`""" # Determine the full path of the directory where the tool config is self.config_file = config_file @@ -1282,7 +1285,7 @@ #populate 
toolshed repository info, if available self.populate_tool_shed_info() # Parse XML element containing configuration - self.parse( root, guid=guid ) + self.parse( tool_source, guid=guid ) self.external_runJob_script = app.config.drmaa_external_runjob_script @property @@ -1388,47 +1391,55 @@ return section_id, section_name return None, None - def parse( self, root, guid=None ): + def parse( self, tool_source, guid=None ): """ Read tool configuration from the element `root` and fill in `self`. """ # Get the (user visible) name of the tool - self.name = root.get( "name" ) + self.name = tool_source.parse_name() if not self.name: raise Exception( "Missing tool 'name'" ) # Get the UNIQUE id for the tool - self.old_id = root.get( "id" ) + self.old_id = tool_source.parse_id() if guid is None: self.id = self.old_id else: self.id = guid if not self.id: raise Exception( "Missing tool 'id'" ) - self.version = root.get( "version" ) + self.version = tool_source.parse_version() if not self.version: # For backward compatibility, some tools may not have versions yet. self.version = "1.0.0" + # Support multi-byte tools - self.is_multi_byte = string_as_bool( root.get( "is_multi_byte", False ) ) + self.is_multi_byte = tool_source.parse_is_multi_byte() # Legacy feature, ignored by UI. 
self.force_history_refresh = False - self.display_interface = string_as_bool( root.get( 'display_interface', str( self.display_interface ) ) ) - self.require_login = string_as_bool( root.get( 'require_login', str( self.require_login ) ) ) - # Load input translator, used by datasource tools to change names/values of incoming parameters - self.input_translator = root.find( "request_param_translation" ) - if self.input_translator: - self.input_translator = ToolInputTranslator.from_element( self.input_translator ) + + self.display_interface = tool_source.parse_display_interface( default=self.display_interface ) + + self.require_login = tool_source.parse_require_login( self.require_login ) + + request_param_translation_elem = tool_source.parse_request_param_translation_elem() + if request_param_translation_elem is not None: + # Load input translator, used by datasource tools to change names/values of incoming parameters + self.input_translator = ToolInputTranslator.from_element( request_param_translation_elem ) + else: + self.input_translator = None + # Command line (template). 
Optional for tools that do not invoke a local program - command = root.find("command") - if command is not None and command.text is not None: - self.command = command.text.lstrip() # get rid of leading whitespace + command = tool_source.parse_command() + if command is not None: + self.command = command.lstrip() # get rid of leading whitespace # Must pre-pend this AFTER processing the cheetah command template - self.interpreter = command.get( "interpreter", None ) + self.interpreter = tool_source.parse_interpreter() else: self.command = '' self.interpreter = None + # Parameters used to build URL for redirection to external app - redirect_url_params = root.find( "redirect_url_params" ) + redirect_url_params = tool_source.parse_redirect_url_params_elem() if redirect_url_params is not None and redirect_url_params.text is not None: # get rid of leading / trailing white space redirect_url_params = redirect_url_params.text.strip() @@ -1437,25 +1448,26 @@ self.redirect_url_params = redirect_url_params.replace( ' ', '**^**' ) else: self.redirect_url_params = '' + # Short description of the tool - self.description = xml_text(root, "description") + self.description = tool_source.parse_description() + # Versioning for tools self.version_string_cmd = None - version_cmd = root.find("version_command") - if version_cmd is not None: - self.version_string_cmd = version_cmd.text.strip() - version_cmd_interpreter = version_cmd.get( "interpreter", None ) + version_command = tool_source.parse_version_command() + if version_command is not None: + self.version_string_cmd = version_command.strip() + + version_cmd_interpreter = tool_source.parse_version_command_interpreter() if version_cmd_interpreter: executable = self.version_string_cmd.split()[0] abs_executable = os.path.abspath(os.path.join(self.tool_dir, executable)) command_line = self.version_string_cmd.replace(executable, abs_executable, 1) self.version_string_cmd = version_cmd_interpreter + " " + command_line + # Parallelism for 
tasks, read from tool config. - parallelism = root.find("parallelism") - if parallelism is not None and parallelism.get("method"): - self.parallelism = ParallelismInfo(parallelism) - else: - self.parallelism = None + self.parallelism = tool_source.parse_parallelism() + # Get JobToolConfiguration(s) valid for this particular Tool. At least # a 'default' will be provided that uses the 'default' handler and # 'default' destination. I thought about moving this to the @@ -1468,15 +1480,65 @@ # In the toolshed context, there is no job config. if 'job_config' in dir(self.app): self.job_tool_configurations = self.app.job_config.get_job_tool_configurations(self_ids) + # Is this a 'hidden' tool (hidden in tool menu) - self.hidden = xml_text(root, "hidden") - if self.hidden: - self.hidden = string_as_bool(self.hidden) + self.hidden = tool_source.parse_hidden() + + self.__parse_legacy_features(tool_source) + + # Load any tool specific options (optional) + self.options = dict( sanitize=True, refresh=False ) + self.__update_options_dict( tool_source ) + self.options = Bunch(** self.options) + + # Parse tool inputs (if there are any required) + self.parse_inputs( tool_source ) + + # Parse tool help + self.parse_help( tool_source ) + + # Description of outputs produced by an invocation of the tool + self.parse_outputs( tool_source ) + + # Parse result handling for tool exit codes and stdout/stderr messages: + self.parse_stdio( tool_source ) + # Any extra generated config files for the tool + self.__parse_config_files(tool_source) + # Action + action = tool_source.parse_action_module() + if action is None: + self.tool_action = self.default_tool_action() + else: + module, cls = action + mod = __import__( module, globals(), locals(), [cls]) + self.tool_action = getattr( mod, cls )() + # Tests + self.__parse_tests(tool_source) + + # Requirements (dependencies) + requirements, containers = tool_source.parse_requirements_and_containers() + self.requirements = requirements + 
self.containers = containers + + self.citations = self._parse_citations( tool_source ) + + # Determine if this tool can be used in workflows + self.is_workflow_compatible = self.check_workflow_compatible(tool_source) + self.__parse_trackster_conf( tool_source ) + + def __parse_legacy_features(self, tool_source): + self.code_namespace = dict() + self.hook_map = {} + self.uihints = {} + + if not hasattr(tool_source, 'root'): + return + + # TODO: Move following logic into XmlToolSource. + root = tool_source.root # Load any tool specific code (optional) Edit: INS 5/29/2007, # allow code files to have access to the individual tool's # "module" if it has one. Allows us to reuse code files, etc. - self.code_namespace = dict() - self.hook_map = {} for code_elem in root.findall("code"): for hook_elem in code_elem.findall("hook"): for key, value in hook_elem.items(): @@ -1485,25 +1547,36 @@ file_name = code_elem.get("file") code_path = os.path.join( self.tool_dir, file_name ) execfile( code_path, self.code_namespace ) - # Load any tool specific options (optional) - self.options = dict( sanitize=True, refresh=False ) + + # User interface hints + uihints_elem = root.find( "uihints" ) + if uihints_elem is not None: + for key, value in uihints_elem.attrib.iteritems(): + self.uihints[ key ] = value + + def __update_options_dict(self, tool_source): + # TODO: Move following logic into ToolSource abstraction. 
+ if not hasattr(tool_source, 'root'): + return + + root = tool_source.root for option_elem in root.findall("options"): for option, value in self.options.copy().items(): if isinstance(value, type(False)): self.options[option] = string_as_bool(option_elem.get(option, str(value))) else: self.options[option] = option_elem.get(option, str(value)) - self.options = Bunch(** self.options) - # Parse tool inputs (if there are any required) - self.parse_inputs( root ) - # Parse tool help - self.parse_help( root ) - # Description of outputs produced by an invocation of the tool - self.parse_outputs( root ) - # Parse result handling for tool exit codes and stdout/stderr messages: - self.parse_stdio( root ) - # Any extra generated config files for the tool + + def __parse_tests(self, tool_source): + self.__tests_source = tool_source + self.__tests_populated = False + + def __parse_config_files(self, tool_source): self.config_files = [] + if not hasattr(tool_source, 'root'): + return + + root = tool_source.root conf_parent_elem = root.find("configfiles") if conf_parent_elem: for conf_elem in conf_parent_elem.findall( "configfile" ): @@ -1511,87 +1584,65 @@ filename = conf_elem.get( "filename", None ) text = conf_elem.text self.config_files.append( ( name, filename, text ) ) - # Action - action_elem = root.find( "action" ) - if action_elem is None: - self.tool_action = self.default_tool_action() - else: - module = action_elem.get( 'module' ) - cls = action_elem.get( 'class' ) - mod = __import__( module, globals(), locals(), [cls]) - self.tool_action = getattr( mod, cls )() - # User interface hints - self.uihints = {} - uihints_elem = root.find( "uihints" ) - if uihints_elem is not None: - for key, value in uihints_elem.attrib.iteritems(): - self.uihints[ key ] = value - # Tests - self.__tests_elem = root.find( "tests" ) - self.__tests_populated = False - # Requirements (dependencies) - requirements, containers = parse_requirements_from_xml( root ) - self.requirements = 
requirements - self.containers = containers + def __parse_trackster_conf(self, tool_source): + self.trackster_conf = None + if not hasattr(tool_source, 'root'): + return - self.citations = self._parse_citations( root ) - - # Determine if this tool can be used in workflows - self.is_workflow_compatible = self.check_workflow_compatible(root) # Trackster configuration. - trackster_conf = root.find( "trackster_conf" ) + trackster_conf = tool_source.root.find( "trackster_conf" ) if trackster_conf is not None: self.trackster_conf = TracksterConfig.parse( trackster_conf ) - else: - self.trackster_conf = None @property def tests( self ): if not self.__tests_populated: - tests_elem = self.__tests_elem - if tests_elem: + tests_source = self.__tests_source + if tests_source: try: - self.__tests = parse_tests_elem( self, tests_elem ) + self.__tests = parse_tests( self, tests_source ) except: + self.__tests = None log.exception( "Failed to parse tool tests" ) else: self.__tests = None self.__tests_populated = True return self.__tests - def parse_inputs( self, root ): + def parse_inputs( self, tool_source ): """ Parse the "<inputs>" element and create appropriate `ToolParameter`s. This implementation supports multiple pages and grouping constructs. """ # Load parameters (optional) - input_elem = root.find("inputs") + pages = tool_source.parse_input_pages() enctypes = set() - if input_elem is not None: - # Handle properties of the input form - self.check_values = string_as_bool( input_elem.get("check_values", self.check_values ) ) - self.nginx_upload = string_as_bool( input_elem.get( "nginx_upload", self.nginx_upload ) ) - self.action = input_elem.get( 'action', self.action ) - # If we have an nginx upload, save the action as a tuple instead of - # a string. The actual action needs to get url_for run to add any - # prefixes, and we want to avoid adding the prefix to the - # nginx_upload_path. This logic is handled in the tool_form.mako - # template. 
- if self.nginx_upload and self.app.config.nginx_upload_path: - if '?' in urllib.unquote_plus( self.action ): - raise Exception( 'URL parameters in a non-default tool action can not be used ' - 'in conjunction with nginx upload. Please convert them to ' - 'hidden POST parameters' ) - self.action = (self.app.config.nginx_upload_path + '?nginx_redir=', - urllib.unquote_plus(self.action)) - self.target = input_elem.get( "target", self.target ) - self.method = input_elem.get( "method", self.method ) - # Parse the actual parameters - # Handle multiple page case - pages = input_elem.findall( "page" ) - for page in ( pages or [ input_elem ] ): - display, inputs = self.parse_input_page( page, enctypes ) + if pages.inputs_defined: + if hasattr(pages, "input_elem"): + input_elem = pages.input_elem + # Handle properties of the input form + self.check_values = string_as_bool( input_elem.get("check_values", self.check_values ) ) + self.nginx_upload = string_as_bool( input_elem.get( "nginx_upload", self.nginx_upload ) ) + self.action = input_elem.get( 'action', self.action ) + # If we have an nginx upload, save the action as a tuple instead of + # a string. The actual action needs to get url_for run to add any + # prefixes, and we want to avoid adding the prefix to the + # nginx_upload_path. This logic is handled in the tool_form.mako + # template. + if self.nginx_upload and self.app.config.nginx_upload_path: + if '?' in urllib.unquote_plus( self.action ): + raise Exception( 'URL parameters in a non-default tool action can not be used ' + 'in conjunction with nginx upload. 
Please convert them to ' + 'hidden POST parameters' ) + self.action = (self.app.config.nginx_upload_path + '?nginx_redir=', + urllib.unquote_plus(self.action)) + self.target = input_elem.get( "target", self.target ) + self.method = input_elem.get( "method", self.method ) + # Parse the actual parameters + # Handle multiple page case + for page_source in pages.page_sources: + display, inputs = self.parse_input_page( page_source, enctypes ) self.inputs_by_page.append( inputs ) self.inputs.update( inputs ) self.display_by_page.append( display ) @@ -1613,24 +1664,28 @@ # thus hardcoded) FIXME: hidden parameters aren't # parameters at all really, and should be passed in a different # way, making this check easier. - self.template_macro_params = template_macro_params(root) + template_macros = {} + if hasattr(tool_source, 'root'): + template_macros = template_macro_params(tool_source.root) + self.template_macro_params = template_macros for param in self.inputs.values(): if not isinstance( param, ( HiddenToolParameter, BaseURLToolParameter ) ): self.input_required = True break - def parse_help( self, root ): + def parse_help( self, tool_source ): """ Parse the help text for the tool. Formatted in reStructuredText, but stored as Mako to allow for dynamic image paths. This implementation supports multiple pages. """ # TODO: Allow raw HTML or an external link. - self.help = root.find("help") + self.help = None self.help_by_page = list() help_header = "" help_footer = "" - if self.help is not None: + if hasattr( tool_source, 'root' ) and tool_source.root.find( 'help' ) is not None: + self.help = tool_source.root.find( 'help' ) if self.repository_id and self.help.text.find( '.. image:: ' ) >= 0: # Handle tool help image display for tools that are contained in repositories in the tool shed or installed into Galaxy. 
lock = threading.Lock() @@ -1667,203 +1722,32 @@ while len( self.help_by_page ) < self.npages: self.help_by_page.append( self.help ) - def parse_outputs( self, root ): + def parse_outputs( self, tool_source ): """ Parse <outputs> elements and fill in self.outputs (keyed by name) """ self.outputs = odict() - out_elem = root.find("outputs") - if not out_elem: - return - for data_elem in out_elem.findall("data"): - output = ToolOutput( data_elem.get("name") ) - output.format = data_elem.get("format", "data") - output.change_format = data_elem.findall("change_format") - output.format_source = data_elem.get("format_source", None) - output.metadata_source = data_elem.get("metadata_source", "") - output.parent = data_elem.get("parent", None) - output.label = xml_text( data_elem, "label" ) - output.count = int( data_elem.get("count", 1) ) - output.filters = data_elem.findall( 'filter' ) - output.from_work_dir = data_elem.get("from_work_dir", None) - output.hidden = string_as_bool( data_elem.get("hidden", "") ) - output.tool = self - output.actions = ToolOutputActionGroup( output, data_elem.find( 'actions' ) ) - output.dataset_collectors = output_collect.dataset_collectors_from_elem( data_elem ) + for output in tool_source.parse_outputs(self): self.outputs[ output.name ] = output # TODO: Include the tool's name in any parsing warnings. - def parse_stdio( self, root ): + def parse_stdio( self, tool_source ): """ Parse <stdio> element(s) and fill in self.return_codes, self.stderr_rules, and self.stdout_rules. Return codes have a range and an error type (fault or warning). Stderr and stdout rules have a regular expression and an error level (fault or warning). """ - try: - self.stdio_exit_codes = list() - self.stdio_regexes = list() + exit_codes, regexes = tool_source.parse_stdio() + self.stdio_exit_codes = exit_codes + self.stdio_regexes = regexes - # We should have a single <stdio> element, but handle the case for - # multiples. 
- # For every stdio element, add all of the exit_code and regex - # subelements that we find: - for stdio_elem in ( root.findall( 'stdio' ) ): - self.parse_stdio_exit_codes( stdio_elem ) - self.parse_stdio_regexes( stdio_elem ) - except Exception: - log.error( "Exception in parse_stdio! " + str(sys.exc_info()) ) + def _parse_citations( self, tool_source ): + # TODO: Move following logic into ToolSource abstraction. + if not hasattr(tool_source, 'root'): + return [] - def parse_stdio_exit_codes( self, stdio_elem ): - """ - Parse the tool's <stdio> element's <exit_code> subelements. - This will add all of those elements, if any, to self.stdio_exit_codes. - """ - try: - # Look for all <exit_code> elements. Each exit_code element must - # have a range/value. - # Exit-code ranges have precedence over a single exit code. - # So if there are value and range attributes, we use the range - # attribute. If there is neither a range nor a value, then print - # a warning and skip to the next. - for exit_code_elem in ( stdio_elem.findall( "exit_code" ) ): - exit_code = ToolStdioExitCode() - # Each exit code has an optional description that can be - # part of the "desc" or "description" attributes: - exit_code.desc = exit_code_elem.get( "desc" ) - if None == exit_code.desc: - exit_code.desc = exit_code_elem.get( "description" ) - # Parse the error level: - exit_code.error_level = ( - self.parse_error_level( exit_code_elem.get( "level" ))) - code_range = exit_code_elem.get( "range", "" ) - if None == code_range: - code_range = exit_code_elem.get( "value", "" ) - if None == code_range: - log.warning( "Tool stdio exit codes must have " - + "a range or value" ) - continue - # Parse the range. We look for: - # :Y - # X: - # X:Y - Split on the colon. We do not allow a colon - # without a beginning or end, though we could. - # Also note that whitespace is eliminated. - # TODO: Turn this into a single match - it should be - # more efficient. 
- code_range = re.sub( "\s", "", code_range ) - code_ranges = re.split( ":", code_range ) - if ( len( code_ranges ) == 2 ): - if ( None == code_ranges[0] or '' == code_ranges[0] ): - exit_code.range_start = float( "-inf" ) - else: - exit_code.range_start = int( code_ranges[0] ) - if ( None == code_ranges[1] or '' == code_ranges[1] ): - exit_code.range_end = float( "inf" ) - else: - exit_code.range_end = int( code_ranges[1] ) - # If we got more than one colon, then ignore the exit code. - elif ( len( code_ranges ) > 2 ): - log.warning( "Invalid tool exit_code range %s - ignored" - % code_range ) - continue - # Else we have a singular value. If it's not an integer, then - # we'll just write a log message and skip this exit_code. - else: - try: - exit_code.range_start = int( code_range ) - except: - log.error( code_range ) - log.warning( "Invalid range start for tool's exit_code %s: exit_code ignored" % code_range ) - continue - exit_code.range_end = exit_code.range_start - # TODO: Check if we got ">", ">=", "<", or "<=": - # Check that the range, regardless of how we got it, - # isn't bogus. If we have two infinite values, then - # the start must be -inf and the end must be +inf. - # So at least warn about this situation: - if ( isinf( exit_code.range_start ) and - isinf( exit_code.range_end ) ): - log.warning( "Tool exit_code range %s will match on " - + "all exit codes" % code_range ) - self.stdio_exit_codes.append( exit_code ) - except Exception: - log.error( "Exception in parse_stdio_exit_codes! " - + str(sys.exc_info()) ) - trace = sys.exc_info()[2] - if ( None != trace ): - trace_msg = repr( traceback.format_tb( trace ) ) - log.error( "Traceback: %s" % trace_msg ) - - def parse_stdio_regexes( self, stdio_elem ): - """ - Look in the tool's <stdio> elem for all <regex> subelements - that define how to look for warnings and fatal errors in - stdout and stderr. This will add all such regex elements - to the Tols's stdio_regexes list. 
- """ - try: - # Look for every <regex> subelement. The regular expression - # will have "match" and "source" (or "src") attributes. - for regex_elem in ( stdio_elem.findall( "regex" ) ): - # TODO: Fill in ToolStdioRegex - regex = ToolStdioRegex() - # Each regex has an optional description that can be - # part of the "desc" or "description" attributes: - regex.desc = regex_elem.get( "desc" ) - if None == regex.desc: - regex.desc = regex_elem.get( "description" ) - # Parse the error level - regex.error_level = ( - self.parse_error_level( regex_elem.get( "level" ) ) ) - regex.match = regex_elem.get( "match", "" ) - if None == regex.match: - # TODO: Convert the offending XML element to a string - log.warning( "Ignoring tool's stdio regex element %s - " - "the 'match' attribute must exist" ) - continue - # Parse the output sources. We look for the "src", "source", - # and "sources" attributes, in that order. If there is no - # such source, then the source defaults to stderr & stdout. - # Look for a comma and then look for "err", "error", "out", - # and "output": - output_srcs = regex_elem.get( "src" ) - if None == output_srcs: - output_srcs = regex_elem.get( "source" ) - if None == output_srcs: - output_srcs = regex_elem.get( "sources" ) - if None == output_srcs: - output_srcs = "output,error" - output_srcs = re.sub( "\s", "", output_srcs ) - src_list = re.split( ",", output_srcs ) - # Just put together anything to do with "out", including - # "stdout", "output", etc. Repeat for "stderr", "error", - # and anything to do with "err". If neither stdout nor - # stderr were specified, then raise a warning and scan both. 
- for src in src_list: - if re.search( "both", src, re.IGNORECASE ): - regex.stdout_match = True - regex.stderr_match = True - if re.search( "out", src, re.IGNORECASE ): - regex.stdout_match = True - if re.search( "err", src, re.IGNORECASE ): - regex.stderr_match = True - if (not regex.stdout_match and not regex.stderr_match): - log.warning( "Tool id %s: unable to determine if tool " - "stream source scanning is output, error, " - "or both. Defaulting to use both." % self.id ) - regex.stdout_match = True - regex.stderr_match = True - self.stdio_regexes.append( regex ) - except Exception: - log.error( "Exception in parse_stdio_exit_codes! " - + str(sys.exc_info()) ) - trace = sys.exc_info()[2] - if ( None != trace ): - trace_msg = repr( traceback.format_tb( trace ) ) - log.error( "Traceback: %s" % trace_msg ) - - def _parse_citations( self, root ): + root = tool_source.root citations = [] citations_elem = root.find("citations") if not citations_elem: @@ -1877,49 +1761,18 @@ citations.append( citation ) return citations - # TODO: This method doesn't have to be part of the Tool class. - def parse_error_level( self, err_level ): - """ - Parses error level and returns error level enumeration. 
If - unparsable, returns 'fatal' - """ - return_level = StdioErrorLevel.FATAL - try: - if err_level: - if ( re.search( "log", err_level, re.IGNORECASE ) ): - return_level = StdioErrorLevel.LOG - elif ( re.search( "warning", err_level, re.IGNORECASE ) ): - return_level = StdioErrorLevel.WARNING - elif ( re.search( "fatal", err_level, re.IGNORECASE ) ): - return_level = StdioErrorLevel.FATAL - else: - log.debug( "Tool %s: error level %s did not match log/warning/fatal" % - ( self.id, err_level ) ) - except Exception: - log.error( "Exception in parse_error_level " - + str(sys.exc_info() ) ) - trace = sys.exc_info()[2] - if ( None != trace ): - trace_msg = repr( traceback.format_tb( trace ) ) - log.error( "Traceback: %s" % trace_msg ) - return return_level - - def parse_input_page( self, input_elem, enctypes ): + def parse_input_page( self, page_source, enctypes ): """ Parse a page of inputs. This basically just calls 'parse_input_elem', but it also deals with possible 'display' elements which are supported only at the top/page level (not in groups). """ - inputs = self.parse_input_elem( input_elem, enctypes ) + inputs = self.parse_input_elem( page_source, enctypes ) # Display - display_elem = input_elem.find("display") - if display_elem is not None: - display = xml_to_string(display_elem) - else: - display = None + display = page_source.parse_display() return display, inputs - def parse_input_elem( self, parent_elem, enctypes, context=None ): + def parse_input_elem( self, page_source, enctypes, context=None ): """ Parse a parent element whose children are inputs -- these could be groups (repeat, conditional) or param elements. 
Groups will be parsed @@ -1927,29 +1780,31 @@ """ rval = odict() context = ExpressionContext( rval, context ) - for elem in parent_elem: + for input_source in page_source.parse_input_sources(): # Repeat group - if elem.tag == "repeat": + input_type = input_source.parse_input_type() + if input_type == "repeat": group = Repeat() - group.name = elem.get( "name" ) - group.title = elem.get( "title" ) - group.help = elem.get( "help", None ) - group.inputs = self.parse_input_elem( elem, enctypes, context ) - group.default = int( elem.get( "default", 0 ) ) - group.min = int( elem.get( "min", 0 ) ) + group.name = input_source.get( "name" ) + group.title = input_source.get( "title" ) + group.help = input_source.get( "help", None ) + page_source = input_source.parse_nested_inputs_source() + group.inputs = self.parse_input_elem( page_source, enctypes, context ) + group.default = int( input_source.get( "default", 0 ) ) + group.min = int( input_source.get( "min", 0 ) ) # Use float instead of int so that 'inf' can be used for no max - group.max = float( elem.get( "max", "inf" ) ) + group.max = float( input_source.get( "max", "inf" ) ) assert group.min <= group.max, \ ValueError( "Min repeat count must be less-than-or-equal to the max." 
) # Force default to be within min-max range group.default = min( max( group.default, group.min ), group.max ) rval[group.name] = group - elif elem.tag == "conditional": + elif input_type == "conditional": group = Conditional() - group.name = elem.get( "name" ) - group.value_ref = elem.get( 'value_ref', None ) - group.value_ref_in_group = string_as_bool( elem.get( 'value_ref_in_group', 'True' ) ) - value_from = elem.get( "value_from" ) + group.name = input_source.get( "name" ) + group.value_ref = input_source.get( 'value_ref', None ) + group.value_ref_in_group = input_source.get_bool( 'value_ref_in_group', True ) + value_from = input_source.get("value_from", None) if value_from: value_from = value_from.split( ':' ) group.value_from = locals().get( value_from[0] ) @@ -1961,24 +1816,23 @@ case = ConditionalWhen() case.value = case_value if case_inputs: - case.inputs = self.parse_input_elem( - ElementTree.XML( "<when>%s</when>" % case_inputs ), enctypes, context ) + page_source = XmlPageSource( ElementTree.XML( "<when>%s</when>" % case_inputs ) ) + case.inputs = self.parse_input_elem( page_source, enctypes, context ) else: case.inputs = odict() group.cases.append( case ) else: # Should have one child "input" which determines the case - input_elem = elem.find( "param" ) - assert input_elem is not None, "<conditional> must have a child <param>" - group.test_param = self.parse_param_elem( input_elem, enctypes, context ) + test_param_input_source = input_source.parse_test_input_source() + group.test_param = self.parse_param_elem( test_param_input_source, enctypes, context ) possible_cases = list( group.test_param.legal_values ) # store possible cases, undefined whens will have no inputs # Must refresh when test_param changes group.test_param.refresh_on_change = True # And a set of possible cases - for case_elem in elem.findall( "when" ): + for (value, case_inputs_source) in input_source.parse_when_input_sources(): case = ConditionalWhen() - case.value = case_elem.get( 
"value" ) - case.inputs = self.parse_input_elem( case_elem, enctypes, context ) + case.value = value + case.inputs = self.parse_input_elem( case_inputs_source, enctypes, context ) group.cases.append( case ) try: possible_cases.remove( case.value ) @@ -1993,7 +1847,8 @@ case.inputs = odict() group.cases.append( case ) rval[group.name] = group - elif elem.tag == "upload_dataset": + elif input_type == "upload_dataset": + elem = input_source.elem() group = UploadDataset() group.name = elem.get( "name" ) group.title = elem.get( "title" ) @@ -2003,23 +1858,24 @@ rval[ group.file_type_name ].refresh_on_change = True rval[ group.file_type_name ].refresh_on_change_values = \ self.app.datatypes_registry.get_composite_extensions() - group.inputs = self.parse_input_elem( elem, enctypes, context ) + group_page_source = XmlPageSource(elem) + group.inputs = self.parse_input_elem( group_page_source, enctypes, context ) rval[ group.name ] = group - elif elem.tag == "param": - param = self.parse_param_elem( elem, enctypes, context ) + elif input_type == "param": + param = self.parse_param_elem( input_source, enctypes, context ) rval[param.name] = param if hasattr( param, 'data_ref' ): param.ref_input = context[ param.data_ref ] self.input_params.append( param ) return rval - def parse_param_elem( self, input_elem, enctypes, context ): + def parse_param_elem( self, input_source, enctypes, context ): """ Parse a single "<param>" element and return a ToolParameter instance. Also, if the parameter has a 'required_enctype' add it to the set enctypes. 
""" - param = ToolParameter.build( self, input_elem ) + param = ToolParameter.build( self, input_source ) param_enctype = param.get_required_enctype() if param_enctype: enctypes.add( param_enctype ) @@ -2039,7 +1895,7 @@ self.repository_owner = tool_shed_repository.owner self.installed_changeset_revision = tool_shed_repository.installed_changeset_revision - def check_workflow_compatible( self, root ): + def check_workflow_compatible( self, tool_source ): """ Determine if a tool can be used in workflows. External tools and the upload tool are currently not supported by workflows. @@ -2052,8 +1908,11 @@ # right now if self.tool_type.startswith( 'data_source' ): return False - if not string_as_bool( root.get( "workflow_compatible", "True" ) ): - return False + + if hasattr( tool_source, "root"): + root = tool_source.root + if not string_as_bool( root.get( "workflow_compatible", "True" ) ): + return False # TODO: Anyway to capture tools that dynamically change their own # outputs? return True @@ -3263,8 +3122,8 @@ def _build_GALAXY_URL_parameter( self ): return ToolParameter.build( self, ElementTree.XML( '<param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=%s" />' % self.id ) ) - def parse_inputs( self, root ): - super( DataSourceTool, self ).parse_inputs( root ) + def parse_inputs( self, tool_source ): + super( DataSourceTool, self ).parse_inputs( tool_source ) if 'GALAXY_URL' not in self.inputs: self.inputs[ 'GALAXY_URL' ] = self._build_GALAXY_URL_parameter() self.inputs_by_page[0][ 'GALAXY_URL' ] = self.inputs[ 'GALAXY_URL' ] @@ -3503,6 +3362,33 @@ self.desc = "" +class TestCollectionDef( object ): + + def __init__( self, elem, parse_param_elem ): + self.elements = [] + attrib = dict( elem.attrib ) + self.collection_type = attrib[ "type" ] + self.name = attrib.get( "name", "Unnamed Collection" ) + for element in elem.findall( "element" ): + element_attrib = dict( element.attrib ) + element_identifier = element_attrib[ "name" ] + 
nested_collection_elem = element.find( "collection" ) + if nested_collection_elem: + self.elements.append( ( element_identifier, TestCollectionDef( nested_collection_elem, parse_param_elem ) ) ) + else: + self.elements.append( ( element_identifier, parse_param_elem( element ) ) ) + + def collect_inputs( self ): + inputs = [] + for element in self.elements: + value = element[ 1 ] + if isinstance( value, TestCollectionDef ): + inputs.extend( value.collect_inputs() ) + else: + inputs.append( value ) + return inputs + + def json_fix( val ): if isinstance( val, list ): return [ json_fix( v ) for v in val ] diff -r b9c5b028e389678c1a073f11c9aba5104ae17e89 -r 2dc383cbb700f9d8d6a30eb976b70e8dcf1832db lib/galaxy/tools/deps/requirements.py --- a/lib/galaxy/tools/deps/requirements.py +++ b/lib/galaxy/tools/deps/requirements.py @@ -45,6 +45,12 @@ return ContainerDescription( identifier=identifier, type=type ) +def parse_requirements_from_dict( root_dict ): + requirements = root_dict.get("requirements", []) + containers = root_dict.get("containers", []) + return map(ToolRequirement.from_dict, requirements), map(ContainerDescription.from_dict, containers) + + def parse_requirements_from_xml( xml_root ): """ diff -r b9c5b028e389678c1a073f11c9aba5104ae17e89 -r 2dc383cbb700f9d8d6a30eb976b70e8dcf1832db lib/galaxy/tools/imp_exp/__init__.py --- a/lib/galaxy/tools/imp_exp/__init__.py +++ b/lib/galaxy/tools/imp_exp/__init__.py @@ -13,12 +13,7 @@ log = logging.getLogger(__name__) - -def load_history_imp_exp_tools( toolbox ): - """ Adds tools for importing/exporting histories to archives. """ - # Use same process as that used in load_external_metadata_tool; see that - # method for why create tool description files on the fly. 
- tool_xml_text = """ +EXPORT_HISTORY_TEXT = """ <tool id="__EXPORT_HISTORY__" name="Export History" version="0.1" tool_type="export_history"><type class="ExportHistoryTool" module="galaxy.tools"/><action module="galaxy.tools.actions.history_imp_exp" class="ExportHistoryToolAction"/> @@ -32,7 +27,14 @@ <data format="gzip" name="output_file"/></outputs></tool> - """ +""" + + +def load_history_imp_exp_tools( toolbox ): + """ Adds tools for importing/exporting histories to archives. """ + # Use same process as that used in load_external_metadata_tool; see that + # method for why create tool description files on the fly. + tool_xml_text = EXPORT_HISTORY_TEXT # Load export tool. tmp_name = tempfile.NamedTemporaryFile() diff -r b9c5b028e389678c1a073f11c9aba5104ae17e89 -r 2dc383cbb700f9d8d6a30eb976b70e8dcf1832db lib/galaxy/tools/parameters/basic.py --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -18,6 +18,8 @@ from sanitize import ToolParameterSanitizer import validation import dynamic_options +import galaxy.tools.parser +from ..parser import get_input_source as ensure_input_source from ..parameters import history_query from .dataset_matcher import DatasetMatcher from .dataset_matcher import DatasetCollectionMatcher @@ -37,24 +39,34 @@ """ dict_collection_visible_keys = ( 'name', 'type', 'label', 'help' ) - def __init__( self, tool, param, context=None ): + def __init__( self, tool, input_source, context=None ): + input_source = ensure_input_source(input_source) self.tool = tool self.refresh_on_change = False self.refresh_on_change_values = [] - self.name = param.get("name") - self.type = param.get("type") - self.label = util.xml_text(param, "label") - self.help = util.xml_text(param, "help") - self.sanitizer = param.find( "sanitizer" ) - if self.sanitizer is not None: - self.sanitizer = ToolParameterSanitizer.from_element( self.sanitizer ) + self.name = input_source.get("name") + self.type = input_source.get("type") + self.label = 
input_source.parse_label() + self.help = input_source.parse_help() + sanitizer_elem = input_source.parse_sanitizer_elem() + if sanitizer_elem: + self.sanitizer = ToolParameterSanitizer.from_element( sanitizer_elem ) + else: + self.sanitizer = None self.html = "no html set" - self.repeat = param.get("repeat", None) - self.condition = param.get( "condition", None ) + try: + # These don't do anything right? These we should + # delete these two lines and eliminate checks for + # self.repeat in this file. -John + self.repeat = input_source.elem().get("repeat", None) + self.condition = input_source.elem().get( "condition", None ) + except Exception: + self.repeat = None + # Optional DataToolParameters are used in tools like GMAJ and LAJ - self.optional = string_as_bool( param.get( 'optional', False ) ) + self.optional = input_source.parse_optional() self.validators = [] - for elem in param.findall("validator"): + for elem in input_source.parse_validator_elems(): self.validators.append( validation.Validator.from_element( self, elem ) ) @property @@ -232,11 +244,12 @@ >>> print p.get_html( value="meh" ) <input type="text" name="blah" size="4" value="meh"> """ - def __init__( self, tool, elem ): - ToolParameter.__init__( self, tool, elem ) - self.size = elem.get( 'size' ) - self.value = elem.get( 'value' ) - self.area = string_as_bool( elem.get( 'area', False ) ) + def __init__( self, tool, input_source ): + input_source = ensure_input_source(input_source) + ToolParameter.__init__( self, tool, input_source ) + self.size = input_source.get( 'size' ) + self.value = input_source.get( 'value' ) + self.area = input_source.get_bool( 'area', False ) def get_html_field( self, trans=None, value=None, other_values={} ): if value is None: @@ -274,8 +287,9 @@ dict_collection_visible_keys = ToolParameter.dict_collection_visible_keys + ( 'min', 'max' ) - def __init__( self, tool, elem ): - TextToolParameter.__init__( self, tool, elem ) + def __init__( self, tool, input_source ): + 
input_source = ensure_input_source(input_source) + TextToolParameter.__init__( self, tool, input_source ) if self.value: try: int( self.value ) @@ -283,8 +297,8 @@ raise ValueError( "An integer is required" ) elif self.value is None and not self.optional: raise ValueError( "The settings for the field named '%s' require a 'value' setting and optionally a default value which must be an integer" % self.name ) - self.min = elem.get( 'min' ) - self.max = elem.get( 'max' ) + self.min = input_source.get( 'min' ) + self.max = input_source.get( 'max' ) if self.min: try: self.min = int( self.min ) @@ -352,10 +366,11 @@ dict_collection_visible_keys = ToolParameter.dict_collection_visible_keys + ( 'min', 'max' ) - def __init__( self, tool, elem ): - TextToolParameter.__init__( self, tool, elem ) - self.min = elem.get( 'min' ) - self.max = elem.get( 'max' ) + def __init__( self, tool, input_source ): + input_source = ensure_input_source(input_source) + TextToolParameter.__init__( self, tool, input_source ) + self.min = input_source.get( 'min' ) + self.max = input_source.get( 'max' ) if self.value: try: float( self.value ) @@ -429,11 +444,12 @@ >>> print p.to_param_dict_string( False ) cellophane chests """ - def __init__( self, tool, elem ): - ToolParameter.__init__( self, tool, elem ) - self.truevalue = elem.get( 'truevalue', 'true' ) - self.falsevalue = elem.get( 'falsevalue', 'false' ) - self.checked = string_as_bool( elem.get( 'checked' ) ) + def __init__( self, tool, input_source ): + input_source = ensure_input_source(input_source) + ToolParameter.__init__( self, tool, input_source ) + self.truevalue = input_source.get( 'truevalue', 'true' ) + self.falsevalue = input_source.get( 'falsevalue', 'false' ) + self.checked = input_source.get_bool( 'checked', False ) def get_html_field( self, trans=None, value=None, other_values={} ): checked = self.checked @@ -490,12 +506,13 @@ >>> print p.get_html() <input type="file" name="blah" galaxy-ajax-upload="true"> """ - def __init__( 
self, tool, elem ): + def __init__( self, tool, input_source ): """ Example: C{<param name="bins" type="file" />} """ - ToolParameter.__init__( self, tool, elem ) - self.ajax = string_as_bool( elem.get( 'ajax-upload' ) ) + input_source = ensure_input_source(input_source) + ToolParameter.__init__( self, tool, input_source ) + self.ajax = input_source.get_bool( 'ajax-upload', False ) def get_html_field( self, trans=None, value=None, other_values={} ): return form_builder.FileField( self.name, ajax=self.ajax, value=value ) @@ -553,11 +570,12 @@ """ Parameter that takes a file uploaded via FTP as a value. """ - def __init__( self, tool, elem ): + def __init__( self, tool, input_source ): """ Example: C{<param name="bins" type="file" />} """ - ToolParameter.__init__( self, tool, elem ) + input_source = ensure_input_source(input_source) + ToolParameter.__init__( self, tool, input_source ) @property def visible( self ): @@ -608,9 +626,10 @@ >>> print p.get_html() <input type="hidden" name="blah" value="wax so rockin"> """ - def __init__( self, tool, elem ): - ToolParameter.__init__( self, tool, elem ) - self.value = elem.get( 'value' ) + def __init__( self, tool, input_source ): + input_source = ensure_input_source( input_source ) + ToolParameter.__init__( self, tool, input_source ) + self.value = input_source.get( 'value' ) def get_html_field( self, trans=None, value=None, other_values={} ): return form_builder.HiddenField( self.name, self.value ) @@ -632,9 +651,10 @@ current server base url. Used in all redirects. 
""" - def __init__( self, tool, elem ): - ToolParameter.__init__( self, tool, elem ) - self.value = elem.get( 'value', '' ) + def __init__( self, tool, input_source ): + input_source = ensure_input_source( input_source ) + ToolParameter.__init__( self, tool, input_source ) + self.value = input_source.get( 'value', '' ) def get_value( self, trans ): # url = trans.request.base + self.value @@ -737,30 +757,25 @@ >>> print p.to_param_dict_string( ["y", "z"] ) y,z """ - def __init__( self, tool, elem, context=None ): - ToolParameter.__init__( self, tool, elem ) - self.multiple = string_as_bool( elem.get( 'multiple', False ) ) + def __init__( self, tool, input_source, context=None ): + input_source = ensure_input_source( input_source ) + ToolParameter.__init__( self, tool, input_source ) + self.multiple = input_source.get_bool( 'multiple', False ) # Multiple selects are optional by default, single selection is the inverse. - self.optional = string_as_bool( elem.get( 'optional', self.multiple ) ) - self.display = elem.get( 'display', None ) - self.separator = elem.get( 'separator', ',' ) + self.optional = input_source.parse_optional( self.multiple ) + self.display = input_source.get( 'display', None ) + self.separator = input_source.get( 'separator', ',' ) self.legal_values = set() # TODO: the <dynamic_options> tag is deprecated and should be replaced with the <options> tag. 
- self.dynamic_options = elem.get( "dynamic_options", None ) - options = elem.find( 'options' ) - if options is None: - self.options = None - else: - self.options = dynamic_options.DynamicOptions( options, self ) + self.dynamic_options = input_source.get( "dynamic_options", None ) + self.options = input_source.parse_dynamic_options(self) + if self.options is not None: for validator in self.options.validators: self.validators.append( validator ) if self.dynamic_options is None and self.options is None: - self.static_options = list() - for index, option in enumerate( elem.findall( "option" ) ): - value = option.get( "value" ) + self.static_options = input_source.parse_static_options() + for (title, value, selected) in self.static_options: self.legal_values.add( value ) - selected = string_as_bool( option.get( "selected", False ) ) - self.static_options.append( ( option.text or value, value, selected ) ) self.is_dynamic = ( ( self.dynamic_options is not None ) or ( self.options is not None ) ) def get_options( self, trans, other_values ): @@ -1133,29 +1148,24 @@ >>> print clp.name numerical_column """ - def __init__( self, tool, elem ): - SelectToolParameter.__init__( self, tool, elem ) + def __init__( self, tool, input_source ): + input_source = ensure_input_source( input_source ) + SelectToolParameter.__init__( self, tool, input_source ) self.tool = tool - self.numerical = string_as_bool( elem.get( "numerical", False )) - # Allow specifing force_select for backward compat., but probably - # should use optional going forward for consistency with other - # parameters. 
- if "force_select" in elem.attrib: - self.force_select = string_as_bool( elem.get( "force_select" ) ) - else: - self.force_select = not string_as_bool( elem.get( "optional", False ) ) - self.accept_default = string_as_bool( elem.get( "accept_default", False )) - self.data_ref = elem.get( "data_ref", None ) + self.numerical = input_source.get_bool( "numerical", False ) + self.force_select = not input_source.parse_optional( False ) + self.accept_default = input_source.get_bool( "accept_default", False ) + self.data_ref = input_source.get( "data_ref", None ) self.ref_input = None # Legacy style default value specification... - self.default_value = elem.get( "default_value", None ) + self.default_value = input_source.get( "default_value", None ) if self.default_value is None: # Newer style... more in line with other parameters. - self.default_value = elem.get( "value", None ) + self.default_value = input_source.get( "value", None ) if self.default_value is not None: self.default_value = ColumnListParameter._strip_c( self.default_value ) self.is_dynamic = True - self.usecolnames = string_as_bool( elem.get( "use_header_names", False )) + self.usecolnames = input_source.get_bool( "use_header_names", False ) def from_html( self, value, trans=None, context={} ): """ @@ -1413,13 +1423,18 @@ >>> print p.options [{'selected': False, 'name': 'Heading 1', 'value': 'heading1', 'options': [{'selected': False, 'name': 'Option 1', 'value': 'option1', 'options': []}, {'selected': False, 'name': 'Option 2', 'value': 'option2', 'options': []}, {'selected': False, 'name': 'Heading 1', 'value': 'heading1', 'options': [{'selected': False, 'name': 'Option 3', 'value': 'option3', 'options': []}, {'selected': False, 'name': 'Option 4', 'value': 'option4', 'options': []}]}]}, {'selected': False, 'name': 'Option 5', 'value': 'option5', 'options': []}] """ - def __init__( self, tool, elem, context=None ): + def __init__( self, tool, input_source, context=None ): + input_source = 
ensure_input_source( input_source ) + def recurse_option_elems( cur_options, option_elems ): for option_elem in option_elems: selected = string_as_bool( option_elem.get( 'selected', False ) ) cur_options.append( { 'name': option_elem.get( 'name' ), 'value': option_elem.get( 'value' ), 'options': [], 'selected': selected } ) recurse_option_elems( cur_options[-1]['options'], option_elem.findall( 'option' ) ) - ToolParameter.__init__( self, tool, elem ) + ToolParameter.__init__( self, tool, input_source ) + # TODO: abstract XML out of here - so non-XML InputSources can + # specify DrillDown parameters. + elem = input_source.elem() self.multiple = string_as_bool( elem.get( 'multiple', False ) ) self.display = elem.get( 'display', None ) self.hierarchy = elem.get( 'hierarchy', 'exact' ) # exact or recurse @@ -1653,8 +1668,8 @@ class BaseDataToolParameter( ToolParameter ): - def __init__( self, tool, elem, trans ): - super(BaseDataToolParameter, self).__init__( tool, elem ) + def __init__( self, tool, input_source, trans ): + super(BaseDataToolParameter, self).__init__( tool, input_source ) def _get_history( self, trans, history=None ): class_name = self.__class__.__name__ @@ -1693,32 +1708,27 @@ datatypes_registry = tool.app.datatypes_registry return datatypes_registry - def _parse_formats( self, trans, tool, elem ): + def _parse_formats( self, trans, tool, input_source ): datatypes_registry = self._datatypes_registery( trans, tool ) # Build tuple of classes for supported data formats formats = [] - self.extensions = elem.get( 'format', 'data' ).split( "," ) + self.extensions = input_source.get( 'format', 'data' ).split( "," ) normalized_extensions = [extension.strip().lower() for extension in self.extensions] for extension in normalized_extensions: formats.append( datatypes_registry.get_datatype_by_extension( extension ) ) self.formats = formats - def _parse_options( self, elem ): + def _parse_options( self, input_source ): # TODO: Enhance dynamic options for 
DataToolParameters. Currently, # only the special case key='build' of type='data_meta' is # a valid filter - options = elem.find( 'options' ) - if options is None: - self.options = None - self.options_filter_attribute = None - else: - self.options = dynamic_options.DynamicOptions( options, self ) - - #HACK to get around current hardcoded limitation of when a set of dynamic options is defined for a DataToolParameter - #it always causes available datasets to be filtered by dbkey - #this behavior needs to be entirely reworked (in a backwards compatible manner) - self.options_filter_attribute = options.get( 'options_filter_attribute', None ) + self.options_filter_attribute = None + self.options = input_source.parse_dynamic_options( self ) + if self.options: + # TODO: Abstract away XML handling here. + options_elem = input_source.elem().find('options') + self.options_filter_attribute = options_elem.get( 'options_filter_attribute', None ) self.is_dynamic = self.options is not None def _switch_fields( self, fields, default_field ): @@ -1743,21 +1753,19 @@ security stuff will dramatically alter this anyway. 
""" - def __init__( self, tool, elem, trans=None): - super(DataToolParameter, self).__init__( tool, elem, trans ) + def __init__( self, tool, input_source, trans=None): + input_source = ensure_input_source( input_source ) + super(DataToolParameter, self).__init__( tool, input_source, trans ) # Add metadata validator - if not string_as_bool( elem.get( 'no_validation', False ) ): + if not input_source.get_bool( 'no_validation', False ): self.validators.append( validation.MetadataValidator() ) - self._parse_formats( trans, tool, elem ) - self.multiple = string_as_bool( elem.get( 'multiple', False ) ) - self._parse_options( elem ) + self._parse_formats( trans, tool, input_source ) + self.multiple = input_source.get_bool('multiple', False) + self._parse_options( input_source ) # Load conversions required for the dataset input self.conversions = [] - for conv_elem in elem.findall( "conversion" ): - name = conv_elem.get( "name" ) # name for commandline substitution - conv_extensions = conv_elem.get( "type" ) # target datatype extension - # FIXME: conv_extensions should be able to be an ordered list - assert None not in [ name, type ], 'A name (%s) and type (%s) are required for explicit conversion' % ( name, type ) + for name, conv_extensions in input_source.parse_conversion_tuples(): + assert None not in [ name, conv_extensions ], 'A name (%s) and type (%s) are required for explicit conversion' % ( name, conv_extensions ) conv_types = [ tool.app.datatypes_registry.get_datatype_by_extension( conv_extensions.lower() ) ] self.conversions.append( ( name, conv_extensions, conv_types ) ) @@ -2149,16 +2157,17 @@ """ """ - def __init__( self, tool, elem, trans=None ): - super(DataCollectionToolParameter, self).__init__( tool, elem, trans ) - self.elem = elem - self._parse_formats( trans, tool, elem ) + def __init__( self, tool, input_source, trans=None ): + input_source = ensure_input_source( input_source ) + super(DataCollectionToolParameter, self).__init__( tool, input_source, 
trans ) + self._parse_formats( trans, tool, input_source ) + self._collection_type = input_source.get("collection_type", None) self.multiple = False # Accessed on DataToolParameter a lot, may want in future - self._parse_options( elem ) # TODO: Review and test. + self._parse_options( input_source ) # TODO: Review and test. @property def collection_type( self ): - return self.elem.get( "collection_type", None ) + return self._collection_type def _history_query( self, trans ): dataset_collection_type_descriptions = trans.app.dataset_collections_service.collection_type_descriptions diff -r b9c5b028e389678c1a073f11c9aba5104ae17e89 -r 2dc383cbb700f9d8d6a30eb976b70e8dcf1832db lib/galaxy/tools/parameters/history_query.py --- a/lib/galaxy/tools/parameters/history_query.py +++ b/lib/galaxy/tools/parameters/history_query.py @@ -11,10 +11,10 @@ self.collection_type_description = kwargs.get( "collection_type_description", None ) @staticmethod - def from_parameter_elem( elem, collection_type_descriptions ): + def from_parameter( param, collection_type_descriptions ): """ Take in a tool parameter element. 
""" - collection_type = elem.get( "collection_type", None ) + collection_type = param.collection_type if collection_type: collection_type_description = collection_type_descriptions.for_collection_type( collection_type ) else: diff -r b9c5b028e389678c1a073f11c9aba5104ae17e89 -r 2dc383cbb700f9d8d6a30eb976b70e8dcf1832db lib/galaxy/tools/parameters/output_collect.py --- a/lib/galaxy/tools/parameters/output_collect.py +++ b/lib/galaxy/tools/parameters/output_collect.py @@ -186,6 +186,10 @@ return map( lambda elem: DatasetCollector( **elem.attrib ), primary_dataset_elems ) +def dataset_collectors_from_list( discover_datasets_dicts ): + return map( lambda kwds: DatasetCollector( **kwds ), discover_datasets_dicts ) + + class DatasetCollector( object ): def __init__( self, **kwargs ): diff -r b9c5b028e389678c1a073f11c9aba5104ae17e89 -r 2dc383cbb700f9d8d6a30eb976b70e8dcf1832db lib/galaxy/tools/parser/__init__.py --- /dev/null +++ b/lib/galaxy/tools/parser/__init__.py @@ -0,0 +1,7 @@ +""" Package responsible for parsing tools from files/abstract tool sources. 
+""" +from .interface import ToolSource +from .factory import get_tool_source +from .factory import get_input_source + +__all__ = ["ToolSource", "get_tool_source", "get_input_source"] diff -r b9c5b028e389678c1a073f11c9aba5104ae17e89 -r 2dc383cbb700f9d8d6a30eb976b70e8dcf1832db lib/galaxy/tools/parser/factory.py --- /dev/null +++ b/lib/galaxy/tools/parser/factory.py @@ -0,0 +1,47 @@ +from __future__ import absolute_import + +try: + from galaxy import eggs + eggs.require("PyYAML") +except ImportError: + pass + +import yaml + +from .yaml import YamlToolSource +from .xml import XmlToolSource +from .xml import XmlInputSource +from .interface import InputSource + + +from galaxy.tools.loader import load_tool as load_tool_xml + + +import logging +log = logging.getLogger(__name__) + + +def get_tool_source(config_file, enable_beta_formats=True): + if not enable_beta_formats: + tree = load_tool_xml(config_file) + root = tree.getroot() + return XmlToolSource(root) + + if config_file.endswith(".yml"): + log.info("Loading tool from YAML - this is experimental - tool will not function in future.") + with open(config_file, "r") as f: + as_dict = yaml.load(f) + return YamlToolSource(as_dict) + else: + tree = load_tool_xml(config_file) + root = tree.getroot() + return XmlToolSource(root) + + +def get_input_source(content): + """ Wraps XML elements in a XmlInputSource until everything + is consumed using the tool source interface. + """ + if not isinstance(content, InputSource): + content = XmlInputSource(content) + return content diff -r b9c5b028e389678c1a073f11c9aba5104ae17e89 -r 2dc383cbb700f9d8d6a30eb976b70e8dcf1832db lib/galaxy/tools/parser/interface.py --- /dev/null +++ b/lib/galaxy/tools/parser/interface.py @@ -0,0 +1,246 @@ +from abc import ABCMeta +from abc import abstractmethod + +NOT_IMPLEMENTED_MESSAGE = "Galaxy tool format does not yet support this tool feature." 
+ + +class ToolSource(object): + """ This interface represents an abstract source to parse tool + information from. + """ + __metaclass__ = ABCMeta + default_is_multi_byte = False + + @abstractmethod + def parse_id(self): + """ Parse an ID describing the abstract tool. This is not the + GUID tracked by the tool shed but the simple id (there may be + multiple tools loaded in Galaxy with this same simple id). + """ + + @abstractmethod + def parse_version(self): + """ Parse a version describing the abstract tool. + """ + + def parse_tool_module(self): + """ Load Tool class from a custom module. (Optional). + + If not None, return pair containing module and class (as strings). + """ + return None + + def parse_action_module(self): + """ Load Tool class from a custom module. (Optional). + + If not None, return pair containing module and class (as strings). + """ + return None + + def parse_tool_type(self): + """ Load simple tool type string (e.g. 'data_source', 'default'). + """ + return None + + @abstractmethod + def parse_name(self): + """ Parse a short name for tool (required). """ + + @abstractmethod + def parse_description(self): + """ Parse a description for tool. Longer than name, shorter than help. """ + + def parse_is_multi_byte(self): + """ Parse is_multi_byte from tool - TODO: figure out what this is and + document. + """ + return self.default_is_multi_byte + + def parse_display_interface(self, default): + """ Parse display_interface - fallback to default for the tool type + (supplied as default parameter) if not specified. + """ + return default + + def parse_require_login(self, default): + """ Parse whether the tool requires login (as a bool). + """ + return default + + def parse_request_param_translation_elem(self): + """ Return an XML element describing request parameter translation. + + If we wish to support this feature for non-XML based tools this should + be converted to return some sort of object interface instead of a RAW + XML element. 
+ """ + return None + + @abstractmethod + def parse_command(self): + """ Return string contianing command to run. + """ + + @abstractmethod + def parse_interpreter(self): + """ Return string containing the interpreter to prepend to the command + (for instance this might be 'python' to run a Python wrapper located + adjacent to the tool). + """ + + def parse_redirect_url_params_elem(self): + """ Return an XML element describing redirect_url_params. + + If we wish to support this feature for non-XML based tools this should + be converted to return some sort of object interface instead of a RAW + XML element. + """ + return None + + def parse_version_command(self): + """ Parse command used to determine version of primary application + driving the tool. Return None to not generate or record such a command. + """ + return None + + def parse_version_command_interpreter(self): + """ Parse command used to determine version of primary application + driving the tool. Return None to not generate or record such a command. + """ + return None + + def parse_parallelism(self): + """ Return a galaxy.jobs.ParallismInfo object describing task splitting + or None. + """ + return None + + def parse_hidden(self): + """ Return boolean indicating whether tool should be hidden in the tool menu. + """ + return False + + @abstractmethod + def parse_requirements_and_containers(self): + """ Return pair of ToolRequirement and ContainerDescription lists. """ + + @abstractmethod + def parse_input_pages(self): + """ Return a PagesSource representing inputs by page for tool. """ + + @abstractmethod + def parse_outputs(self, tool): + """ Return a list of ToolOutput objects. + """ + + @abstractmethod + def parse_stdio(self): + """ Builds lists of ToolStdioExitCode and ToolStdioRegex objects + to describe tool execution error conditions. 
+ """ + return [], [] + + def parse_tests_to_dict(self): + return {'tests': []} + + +class PagesSource(object): + """ Contains a list of Pages - each a list of InputSources - + each item in the outer list representing a page of inputs. + Pages are deprecated so ideally this outer list will always + be exactly a singleton. + """ + def __init__(self, page_sources): + self.page_sources = page_sources + + @property + def inputs_defined(self): + return True + + +class PageSource(object): + __metaclass__ = ABCMeta + + def parse_display(self): + return None + + @abstractmethod + def parse_input_sources(self): + """ Return a list of InputSource objects. """ + + +class InputSource(object): + __metaclass__ = ABCMeta + default_optional = False + + def elem(self): + # For things in transition that still depend on XML - provide a way + # to grab it and just throw an error if feature is attempted to be + # used with other tool sources. + raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE) + + @abstractmethod + def get(self, key, value=None): + """ Return simple named properties as string for this input source. + keys to be supported depend on the parameter type. + """ + + @abstractmethod + def get_bool(self, key, default): + """ Return simple named properties as boolean for this input source. + keys to be supported depend on the parameter type. + """ + + def parse_label(self): + return self.get("label") + + def parse_help(self): + return self.get("label") + + def parse_sanitizer_elem(self): + """ Return an XML description of sanitizers. This is a stop gap + until we can rework galaxy.tools.parameters.sanitize to not + explicitly depend on XML. + """ + return None + + def parse_validator_elems(self): + """ Return an XML description of sanitizers. This is a stop gap + until we can rework galaxy.tools.parameters.validation to not + explicitly depend on XML. + """ + return [] + + def parse_optional(self, default=None): + """ Return boolean indicating wheter parameter is optional. 
""" + if default is None: + default = self.default_optional + return self.get_bool( "optional", default ) + + def parse_dynamic_options(self, param): + """ Return a galaxy.tools.parameters.dynamic_options.DynamicOptions + if appropriate. + """ + return None + + def parse_static_options(self): + """ Return list of static options if this is a select type without + defining a dynamic options. + """ + return [] + + def parse_conversion_tuples(self): + """ Return list of (name, extension) to describe explicit conversions. + """ + return [] + + def parse_nested_inputs_source(self): + # For repeats + raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE) + + def parse_test_input_source(self): + # For conditionals + raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE) + + def parse_when_input_sources(self): + raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE) This diff is so big that we needed to truncate the remainder. Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
commits-noreply@bitbucket.org