[galaxy-commits] commit/galaxy-central: Scott McManus: Added application of regular expressions and exit code. Pulling exit

19 Jun 2012

1 new commit in galaxy-central:


https://bitbucket.org/galaxy/galaxy-central/changeset/3b2dc0a51d14/
changeset:   3b2dc0a51d14
user:        Scott McManus
date:        2012-06-19 17:35:36
summary:     Added application of regular expressions and exit code. Pulling exit
codes from the runners will be part of a separate submission.
affected #:  3 files

diff -r 7c495f835a1d436ad33dff6107784f106cc24980 -r 3b2dc0a51d147c3fc8f0560d8fd3ebefc0792e99 lib/galaxy/jobs/__init__.py

--- a/lib/galaxy/jobs/__init__.py
+++ b/lib/galaxy/jobs/__init__.py
@@ -305,10 +305,81 @@
         if job.state == job.states.DELETED or job.state == job.states.ERROR:
             #ERROR at this point means the job was deleted by an administrator.
             return self.fail( job.info )
-        if stderr:
-            job.state = job.states.ERROR
+
+        err_msg = ""
+        # Check exit codes and match regular expressions against stdout and 
+        # stderr if this tool was configured to do so.
+        if ( len( self.tool.stdio_regexes ) > 0 or
+             len( self.tool.exit_codes ) > 0 ):
+            # We will check the exit code ranges in the order in which
+            # they were specified. Each exit_code is a ToolStdioExitCode
+            # that includes an applicable range. If the exit code was in
+            # that range, then apply the error level and add in a message.
+            # If we've reached a fatal error rule, then stop.
+            max_error_level = galaxy.tools.StdioErrorLevel.NO_ERROR
+            for exit_code in self.tool.stdio_exit_codes:
+                # TODO: Fetch the exit code from the .rc file:
+                tool_exit_code = 0
+                if ( tool_exit_code >= exit_code.range_start and 
+                     tool_exit_code <= exit_code.range_end ):
+                    if None != exit_code.desc:
+                        err_msg += exit_code.desc
+                    # TODO: Find somewhere to stick the err_msg - possibly to
+                    # the source (stderr/stdout), possibly in a new db column.
+                    max_error_level = max( max_error_level, 
+                                           exit_code.error_level )
+                    if max_error_level >= galaxy.tools.StdioErrorLevel.FATAL:
+                        break
+            # If there is a regular expression for scanning stdout/stderr,
+            # then we assume that the tool writer overrode the default
+            # behavior of just setting an error if there is *anything* on
+            # stderr. 
+            if max_error_level < galaxy.tools.StdioErrorLevel.FATAL:
+                # We'll examine every regex. Each regex specifies whether
+                # it is to be run on stdout, stderr, or both. (A regex may
+                # select neither stream, in which case it is not applied
+                # to anything.) We record the highest error level seen;
+                # the levels are currently "warning" and "fatal".
+                # If fatal, then we set the job's state to ERROR.
+                # If warning, then we still set the job's state to OK
+                # but include a message. We only scan the regexes if the
+                # exit code rules have not already produced a fatal error.
+                for regex in self.tool.stdio_regexes:
+                    # If ( this regex should be matched against stdout )
+                    #   - Run the regex's match pattern against stdout
+                    #   - If it matched, then determine the error level.
+                    #       o If it was fatal, then we're done - break.
+                    # Repeat the stdout stuff for stderr.
+                    # TODO: Collapse this into a single function.
+                    if ( regex.stdout_match ):
+                        regex_match = re.search( regex.match, stdout )
+                        if ( regex_match ):
+                            err_msg += self.regex_err_msg( regex_match, regex )
+                            max_error_level = max( max_error_level, regex.error_level )
+                            if max_error_level >= galaxy.tools.StdioErrorLevel.FATAL:
+                                break
+                    if ( regex.stderr_match ): 
+                        regex_match = re.search( regex.match, stderr )
+                        if ( regex_match ):
+                            err_msg += self.regex_err_msg( regex_match, regex )
+                            max_error_level = max( max_error_level,
+                                                   regex.error_level )
+                            if max_error_level >= galaxy.tools.StdioErrorLevel.FATAL:
+                                break
+            # If we encountered a fatal error, then we'll need to set the
+            # job state accordingly. Otherwise the job is ok:
+            if max_error_level >= galaxy.tools.StdioErrorLevel.FATAL:
+                job.state = job.states.ERROR
+            else:
+                job.state = job.states.OK
+        # When there are no regular expressions and no exit codes to check,
+        # default to the previous behavior: when there's anything on stderr
+        # the job has an error, and the job is ok otherwise. 
         else:
-            job.state = job.states.OK
+            if stderr:
+                job.state = job.states.ERROR
+            else:
+                job.state = job.states.OK
         if self.version_string_cmd:
             version_filename = self.get_version_string_path()
             if os.path.exists(version_filename):
@@ -330,6 +401,7 @@
                         return self.fail( "Job %s's output dataset(s) could not be read" % job.id )
         job_context = ExpressionContext( dict( stdout = stdout, stderr = stderr ) )
         job_tool = self.app.toolbox.tools_by_id.get( job.tool_id, None )
+
         def in_directory( file, directory ):
             # Make both absolute.
             directory = os.path.abspath( directory )
@@ -370,7 +442,11 @@
                 # Update (non-library) job output datasets through the object store
                 if dataset not in job.output_library_datasets:
                     self.app.object_store.update_from_file(dataset.dataset, create=True)
-                if context['stderr']:
+                # TODO: The context['stderr'] holds stderr's contents. An error
+                # only really occurs if the job also has an error. So check the
+                # job's state:
+                #if context['stderr']:
+                if job.states.ERROR == job.state:
                     dataset.blurb = "error"
                 elif dataset.has_data():
                     # If the tool was expected to set the extension, attempt to retrieve it
@@ -385,7 +461,14 @@
                      ( not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) \
                        and self.app.config.retry_metadata_internally ):
                         dataset.set_meta( overwrite = False )
-                    elif not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and not context['stderr']:
+                    # TODO: The context['stderr'] used to indicate that there
+                    # was an error. Now we must rely on the job's state instead;
+                    # the state reflects an error whether the tool relied on
+                    # stderr or on exit codes and regular expressions to
+                    # signal it. So job.states.ERROR != job.state replaces
+                    # the old "not context['stderr']" test.
+                    #elif not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and not context['stderr']:
+                    elif not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and job.states.ERROR != job.state: 
                         dataset._state = model.Dataset.states.FAILED_METADATA
                     else:
                         #load metadata from file
@@ -415,7 +498,12 @@
                     if dataset.ext == 'auto':
                         dataset.extension = 'txt'
                 self.sa_session.add( dataset )
-            if context['stderr']:
+            # TODO: job.states.ERROR == job.state now replaces checking
+            # stderr for a problem:
+            #if context['stderr']:
+            if job.states.ERROR == job.state:
+                log.debug( "setting dataset state to ERROR" )
+                # TODO: This is where the state is being set to error. Change it!
                 dataset_assoc.dataset.dataset.state = model.Dataset.states.ERROR
             else:
                 dataset_assoc.dataset.dataset.state = model.Dataset.states.OK
@@ -480,6 +568,29 @@
         if self.app.config.cleanup_job == 'always' or ( not stderr and self.app.config.cleanup_job == 'onsuccess' ):
             self.cleanup()
 
+    def regex_err_msg( self, match, regex ):
+        """
+        Return a message about the match on tool output using the given
+        ToolStdioRegex regex object. The match argument is a MatchObject
+        that will contain the string matched on.
+        """
+        # Get the description for the error level: 
+        err_msg = galaxy.tools.StdioErrorLevel.desc( regex.error_level ) + ": "
+        # If there's a description for the regular expression, then use it.
+        # Otherwise, we'll take the first 256 characters of the match.
+        if None != regex.desc:
+            err_msg += regex.desc
+        else:
+            mstart = match.start()
+            mend = match.end()
+            err_msg += "Matched on "
+            # TODO: Move the constant 256 somewhere else besides here.
+            if mend - mstart > 256:
+                err_msg += match.string[ mstart : mstart+256 ] + "..."
+            else:
+                err_msg += match.string[ mstart: mend ] 
+        return err_msg
+
     def cleanup( self ):
         # remove temporary files
         try:


diff -r 7c495f835a1d436ad33dff6107784f106cc24980 -r 3b2dc0a51d147c3fc8f0560d8fd3ebefc0792e99 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py
+++ b/lib/galaxy/tools/__init__.py
@@ -37,6 +37,23 @@
 
 log = logging.getLogger( __name__ )
 
+# These determine stdio-based error levels from matching on regular expressions
+# and exit codes. They are meant to be used comparatively, such as showing
+# that warning < fatal. This is really meant to just be an enum. 
+class StdioErrorLevel( object ):
+    NO_ERROR = 0
+    WARNING = 1
+    FATAL = 2
+    MAX = 2
+    descs = {NO_ERROR : 'No error', WARNING : 'Warning', FATAL : 'Fatal error'}
+    @staticmethod
+    def desc( error_level ):
+        err_msg = "Unknown error"
+        if ( error_level > 0 and
+             error_level <= StdioErrorLevel.MAX ):
+            err_msg = StdioErrorLevel.descs[ error_level ]
+        return err_msg
+
 class ToolNotFoundException( Exception ):
     pass
 
@@ -1140,6 +1157,12 @@
             # a warning and skip to the next.
             for exit_code_elem in ( stdio_elem.findall( "exit_code" ) ):
                 exit_code = ToolStdioExitCode()
+                # Each exit code has an optional description that can be
+                # part of the "desc" or "description" attributes:
+                exit_code.desc = exit_code_elem.get( "desc" )
+                if None == exit_code.desc:
+                    exit_code.desc = exit_code_elem.get( "description" )
+                # Parse the error level: 
                 exit_code.error_level = (
                     self.parse_error_level( exit_code_elem.get( "level" )))
                 code_range = exit_code_elem.get( "range", "" )
@@ -1155,11 +1178,9 @@
                 #  X:Y   - Split on the colon. We do not allow a colon 
                 #          without a beginning or end, though we could. 
                 # Also note that whitespace is eliminated.
-                # TODO: Turn this into a single match - it will be 
-                # more efficient
-                string.strip( code_range )
+                # TODO: Turn this into a single match - it should be 
+                # more efficient.
                 code_range = re.sub( "\s", "", code_range )
-                log.debug( "Code range after sub: %s" % code_range )
                 code_ranges = re.split( ":", code_range )
                 if ( len( code_ranges ) == 2 ):
                     if ( None == code_ranges[0] or '' == code_ranges[0] ):
@@ -1216,6 +1237,12 @@
             for regex_elem in ( stdio_elem.findall( "regex" ) ):
                 # TODO: Fill in ToolStdioRegex
                 regex = ToolStdioRegex() 
+                # Each regex has an optional description that can be
+                # part of the "desc" or "description" attributes:
+                regex.desc = regex_elem.get( "desc" )
+                if None == regex.desc:
+                    regex.desc = regex_elem.get( "description" )
+                # Parse the error level 
                 regex.error_level = ( 
                     self.parse_error_level( regex_elem.get( "level" ) ) )
                 regex.match = regex_elem.get( "match", "" )
@@ -1243,9 +1270,9 @@
                 # and anything to do with "err". If neither stdout nor
                 # stderr were specified, then raise a warning and scan both.
                 for src in src_list:
-                    if re.match( "out", src, re.IGNORECASE ):
+                    if re.search( "out", src, re.IGNORECASE ):
                         regex.stdout_match = True
-                    if re.match( "err", src, re.IGNORECASE ):
+                    if re.search( "err", src, re.IGNORECASE ):
                         regex.stderr_match = True
                     if (not regex.stdout_match and not regex.stderr_match):
                         log.warning( "Unable to determine if tool stream "
@@ -1262,24 +1289,25 @@
                 trace_msg = repr( traceback.format_tb( trace ) )
                 log.error( "Traceback: %s" % trace_msg ) 
 
+    # TODO: This method doesn't have to be part of the Tool class.
     def parse_error_level( self, err_level ):
         """
         Return fatal or warning depending on what's in the error level.
         This will assume that the error level fatal is returned if it's 
-        unparsable. (This doesn't have to be part of the Tool class.)
+        unparsable. 
         """
         # What should the default be? I'm claiming it should be fatal:
         # if you went to the trouble to write the rule, then it's 
         # probably a problem. I think there are easily three substantial
         # camps: make it fatal, make it a warning, or, if it's missing,
-        # just throw an exception and ignore it.
-        return_level = "fatal"
+        # just throw an exception and ignore the exit_code element.
+        return_level = StdioErrorLevel.FATAL 
         try:
             if ( None != err_level ):
                 if ( re.search( "warning", err_level, re.IGNORECASE ) ):
-                    return_level = "warning"
+                    return_level = StdioErrorLevel.WARNING 
                 elif ( re.search( "fatal", err_level, re.IGNORECASE ) ):
-                    return_level = "fatal"
+                    return_level = StdioErrorLevel.FATAL
         except Exception, e:
             log.error( "Exception in parse_error_level " 
                      + str(sys.exc_info() ) )
@@ -2333,16 +2361,18 @@
             installed_tool_dependencies = self.tool_shed_repository.tool_dependencies
         else:
             installed_tool_dependencies = None
-        for requirement in self.requirements:
-            # TODO: currently only supporting requirements of type package,
-            #       need to implement some mechanism for mapping other types
-            #       back to packages
+         for requirement in self.requirements:
+             # TODO: currently only supporting requirements of type package,
+             #       need to implement some mechanism for mapping other types
+             #       back to packages
             log.debug( "Building dependency shell command for dependency '%s'", requirement.name )
-            if requirement.type == 'package':
+             if requirement.type == 'package':
                 script_file, base_path, version = self.app.toolbox.dependency_manager.find_dep( name=requirement.name,
                                                                                                 version=requirement.version,
                                                                                                 type=requirement.type,
                                                                                                 installed_tool_dependencies=installed_tool_dependencies )
+            if requirement.type == 'package':
+                script_file, base_path, version = self.app.toolbox.dependency_manager.find_dep( requirement.name, requirement.version )
                 if script_file is None and base_path is None:
                     log.warn( "Failed to resolve dependency on '%s', ignoring", requirement.name )
                 elif script_file is None:
@@ -2617,7 +2647,7 @@
                 elif isinstance( input, SelectToolParameter ):
                     param_dict.update( { 'type' : 'select', 
                                          'html' : urllib.quote( input.get_html( trans ) ),
-                                         'options': input.static_options
+                                         'options': input.static_options 
                                          } )
                 elif isinstance( input, Conditional ):
                     # TODO.
@@ -2626,7 +2656,7 @@
                     param_dict.update( { 'type' : 'number', 'init_value' : input.value,
                                          'html' : urllib.quote( input.get_html( trans ) ),
                                          'min': input.min,
-                                         'max': input.max,
+                                         'max': input.max
                                          'value': input.value
                                           } )
                 else:
@@ -2798,6 +2828,7 @@
         self.stderr_match = False
         # TODO: Define a common class or constant for error level:
         self.error_level = "fatal" 
+        self.desc = ""
 
 class ToolStdioExitCode( object ):
     """
@@ -2809,6 +2840,7 @@
         self.range_end = float( "inf" )
         # TODO: Define a common class or constant for error level:
         self.error_level = "fatal"
+        self.desc = ""
 
 class ToolParameterValueWrapper( object ):
     """

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled and is
configured to send to the recipient address of this email.

    

[galaxy-commits] commit/galaxy-central: Scott McManus: Added application of regular expressions and exit code. Pulling exit

Bitbucket