commit/galaxy-central: jmchilton: Refactor job status checking logic into its own module.

10 Oct 2013

1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/c78f3f687fbc/
Changeset:   c78f3f687fbc
User:        jmchilton
Date:        2013-10-10 07:17:17
Summary:     Refactor job status checking logic into its own module.
Add unit tests for some basic behaviors.
Affected #:  5 files

diff -r ba45d14a2c8fe72554390872deb5f4ffdd66170f -r c78f3f687fbc8592d32a67738e57d847c00ce0c6 lib/galaxy/jobs/__init__.py

--- a/lib/galaxy/jobs/__init__.py
+++ b/lib/galaxy/jobs/__init__.py
@@ -25,6 +25,7 @@
 from galaxy.util.bunch import Bunch
 from galaxy.util.expressions import ExpressionContext
 from galaxy.util.json import from_json_string
+from .output_checker import check_output
 
 log = logging.getLogger( __name__ )
 
@@ -1079,158 +1080,7 @@
             self.cleanup()
 
     def check_tool_output( self, stdout, stderr, tool_exit_code, job ):
-        """
-        Check the output of a tool - given the stdout, stderr, and the tool's
-        exit code, return True if the tool exited succesfully and False
-        otherwise. No exceptions should be thrown. If this code encounters
-        an exception, it returns True so that the workflow can continue;
-        otherwise, a bug in this code could halt workflow progress.
-        Note that, if the tool did not define any exit code handling or
-        any stdio/stderr handling, then it reverts back to previous behavior:
-        if stderr contains anything, then False is returned.
-        Note that the job id is just for messages.
-        """
-        # By default, the tool succeeded. This covers the case where the code
-        # has a bug but the tool was ok, and it lets a workflow continue.
-        success = True
-
-        try:
-            # Check exit codes and match regular expressions against stdout and
-            # stderr if this tool was configured to do so.
-            # If there is a regular expression for scanning stdout/stderr,
-            # then we assume that the tool writer overwrote the default
-            # behavior of just setting an error if there is *anything* on
-            # stderr.
-            if ( len( self.tool.stdio_regexes ) > 0 or
-                 len( self.tool.stdio_exit_codes ) > 0 ):
-                # Check the exit code ranges in the order in which
-                # they were specified. Each exit_code is a StdioExitCode
-                # that includes an applicable range. If the exit code was in
-                # that range, then apply the error level and add a message.
-                # If we've reached a fatal error rule, then stop.
-                max_error_level = galaxy.tools.StdioErrorLevel.NO_ERROR
-                if tool_exit_code != None:
-                    for stdio_exit_code in self.tool.stdio_exit_codes:
-                        if ( tool_exit_code >= stdio_exit_code.range_start and
-                             tool_exit_code <= stdio_exit_code.range_end ):
-                            # Tack on a generic description of the code
-                            # plus a specific code description. For example,
-                            # this might prepend "Job 42: Warning (Out of Memory)\n".
-                            code_desc = stdio_exit_code.desc
-                            if ( None == code_desc ):
-                                code_desc = ""
-                            tool_msg = ( "%s: Exit code %d (%s)" % (
-                                         galaxy.tools.StdioErrorLevel.desc( stdio_exit_code.error_level ),
-                                         tool_exit_code,
-                                         code_desc ) )
-                            log.info( "Job %s: %s" % (job.get_id_tag(), tool_msg) )
-                            stderr = tool_msg + "\n" + stderr
-                            max_error_level = max( max_error_level,
-                                                   stdio_exit_code.error_level )
-                            if ( max_error_level >=
-                                 galaxy.tools.StdioErrorLevel.FATAL ):
-                                break
-
-                if max_error_level < galaxy.tools.StdioErrorLevel.FATAL:
-                    # We'll examine every regex. Each regex specifies whether
-                    # it is to be run on stdout, stderr, or both. (It is
-                    # possible for neither stdout nor stderr to be scanned,
-                    # but those regexes won't be used.) We record the highest
-                    # error level, which are currently "warning" and "fatal".
-                    # If fatal, then we set the job's state to ERROR.
-                    # If warning, then we still set the job's state to OK
-                    # but include a message. We'll do this if we haven't seen
-                    # a fatal error yet
-                    for regex in self.tool.stdio_regexes:
-                        # If ( this regex should be matched against stdout )
-                        #   - Run the regex's match pattern against stdout
-                        #   - If it matched, then determine the error level.
-                        #       o If it was fatal, then we're done - break.
-                        # Repeat the stdout stuff for stderr.
-                        # TODO: Collapse this into a single function.
-                        if ( regex.stdout_match ):
-                            regex_match = re.search( regex.match, stdout,
-                                                     re.IGNORECASE )
-                            if ( regex_match ):
-                                rexmsg = self.regex_err_msg( regex_match, regex)
-                                log.info( "Job %s: %s"
-                                        % ( job.get_id_tag(), rexmsg ) )
-                                stdout = rexmsg + "\n" + stdout
-                                max_error_level = max( max_error_level,
-                                                       regex.error_level )
-                                if ( max_error_level >=
-                                     galaxy.tools.StdioErrorLevel.FATAL ):
-                                    break
-
-                        if ( regex.stderr_match ):
-                            regex_match = re.search( regex.match, stderr,
-                                                     re.IGNORECASE )
-                            if ( regex_match ):
-                                rexmsg = self.regex_err_msg( regex_match, regex)
-                                log.info( "Job %s: %s"
-                                        % ( job.get_id_tag(), rexmsg ) )
-                                stderr = rexmsg + "\n" + stderr
-                                max_error_level = max( max_error_level,
-                                                       regex.error_level )
-                                if ( max_error_level >=
-                                     galaxy.tools.StdioErrorLevel.FATAL ):
-                                    break
-
-                # If we encountered a fatal error, then we'll need to set the
-                # job state accordingly. Otherwise the job is ok:
-                if max_error_level >= galaxy.tools.StdioErrorLevel.FATAL:
-                    success = False
-                else:
-                    success = True
-
-            # When there are no regular expressions and no exit codes to check,
-            # default to the previous behavior: when there's anything on stderr
-            # the job has an error, and the job is ok otherwise.
-            else:
-                # TODO: Add in the tool and job id:
-                # log.debug( "Tool did not define exit code or stdio handling; "
-                #          + "checking stderr for success" )
-                if stderr:
-                    success = False
-                else:
-                    success = True
-
-        # On any exception, return True.
-        except:
-            tb = traceback.format_exc()
-            log.warning( "Tool check encountered unexpected exception; "
-                       + "assuming tool was successful: " + tb )
-            success = True
-
-        # Store the modified stdout and stderr in the job:
-        if None != job:
-            job.stdout = stdout
-            job.stderr = stderr
-
-        return success
-
-    def regex_err_msg( self, match, regex ):
-        """
-        Return a message about the match on tool output using the given
-        ToolStdioRegex regex object. The regex_match is a MatchObject
-        that will contain the string matched on.
-        """
-        # Get the description for the error level:
-        err_msg = galaxy.tools.StdioErrorLevel.desc( regex.error_level ) + ": "
-        # If there's a description for the regular expression, then use it.
-        # Otherwise, we'll take the first 256 characters of the match.
-        if None != regex.desc:
-            err_msg += regex.desc
-        else:
-            mstart = match.start()
-            mend = match.end()
-            err_msg += "Matched on "
-            # TODO: Move the constant 256 somewhere else besides here.
-            if mend - mstart > 256:
-                err_msg += match.string[ mstart : mstart+256 ] + "..."
-            else:
-                err_msg += match.string[ mstart: mend ]
-        return err_msg
+        return check_output( self.tool, stdout, stderr, tool_exit_code, job )
 
     def cleanup( self ):
         # remove temporary files

diff -r ba45d14a2c8fe72554390872deb5f4ffdd66170f -r c78f3f687fbc8592d32a67738e57d847c00ce0c6 lib/galaxy/jobs/error_level.py
--- /dev/null
+++ b/lib/galaxy/jobs/error_level.py
@@ -0,0 +1,25 @@
+
+
+# These determine stdio-based error levels from matching on regular expressions
+# and exit codes. They are meant to be used comparatively, such as showing
+# that warning < fatal. This is really meant to just be an enum.
+class StdioErrorLevel( object ):
+    NO_ERROR = 0
+    LOG = 1
+    WARNING = 2
+    FATAL = 3
+    MAX = 3
+    descs = {
+        NO_ERROR: 'No error',
+        LOG: 'Log',
+        WARNING: 'Warning',
+        FATAL: 'Fatal error',
+    }
+
+    @staticmethod
+    def desc( error_level ):
+        err_msg = "Unknown error"
+        if ( error_level > 0 and
+             error_level <= StdioErrorLevel.MAX ):
+            err_msg = StdioErrorLevel.descs[ error_level ]
+        return err_msg

diff -r ba45d14a2c8fe72554390872deb5f4ffdd66170f -r c78f3f687fbc8592d32a67738e57d847c00ce0c6 lib/galaxy/jobs/output_checker.py
--- /dev/null
+++ b/lib/galaxy/jobs/output_checker.py
@@ -0,0 +1,164 @@
+import re
+from .error_level import StdioErrorLevel
+import traceback
+
+from logging import getLogger
+log = getLogger( __name__ )
+
+
+def check_output( tool, stdout, stderr, tool_exit_code, job ):
+    """
+    Check the output of a tool - given the stdout, stderr, and the tool's
+    exit code, return True if the tool exited successfully and False
+    otherwise. No exceptions should be thrown. If this code encounters
+    an exception, it returns True so that the workflow can continue;
+    otherwise, a bug in this code could halt workflow progress.
+
+    Note that, if the tool did not define any exit code handling or
+    any stdio/stderr handling, then it reverts back to previous behavior:
+    if stderr contains anything, then False is returned.
+
+    Note that the job id is just for messages.
+    """
+    # By default, the tool succeeded. This covers the case where the code
+    # has a bug but the tool was ok, and it lets a workflow continue.
+    success = True
+
+    try:
+        # Check exit codes and match regular expressions against stdout and
+        # stderr if this tool was configured to do so.
+        # If there is a regular expression for scanning stdout/stderr,
+        # then we assume that the tool writer overwrote the default
+        # behavior of just setting an error if there is *anything* on
+        # stderr.
+        if ( len( tool.stdio_regexes ) > 0 or
+             len( tool.stdio_exit_codes ) > 0 ):
+            # Check the exit code ranges in the order in which
+            # they were specified. Each exit_code is a StdioExitCode
+            # that includes an applicable range. If the exit code was in
+            # that range, then apply the error level and add a message.
+            # If we've reached a fatal error rule, then stop.
+            max_error_level = StdioErrorLevel.NO_ERROR
+            if tool_exit_code != None:
+                for stdio_exit_code in tool.stdio_exit_codes:
+                    if ( tool_exit_code >= stdio_exit_code.range_start and
+                         tool_exit_code <= stdio_exit_code.range_end ):
+                        # Tack on a generic description of the code
+                        # plus a specific code description. For example,
+                        # this might prepend "Warning: Exit code 42 (Out of Memory)\n".
+                        code_desc = stdio_exit_code.desc
+                        if ( None == code_desc ):
+                            code_desc = ""
+                        tool_msg = ( "%s: Exit code %d (%s)" % (
+                                     StdioErrorLevel.desc( stdio_exit_code.error_level ),
+                                     tool_exit_code,
+                                     code_desc ) )
+                        log.info( "Job %s: %s" % (job.get_id_tag(), tool_msg) )
+                        stderr = tool_msg + "\n" + stderr
+                        max_error_level = max( max_error_level,
+                                               stdio_exit_code.error_level )
+                        if ( max_error_level >=
+                             StdioErrorLevel.FATAL ):
+                            break
+
+            if max_error_level < StdioErrorLevel.FATAL:
+                # We'll examine every regex. Each regex specifies whether
+                # it is to be run on stdout, stderr, or both. (It is
+                # possible for neither stdout nor stderr to be scanned,
+                # but those regexes won't be used.) We record the highest
+                # error level, which are currently "warning" and "fatal".
+                # If fatal, then we set the job's state to ERROR.
+                # If warning, then we still set the job's state to OK
+                # but include a message. We'll do this if we haven't seen
+                # a fatal error yet
+                for regex in tool.stdio_regexes:
+                    # If ( this regex should be matched against stdout )
+                    #   - Run the regex's match pattern against stdout
+                    #   - If it matched, then determine the error level.
+                    #       o If it was fatal, then we're done - break.
+                    # Repeat the stdout stuff for stderr.
+                    # TODO: Collapse this into a single function.
+                    if ( regex.stdout_match ):
+                        regex_match = re.search( regex.match, stdout,
+                                                 re.IGNORECASE )
+                        if ( regex_match ):
+                            rexmsg = __regex_err_msg( regex_match, regex)
+                            log.info( "Job %s: %s"
+                                    % ( job.get_id_tag(), rexmsg ) )
+                            stdout = rexmsg + "\n" + stdout
+                            max_error_level = max( max_error_level,
+                                                   regex.error_level )
+                            if ( max_error_level >=
+                                 StdioErrorLevel.FATAL ):
+                                break
+
+                    if ( regex.stderr_match ):
+                        regex_match = re.search( regex.match, stderr,
+                                                 re.IGNORECASE )
+                        if ( regex_match ):
+                            rexmsg = __regex_err_msg( regex_match, regex)
+                            log.info( "Job %s: %s"
+                                    % ( job.get_id_tag(), rexmsg ) )
+                            stderr = rexmsg + "\n" + stderr
+                            max_error_level = max( max_error_level,
+                                                   regex.error_level )
+                            if ( max_error_level >=
+                                 StdioErrorLevel.FATAL ):
+                                break
+
+            # If we encountered a fatal error, then we'll need to set the
+            # job state accordingly. Otherwise the job is ok:
+            if max_error_level >= StdioErrorLevel.FATAL:
+                success = False
+            else:
+                success = True
+
+        # When there are no regular expressions and no exit codes to check,
+        # default to the previous behavior: when there's anything on stderr
+        # the job has an error, and the job is ok otherwise.
+        else:
+            # TODO: Add in the tool and job id:
+            # log.debug( "Tool did not define exit code or stdio handling; "
+            #          + "checking stderr for success" )
+            if stderr:
+                success = False
+            else:
+                success = True
+
+    # On any exception, return True.
+    except:
+        tb = traceback.format_exc()
+        log.warning( "Tool check encountered unexpected exception; "
+                   + "assuming tool was successful: " + tb )
+        success = True
+
+    # Store the modified stdout and stderr in the job:
+    if None != job:
+        job.stdout = stdout
+        job.stderr = stderr
+
+    return success
+
+
+def __regex_err_msg( match, regex ):
+    """
+    Return a message about the match on tool output using the given
+    ToolStdioRegex regex object. The match argument is a MatchObject
+    that will contain the string matched on.
+    """
+    # Get the description for the error level:
+    err_msg = StdioErrorLevel.desc( regex.error_level ) + ": "
+    # If there's a description for the regular expression, then use it.
+    # Otherwise, we'll take the first 256 characters of the match.
+    if None != regex.desc:
+        err_msg += regex.desc
+    else:
+        mstart = match.start()
+        mend = match.end()
+        err_msg += "Matched on "
+        # TODO: Move the constant 256 somewhere else besides here.
+        if mend - mstart > 256:
+            err_msg += match.string[ mstart : mstart + 256 ] + "..."
+        else:
+            err_msg += match.string[ mstart: mend ]
+    return err_msg

diff -r ba45d14a2c8fe72554390872deb5f4ffdd66170f -r c78f3f687fbc8592d32a67738e57d847c00ce0c6 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py
+++ b/lib/galaxy/tools/__init__.py
@@ -34,6 +34,7 @@
 from sqlalchemy import and_
 
 from galaxy import jobs, model
+from galaxy.jobs.error_level import StdioErrorLevel
 from galaxy.datatypes.metadata import JobExternalOutputMetadataWrapper
 from galaxy.jobs import ParallelismInfo
 from galaxy.tools.actions import DefaultToolAction
@@ -64,33 +65,11 @@
 from tool_shed.util import shed_util_common
 from .loader import load_tool, template_macro_params
 
+
 log = logging.getLogger( __name__ )
 
 WORKFLOW_PARAMETER_REGULAR_EXPRESSION =  re.compile( '''\$\{.+?\}''' )
 
-# These determine stdio-based error levels from matching on regular expressions
-# and exit codes. They are meant to be used comparatively, such as showing
-# that warning < fatal. This is really meant to just be an enum.
-class StdioErrorLevel( object ):
-    NO_ERROR = 0
-    LOG  = 1
-    WARNING = 2
-    FATAL = 3
-    MAX = 3
-    descs = {
-        NO_ERROR : 'No error',
-        LOG: 'Log',
-        WARNING : 'Warning',
-        FATAL : 'Fatal error'
-    }
-    @staticmethod
-    def desc( error_level ):
-        err_msg = "Unknown error"
-        if ( error_level > 0 and
-             error_level <= StdioErrorLevel.MAX ):
-            err_msg = StdioErrorLevel.descs[ error_level ]
-        return err_msg
-
 class ToolNotFoundException( Exception ):
     pass
 

diff -r ba45d14a2c8fe72554390872deb5f4ffdd66170f -r c78f3f687fbc8592d32a67738e57d847c00ce0c6 test/unit/test_job_output_checker.py
--- /dev/null
+++ b/test/unit/test_job_output_checker.py
@@ -0,0 +1,62 @@
+from unittest import TestCase
+from galaxy.util.bunch import Bunch
+from galaxy.jobs.output_checker import check_output
+from galaxy.jobs.error_level import StdioErrorLevel
+
+
+class OutputCheckerTestCase( TestCase ):
+
+    def setUp( self ):
+        self.tool = Bunch(
+            stdio_regexes=[],
+            stdio_exit_codes=[],
+        )
+        self.job = Bunch(
+            stdout=None,
+            stderr=None,
+            get_id_tag=lambda: "test_id",
+        )
+        self.stdout = ''
+        self.stderr = ''
+        self.tool_exit_code = None
+
+    def test_default_no_stderr_success( self ):
+        self.__assertSuccessful()
+
+    def test_default_stderr_failure( self ):
+        self.stderr = 'foo'
+        self.__assertNotSuccessful()
+
+    def test_exit_code_error( self ):
+        mock_exit_code = Bunch( range_start=1, range_end=1, error_level=StdioErrorLevel.FATAL, desc=None )
+        self.tool.stdio_exit_codes.append( mock_exit_code )
+        self.tool_exit_code = 1
+        self.__assertNotSuccessful()
+
+    def test_exit_code_success( self ):
+        mock_exit_code = Bunch( range_start=1, range_end=1, error_level=StdioErrorLevel.FATAL, desc=None )
+        self.tool.stdio_exit_codes.append( mock_exit_code )
+        self.tool_exit_code = 0
+        self.__assertSuccessful()
+
+    def test_problematic_strings( self ):
+        problematic_str = '\x80abc'
+        regex_rule = Bunch( match=r'.abc', stdout_match=False, stderr_match=True, error_level=StdioErrorLevel.FATAL, desc=None )
+        self.tool.stdio_regexes = [ regex_rule ]
+        self.stderr = problematic_str
+        self.__assertNotSuccessful()
+
+        problematic_str = '\x80abc'
+        regex_rule = Bunch( match=r'.abcd', stdout_match=False, stderr_match=True, error_level=StdioErrorLevel.FATAL, desc=None )
+        self.tool.stdio_regexes = [ regex_rule ]
+        self.stderr = problematic_str
+        self.__assertSuccessful()
+
+    def __assertSuccessful( self ):
+        self.assertTrue( self.__check_output() )
+
+    def __assertNotSuccessful( self ):
+        self.assertFalse( self.__check_output() )
+
+    def __check_output( self ):
+        return check_output( self.tool, self.stdout, self.stderr, self.tool_exit_code, self.job )

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.

    

commits-noreply＠bitbucket.org

tags

participants (1)