commit/galaxy-central: jmchilton: Refactor job status checking logic into its own module.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/c78f3f687fbc/ Changeset: c78f3f687fbc User: jmchilton Date: 2013-10-10 07:17:17 Summary: Refactor job status checking logic into its own module. Add unit tests for some basic behaviors. Affected #: 5 files diff -r ba45d14a2c8fe72554390872deb5f4ffdd66170f -r c78f3f687fbc8592d32a67738e57d847c00ce0c6 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -25,6 +25,7 @@ from galaxy.util.bunch import Bunch from galaxy.util.expressions import ExpressionContext from galaxy.util.json import from_json_string +from .output_checker import check_output log = logging.getLogger( __name__ ) @@ -1079,158 +1080,7 @@ self.cleanup() def check_tool_output( self, stdout, stderr, tool_exit_code, job ): - """ - Check the output of a tool - given the stdout, stderr, and the tool's - exit code, return True if the tool exited succesfully and False - otherwise. No exceptions should be thrown. If this code encounters - an exception, it returns True so that the workflow can continue; - otherwise, a bug in this code could halt workflow progress. - Note that, if the tool did not define any exit code handling or - any stdio/stderr handling, then it reverts back to previous behavior: - if stderr contains anything, then False is returned. - Note that the job id is just for messages. - """ - # By default, the tool succeeded. This covers the case where the code - # has a bug but the tool was ok, and it lets a workflow continue. - success = True - - try: - # Check exit codes and match regular expressions against stdout and - # stderr if this tool was configured to do so. - # If there is a regular expression for scanning stdout/stderr, - # then we assume that the tool writer overwrote the default - # behavior of just setting an error if there is *anything* on - # stderr. - if ( len( self.tool.stdio_regexes ) > 0 or - len( self.tool.stdio_exit_codes ) > 0 ): - # Check the exit code ranges in the order in which - # they were specified. Each exit_code is a StdioExitCode - # that includes an applicable range. If the exit code was in - # that range, then apply the error level and add a message. - # If we've reached a fatal error rule, then stop. - max_error_level = galaxy.tools.StdioErrorLevel.NO_ERROR - if tool_exit_code != None: - for stdio_exit_code in self.tool.stdio_exit_codes: - if ( tool_exit_code >= stdio_exit_code.range_start and - tool_exit_code <= stdio_exit_code.range_end ): - # Tack on a generic description of the code - # plus a specific code description. For example, - # this might prepend "Job 42: Warning (Out of Memory)\n". - code_desc = stdio_exit_code.desc - if ( None == code_desc ): - code_desc = "" - tool_msg = ( "%s: Exit code %d (%s)" % ( - galaxy.tools.StdioErrorLevel.desc( stdio_exit_code.error_level ), - tool_exit_code, - code_desc ) ) - log.info( "Job %s: %s" % (job.get_id_tag(), tool_msg) ) - stderr = tool_msg + "\n" + stderr - max_error_level = max( max_error_level, - stdio_exit_code.error_level ) - if ( max_error_level >= - galaxy.tools.StdioErrorLevel.FATAL ): - break - - if max_error_level < galaxy.tools.StdioErrorLevel.FATAL: - # We'll examine every regex. Each regex specifies whether - # it is to be run on stdout, stderr, or both. (It is - # possible for neither stdout nor stderr to be scanned, - # but those regexes won't be used.) We record the highest - # error level, which are currently "warning" and "fatal". - # If fatal, then we set the job's state to ERROR. - # If warning, then we still set the job's state to OK - # but include a message. We'll do this if we haven't seen - # a fatal error yet - for regex in self.tool.stdio_regexes: - # If ( this regex should be matched against stdout ) - # - Run the regex's match pattern against stdout - # - If it matched, then determine the error level. - # o If it was fatal, then we're done - break. - # Repeat the stdout stuff for stderr. - # TODO: Collapse this into a single function. - if ( regex.stdout_match ): - regex_match = re.search( regex.match, stdout, - re.IGNORECASE ) - if ( regex_match ): - rexmsg = self.regex_err_msg( regex_match, regex) - log.info( "Job %s: %s" - % ( job.get_id_tag(), rexmsg ) ) - stdout = rexmsg + "\n" + stdout - max_error_level = max( max_error_level, - regex.error_level ) - if ( max_error_level >= - galaxy.tools.StdioErrorLevel.FATAL ): - break - - if ( regex.stderr_match ): - regex_match = re.search( regex.match, stderr, - re.IGNORECASE ) - if ( regex_match ): - rexmsg = self.regex_err_msg( regex_match, regex) - log.info( "Job %s: %s" - % ( job.get_id_tag(), rexmsg ) ) - stderr = rexmsg + "\n" + stderr - max_error_level = max( max_error_level, - regex.error_level ) - if ( max_error_level >= - galaxy.tools.StdioErrorLevel.FATAL ): - break - - # If we encountered a fatal error, then we'll need to set the - # job state accordingly. Otherwise the job is ok: - if max_error_level >= galaxy.tools.StdioErrorLevel.FATAL: - success = False - else: - success = True - - # When there are no regular expressions and no exit codes to check, - # default to the previous behavior: when there's anything on stderr - # the job has an error, and the job is ok otherwise. - else: - # TODO: Add in the tool and job id: - # log.debug( "Tool did not define exit code or stdio handling; " - # + "checking stderr for success" ) - if stderr: - success = False - else: - success = True - - # On any exception, return True. - except: - tb = traceback.format_exc() - log.warning( "Tool check encountered unexpected exception; " - + "assuming tool was successful: " + tb ) - success = True - - # Store the modified stdout and stderr in the job: - if None != job: - job.stdout = stdout - job.stderr = stderr - - return success - - def regex_err_msg( self, match, regex ): - """ - Return a message about the match on tool output using the given - ToolStdioRegex regex object. The regex_match is a MatchObject - that will contain the string matched on. - """ - # Get the description for the error level: - err_msg = galaxy.tools.StdioErrorLevel.desc( regex.error_level ) + ": " - # If there's a description for the regular expression, then use it. - # Otherwise, we'll take the first 256 characters of the match. - if None != regex.desc: - err_msg += regex.desc - else: - mstart = match.start() - mend = match.end() - err_msg += "Matched on " - # TODO: Move the constant 256 somewhere else besides here. - if mend - mstart > 256: - err_msg += match.string[ mstart : mstart+256 ] + "..." - else: - err_msg += match.string[ mstart: mend ] - return err_msg + return check_output( self.tool, stdout, stderr, tool_exit_code, job ) def cleanup( self ): # remove temporary files diff -r ba45d14a2c8fe72554390872deb5f4ffdd66170f -r c78f3f687fbc8592d32a67738e57d847c00ce0c6 lib/galaxy/jobs/error_level.py --- /dev/null +++ b/lib/galaxy/jobs/error_level.py @@ -0,0 +1,25 @@ + + +# These determine stdio-based error levels from matching on regular expressions +# and exit codes. They are meant to be used comparatively, such as showing +# that warning < fatal. This is really meant to just be an enum. +class StdioErrorLevel( object ): + NO_ERROR = 0 + LOG = 1 + WARNING = 2 + FATAL = 3 + MAX = 3 + descs = { + NO_ERROR: 'No error', + LOG: 'Log', + WARNING: 'Warning', + FATAL: 'Fatal error', + } + + @staticmethod + def desc( error_level ): + err_msg = "Unknown error" + if ( error_level > 0 and + error_level <= StdioErrorLevel.MAX ): + err_msg = StdioErrorLevel.descs[ error_level ] + return err_msg diff -r ba45d14a2c8fe72554390872deb5f4ffdd66170f -r c78f3f687fbc8592d32a67738e57d847c00ce0c6 lib/galaxy/jobs/output_checker.py --- /dev/null +++ b/lib/galaxy/jobs/output_checker.py @@ -0,0 +1,164 @@ +import re +from .error_level import StdioErrorLevel +import traceback + +from logging import getLogger +log = getLogger( __name__ ) + + +def check_output( tool, stdout, stderr, tool_exit_code, job ): + """ + Check the output of a tool - given the stdout, stderr, and the tool's + exit code, return True if the tool exited succesfully and False + otherwise. No exceptions should be thrown. If this code encounters + an exception, it returns True so that the workflow can continue; + otherwise, a bug in this code could halt workflow progress. + + Note that, if the tool did not define any exit code handling or + any stdio/stderr handling, then it reverts back to previous behavior: + if stderr contains anything, then False is returned. + + Note that the job id is just for messages. + """ + # By default, the tool succeeded. This covers the case where the code + # has a bug but the tool was ok, and it lets a workflow continue. + success = True + + try: + # Check exit codes and match regular expressions against stdout and + # stderr if this tool was configured to do so. + # If there is a regular expression for scanning stdout/stderr, + # then we assume that the tool writer overwrote the default + # behavior of just setting an error if there is *anything* on + # stderr. + if ( len( tool.stdio_regexes ) > 0 or + len( tool.stdio_exit_codes ) > 0 ): + # Check the exit code ranges in the order in which + # they were specified. Each exit_code is a StdioExitCode + # that includes an applicable range. If the exit code was in + # that range, then apply the error level and add a message. + # If we've reached a fatal error rule, then stop. + max_error_level = StdioErrorLevel.NO_ERROR + if tool_exit_code != None: + for stdio_exit_code in tool.stdio_exit_codes: + if ( tool_exit_code >= stdio_exit_code.range_start and + tool_exit_code <= stdio_exit_code.range_end ): + # Tack on a generic description of the code + # plus a specific code description. For example, + # this might prepend "Job 42: Warning (Out of Memory)\n". + code_desc = stdio_exit_code.desc + if ( None == code_desc ): + code_desc = "" + tool_msg = ( "%s: Exit code %d (%s)" % ( + StdioErrorLevel.desc( stdio_exit_code.error_level ), + tool_exit_code, + code_desc ) ) + log.info( "Job %s: %s" % (job.get_id_tag(), tool_msg) ) + stderr = tool_msg + "\n" + stderr + max_error_level = max( max_error_level, + stdio_exit_code.error_level ) + if ( max_error_level >= + StdioErrorLevel.FATAL ): + break + + if max_error_level < StdioErrorLevel.FATAL: + # We'll examine every regex. Each regex specifies whether + # it is to be run on stdout, stderr, or both. (It is + # possible for neither stdout nor stderr to be scanned, + # but those regexes won't be used.) We record the highest + # error level, which are currently "warning" and "fatal". + # If fatal, then we set the job's state to ERROR. + # If warning, then we still set the job's state to OK + # but include a message. We'll do this if we haven't seen + # a fatal error yet + for regex in tool.stdio_regexes: + # If ( this regex should be matched against stdout ) + # - Run the regex's match pattern against stdout + # - If it matched, then determine the error level. + # o If it was fatal, then we're done - break. + # Repeat the stdout stuff for stderr. + # TODO: Collapse this into a single function. + if ( regex.stdout_match ): + regex_match = re.search( regex.match, stdout, + re.IGNORECASE ) + if ( regex_match ): + rexmsg = __regex_err_msg( regex_match, regex) + log.info( "Job %s: %s" + % ( job.get_id_tag(), rexmsg ) ) + stdout = rexmsg + "\n" + stdout + max_error_level = max( max_error_level, + regex.error_level ) + if ( max_error_level >= + StdioErrorLevel.FATAL ): + break + + if ( regex.stderr_match ): + regex_match = re.search( regex.match, stderr, + re.IGNORECASE ) + if ( regex_match ): + rexmsg = __regex_err_msg( regex_match, regex) + log.info( "Job %s: %s" + % ( job.get_id_tag(), rexmsg ) ) + stderr = rexmsg + "\n" + stderr + max_error_level = max( max_error_level, + regex.error_level ) + if ( max_error_level >= + StdioErrorLevel.FATAL ): + break + + # If we encountered a fatal error, then we'll need to set the + # job state accordingly. Otherwise the job is ok: + if max_error_level >= StdioErrorLevel.FATAL: + success = False + else: + success = True + + # When there are no regular expressions and no exit codes to check, + # default to the previous behavior: when there's anything on stderr + # the job has an error, and the job is ok otherwise. + else: + # TODO: Add in the tool and job id: + # log.debug( "Tool did not define exit code or stdio handling; " + # + "checking stderr for success" ) + if stderr: + success = False + else: + success = True + + # On any exception, return True. + except: + tb = traceback.format_exc() + log.warning( "Tool check encountered unexpected exception; " + + "assuming tool was successful: " + tb ) + success = True + + # Store the modified stdout and stderr in the job: + if None != job: + job.stdout = stdout + job.stderr = stderr + + return success + + +def __regex_err_msg( match, regex ): + """ + Return a message about the match on tool output using the given + ToolStdioRegex regex object. The regex_match is a MatchObject + that will contain the string matched on. + """ + # Get the description for the error level: + err_msg = StdioErrorLevel.desc( regex.error_level ) + ": " + # If there's a description for the regular expression, then use it. + # Otherwise, we'll take the first 256 characters of the match. + if None != regex.desc: + err_msg += regex.desc + else: + mstart = match.start() + mend = match.end() + err_msg += "Matched on " + # TODO: Move the constant 256 somewhere else besides here. + if mend - mstart > 256: + err_msg += match.string[ mstart : mstart + 256 ] + "..." + else: + err_msg += match.string[ mstart: mend ] + return err_msg diff -r ba45d14a2c8fe72554390872deb5f4ffdd66170f -r c78f3f687fbc8592d32a67738e57d847c00ce0c6 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -34,6 +34,7 @@ from sqlalchemy import and_ from galaxy import jobs, model +from galaxy.jobs.error_level import StdioErrorLevel from galaxy.datatypes.metadata import JobExternalOutputMetadataWrapper from galaxy.jobs import ParallelismInfo from galaxy.tools.actions import DefaultToolAction @@ -64,33 +65,11 @@ from tool_shed.util import shed_util_common from .loader import load_tool, template_macro_params + log = logging.getLogger( __name__ ) WORKFLOW_PARAMETER_REGULAR_EXPRESSION = re.compile( '''\$\{.+?\}''' ) -# These determine stdio-based error levels from matching on regular expressions -# and exit codes. They are meant to be used comparatively, such as showing -# that warning < fatal. This is really meant to just be an enum. -class StdioErrorLevel( object ): - NO_ERROR = 0 - LOG = 1 - WARNING = 2 - FATAL = 3 - MAX = 3 - descs = { - NO_ERROR : 'No error', - LOG: 'Log', - WARNING : 'Warning', - FATAL : 'Fatal error' - } - @staticmethod - def desc( error_level ): - err_msg = "Unknown error" - if ( error_level > 0 and - error_level <= StdioErrorLevel.MAX ): - err_msg = StdioErrorLevel.descs[ error_level ] - return err_msg - class ToolNotFoundException( Exception ): pass diff -r ba45d14a2c8fe72554390872deb5f4ffdd66170f -r c78f3f687fbc8592d32a67738e57d847c00ce0c6 test/unit/test_job_output_checker.py --- /dev/null +++ b/test/unit/test_job_output_checker.py @@ -0,0 +1,62 @@ +from unittest import TestCase +from galaxy.util.bunch import Bunch +from galaxy.jobs.output_checker import check_output +from galaxy.jobs.error_level import StdioErrorLevel + + +class OutputCheckerTestCase( TestCase ): + + def setUp( self ): + self.tool = Bunch( + stdio_regexes=[], + stdio_exit_codes=[], + ) + self.job = Bunch( + stdout=None, + stderr=None, + get_id_tag=lambda: "test_id", + ) + self.stdout = '' + self.stderr = '' + self.tool_exit_code = None + + def test_default_no_stderr_success( self ): + self.__assertSuccessful() + + def test_default_stderr_failure( self ): + self.stderr = 'foo' + self.__assertNotSuccessful() + + def test_exit_code_error( self ): + mock_exit_code = Bunch( range_start=1, range_end=1, error_level=StdioErrorLevel.FATAL, desc=None ) + self.tool.stdio_exit_codes.append( mock_exit_code ) + self.tool_exit_code = 1 + self.__assertNotSuccessful() + + def test_exit_code_success( self ): + mock_exit_code = Bunch( range_start=1, range_end=1, error_level=StdioErrorLevel.FATAL, desc=None ) + self.tool.stdio_exit_codes.append( mock_exit_code ) + self.tool_exit_code = 0 + self.__assertSuccessful() + + def test_problematic_strings( self ): + problematic_str = '\x80abc' + regex_rule = Bunch( match=r'.abc', stdout_match=False, stderr_match=True, error_level=StdioErrorLevel.FATAL, desc=None ) + self.tool.stdio_regexes = [ regex_rule ] + self.stderr = problematic_str + self.__assertNotSuccessful() + + problematic_str = '\x80abc' + regex_rule = Bunch( match=r'.abcd', stdout_match=False, stderr_match=True, error_level=StdioErrorLevel.FATAL, desc=None ) + self.tool.stdio_regexes = [ regex_rule ] + self.stderr = problematic_str + self.__assertSuccessful() + + def __assertSuccessful( self ): + self.assertTrue( self.__check_output() ) + + def __assertNotSuccessful( self ): + self.assertFalse( self.__check_output() ) + + def __check_output( self ): + return check_output( self.tool, self.stdout, self.stderr, self.tool_exit_code, self.job ) Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
commits-noreply@bitbucket.org