commit/galaxy-central: Scott McManus: Added application of regular expressions and exit code. Pulling exit codes from the runners will be part of a separate submission.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/3b2dc0a51d14/ changeset: 3b2dc0a51d14 user: Scott McManus date: 2012-06-19 17:35:36 summary: Added application of regular expressions and exit code. Pulling exit codes from the runners will be part of a separate submission. affected #: 3 files diff -r 7c495f835a1d436ad33dff6107784f106cc24980 -r 3b2dc0a51d147c3fc8f0560d8fd3ebefc0792e99 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -305,10 +305,81 @@ if job.state == job.states.DELETED or job.state == job.states.ERROR: #ERROR at this point means the job was deleted by an administrator. return self.fail( job.info ) - if stderr: - job.state = job.states.ERROR + + err_msg = "" + # Check exit codes and match regular expressions against stdout and + # stderr if this tool was configured to do so. + if ( len( self.tool.stdio_regexes ) > 0 or + len( self.tool.exit_codes ) > 0 ): + # We will check the exit code ranges in the order in which + # they were specified. Each exit_code is a ToolStdioExitCode + # that includes an applicable range. If the exit code was in + # that range, then apply the error level and add in a message. + # If we've reached a fatal error rule, then stop. + max_error_level = galaxy.tools.StdioErrorLevel.NO_ERROR + for exit_code in self.tool.stdio_exit_codes: + # TODO: Fetch the exit code from the .rc file: + tool_exit_code = 0 + if ( tool_exit_code >= exit_code.range_start and + tool_exit_code <= exit_code.range_end ): + if None != exit_code.desc: + err_msg += exit_code.desc + # TODO: Find somewhere to stick the err_msg - possibly to + # the source (stderr/stdout), possibly in a new db column. 
+ max_error_level = max( max_error_level, + exit_code.error_level ) + if max_error_level >= galaxy.tools.StdioErrorLevel.FATAL: + break + # If there is a regular expression for scanning stdout/stderr, + # then we assume that the tool writer overwrote the default + # behavior of just setting an error if there is *anything* on + # stderr. + if max_error_level < galaxy.tools.StdioErrorLevel.FATAL: + # We'll examine every regex. Each regex specifies whether + # it is to be run on stdout, stderr, or both. (It is + # possible for neither stdout nor stderr to be scanned, + # but those won't be scanned.) We record the highest + # error level, which are currently "warning" and "fatal". + # If fatal, then we set the job's state to ERROR. + # If warning, then we still set the job's state to OK + # but include a message. We'll do this if we haven't seen + # a fatal error yet + for regex in self.tool.stdio_regexes: + # If ( this regex should be matched against stdout ) + # - Run the regex's match pattern against stdout + # - If it matched, then determine the error level. + # o If it was fatal, then we're done - break. + # Repeat the stdout stuff for stderr. + # TODO: Collapse this into a single function. + if ( regex.stdout_match ): + regex_match = re.search( regex.match, stdout ) + if ( regex_match ): + err_msg += self.regex_err_msg( regex_match, regex ) + max_error_level = max( max_error_level, regex.error_level ) + if max_error_level >= galaxy.tools.StdioErrorLevel.FATAL: + break + if ( regex.stderr_match ): + regex_match = re.search( regex.match, stderr ) + if ( regex_match ): + err_msg += self.regex_err_msg( regex_match, regex ) + max_error_level = max( max_error_level, + regex.error_level ) + if max_error_level >= galaxy.tools.StdioErrorLevel.FATAL: + break + # If we encountered a fatal error, then we'll need to set the + # job state accordingly. 
Otherwise the job is ok: + if max_error_level >= galaxy.tools.StdioErrorLevel.FATAL: + job.state = job.states.ERROR + else: + job.state = job.states.OK + # When there are no regular expressions and no exit codes to check, + # default to the previous behavior: when there's anything on stderr + # the job has an error, and the job is ok otherwise. else: - job.state = job.states.OK + if stderr: + job.state = job.states.ERROR + else: + job.state = job.states.OK if self.version_string_cmd: version_filename = self.get_version_string_path() if os.path.exists(version_filename): @@ -330,6 +401,7 @@ return self.fail( "Job %s's output dataset(s) could not be read" % job.id ) job_context = ExpressionContext( dict( stdout = stdout, stderr = stderr ) ) job_tool = self.app.toolbox.tools_by_id.get( job.tool_id, None ) + def in_directory( file, directory ): # Make both absolute. directory = os.path.abspath( directory ) @@ -370,7 +442,11 @@ # Update (non-library) job output datasets through the object store if dataset not in job.output_library_datasets: self.app.object_store.update_from_file(dataset.dataset, create=True) - if context['stderr']: + # TODO: The context['stderr'] holds stderr's contents. An error + # only really occurs if the job also has an error. So check the + # job's state: + #if context['stderr']: + if job.states.ERROR == job.state: dataset.blurb = "error" elif dataset.has_data(): # If the tool was expected to set the extension, attempt to retrieve it @@ -385,7 +461,14 @@ ( not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) \ and self.app.config.retry_metadata_internally ): dataset.set_meta( overwrite = False ) - elif not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and not context['stderr']: + # TODO: The context['stderr'] used to indicate that there + # was an error. 
Now we must rely on the job's state instead; + # that indicates whether the tool relied on stderr to indicate + # the state or whether the tool used exit codes and regular + # expressions to do so. So we use + # job.state == job.states.ERROR to replace this same test. + #elif not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and not context['stderr']: + elif not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and job.states.ERROR != job.state: dataset._state = model.Dataset.states.FAILED_METADATA else: #load metadata from file @@ -415,7 +498,12 @@ if dataset.ext == 'auto': dataset.extension = 'txt' self.sa_session.add( dataset ) - if context['stderr']: + # TODO: job.states.ERROR == job.state now replaces checking + # stderr for a problem: + #if context['stderr']: + if job.states.ERROR == job.state: + log.debug( "setting dataset state to ERROR" ) + # TODO: This is where the state is being set to error. Change it! dataset_assoc.dataset.dataset.state = model.Dataset.states.ERROR else: dataset_assoc.dataset.dataset.state = model.Dataset.states.OK @@ -480,6 +568,29 @@ if self.app.config.cleanup_job == 'always' or ( not stderr and self.app.config.cleanup_job == 'onsuccess' ): self.cleanup() + def regex_err_msg( self, match, regex ): + """ + Return a message about the match on tool output using the given + ToolStdioRegex regex object. The regex_match is a MatchObject + that will contain the string matched on. + """ + # Get the description for the error level: + err_msg = galaxy.tools.StdioErrorLevel.desc( regex.error_level ) + ": " + # If there's a description for the regular expression, then use it. + # Otherwise, we'll take the first 256 characters of the match. + if None != regex.desc: + err_msg += regex.desc + else: + mstart = match.start() + mend = match.end() + err_msg += "Matched on " + # TODO: Move the constant 256 somewhere else besides here. 
+ if mend - mstart > 256: + err_msg += match.string[ mstart : mstart+256 ] + "..." + else: + err_msg += match.string[ mstart: mend ] + return err_msg + def cleanup( self ): # remove temporary files try: diff -r 7c495f835a1d436ad33dff6107784f106cc24980 -r 3b2dc0a51d147c3fc8f0560d8fd3ebefc0792e99 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -37,6 +37,23 @@ log = logging.getLogger( __name__ ) +# These determine stdio-based error levels from matching on regular expressions +# and exit codes. They are meant to be used comparatively, such as showing +# that warning < fatal. This is really meant to just be an enum. +class StdioErrorLevel( object ): + NO_ERROR = 0 + WARNING = 1 + FATAL = 2 + MAX = 2 + descs = {NO_ERROR : 'No error', WARNING : 'Warning', FATAL : 'Fatal error'} + @staticmethod + def desc( error_level ): + err_msg = "Unknown error" + if ( error_level > 0 and + error_level <= StdioErrorLevel.MAX ): + err_msg = StdioErrorLevel.descs[ error_level ] + return err_msg + class ToolNotFoundException( Exception ): pass @@ -1140,6 +1157,12 @@ # a warning and skip to the next. for exit_code_elem in ( stdio_elem.findall( "exit_code" ) ): exit_code = ToolStdioExitCode() + # Each exit code has an optional description that can be + # part of the "desc" or "description" attributes: + exit_code.desc = exit_code_elem.get( "desc" ) + if None == exit_code.desc: + exit_code.desc = exit_code_elem.get( "description" ) + # Parse the error level: exit_code.error_level = ( self.parse_error_level( exit_code_elem.get( "level" ))) code_range = exit_code_elem.get( "range", "" ) @@ -1155,11 +1178,9 @@ # X:Y - Split on the colon. We do not allow a colon # without a beginning or end, though we could. # Also note that whitespace is eliminated. - # TODO: Turn this into a single match - it will be - # more efficient - string.strip( code_range ) + # TODO: Turn this into a single match - it should be + # more efficient. 
code_range = re.sub( "\s", "", code_range ) - log.debug( "Code range after sub: %s" % code_range ) code_ranges = re.split( ":", code_range ) if ( len( code_ranges ) == 2 ): if ( None == code_ranges[0] or '' == code_ranges[0] ): @@ -1216,6 +1237,12 @@ for regex_elem in ( stdio_elem.findall( "regex" ) ): # TODO: Fill in ToolStdioRegex regex = ToolStdioRegex() + # Each regex has an optional description that can be + # part of the "desc" or "description" attributes: + regex.desc = regex_elem.get( "desc" ) + if None == regex.desc: + regex.desc = regex_elem.get( "description" ) + # Parse the error level regex.error_level = ( self.parse_error_level( regex_elem.get( "level" ) ) ) regex.match = regex_elem.get( "match", "" ) @@ -1243,9 +1270,9 @@ # and anything to do with "err". If neither stdout nor # stderr were specified, then raise a warning and scan both. for src in src_list: - if re.match( "out", src, re.IGNORECASE ): + if re.search( "out", src, re.IGNORECASE ): regex.stdout_match = True - if re.match( "err", src, re.IGNORECASE ): + if re.search( "err", src, re.IGNORECASE ): regex.stderr_match = True if (not regex.stdout_match and not regex.stderr_match): log.warning( "Unable to determine if tool stream " @@ -1262,24 +1289,25 @@ trace_msg = repr( traceback.format_tb( trace ) ) log.error( "Traceback: %s" % trace_msg ) + # TODO: This method doesn't have to be part of the Tool class. def parse_error_level( self, err_level ): """ Return fatal or warning depending on what's in the error level. This will assume that the error level fatal is returned if it's - unparsable. (This doesn't have to be part of the Tool class.) + unparsable. """ # What should the default be? I'm claiming it should be fatal: # if you went to the trouble to write the rule, then it's # probably a problem. I think there are easily three substantial # camps: make it fatal, make it a warning, or, if it's missing, - # just throw an exception and ignore it. 
- return_level = "fatal" + # just throw an exception and ignore the exit_code element. + return_level = StdioErrorLevel.FATAL try: if ( None != err_level ): if ( re.search( "warning", err_level, re.IGNORECASE ) ): - return_level = "warning" + return_level = StdioErrorLevel.WARNING elif ( re.search( "fatal", err_level, re.IGNORECASE ) ): - return_level = "fatal" + return_level = StdioErrorLevel.FATAL except Exception, e: log.error( "Exception in parse_error_level " + str(sys.exc_info() ) ) @@ -2333,16 +2361,18 @@ installed_tool_dependencies = self.tool_shed_repository.tool_dependencies else: installed_tool_dependencies = None - for requirement in self.requirements: - # TODO: currently only supporting requirements of type package, - # need to implement some mechanism for mapping other types - # back to packages + for requirement in self.requirements: + # TODO: currently only supporting requirements of type package, + # need to implement some mechanism for mapping other types + # back to packages log.debug( "Building dependency shell command for dependency '%s'", requirement.name ) - if requirement.type == 'package': + if requirement.type == 'package': script_file, base_path, version = self.app.toolbox.dependency_manager.find_dep( name=requirement.name, version=requirement.version, type=requirement.type, installed_tool_dependencies=installed_tool_dependencies ) + if requirement.type == 'package': + script_file, base_path, version = self.app.toolbox.dependency_manager.find_dep( requirement.name, requirement.version ) if script_file is None and base_path is None: log.warn( "Failed to resolve dependency on '%s', ignoring", requirement.name ) elif script_file is None: @@ -2617,7 +2647,7 @@ elif isinstance( input, SelectToolParameter ): param_dict.update( { 'type' : 'select', 'html' : urllib.quote( input.get_html( trans ) ), - 'options': input.static_options + 'options': input.static_options } ) elif isinstance( input, Conditional ): # TODO. 
@@ -2626,7 +2656,7 @@ param_dict.update( { 'type' : 'number', 'init_value' : input.value, 'html' : urllib.quote( input.get_html( trans ) ), 'min': input.min, - 'max': input.max, + 'max': input.max 'value': input.value } ) else: @@ -2798,6 +2828,7 @@ self.stderr_match = False # TODO: Define a common class or constant for error level: self.error_level = "fatal" + self.desc = "" class ToolStdioExitCode( object ): """ @@ -2809,6 +2840,7 @@ self.range_end = float( "inf" ) # TODO: Define a common class or constant for error level: self.error_level = "fatal" + self.desc = "" class ToolParameterValueWrapper( object ): """ Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket