[hg] galaxy 1577: Improve job error messaging, some fixes for setting job state, job info, dataset state, and dataset info when job ends in error.
details: http://www.bx.psu.edu/hg/galaxy/rev/0f4fd4c20cd6 changeset: 1577:0f4fd4c20cd6 user: Greg Von Kuster <greg@bx.psu.edu> date: Tue Oct 28 10:21:02 2008 -0400 description: Improve job error messaging, some fixes for setting job state, job info, dataset state, and dataset info when job ends in error. 2 file(s) affected in this change: lib/galaxy/jobs/__init__.py templates/dataset/errors.tmpl diffs (293 lines): diff -r 8eec48aaca6e -r 0f4fd4c20cd6 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py Mon Oct 27 17:03:50 2008 -0400 +++ b/lib/galaxy/jobs/__init__.py Tue Oct 28 10:21:02 2008 -0400 @@ -176,11 +176,17 @@ self.dispatcher.put( job ) log.debug( "job %d dispatched" % job.job_id) elif job_state == JOB_DELETED: - log.debug( "job %d deleted by user while still queued" % job.job_id ) + msg = "job %d deleted by user while still queued" % job.job_id + job.info = msg + log.debug( msg ) else: - log.error( "unknown job state '%s' for job %d" % ( job_state, job.job_id )) - except: - log.exception( "failure running job %d" % job.job_id ) + msg = "unknown job state '%s' for job %d" % ( job_state, job.job_id ) + job.info = msg + log.error( msg ) + except Exception, e: + msg = "failure running job %d: %s" % ( job.job_id, str( e ) ) + job.info = msg + log.exception( msg ) # Update the waiting list self.waiting = new_waiting # If special (e.g. 
fair) scheduling is enabled, dispatch all jobs @@ -194,9 +200,10 @@ except Empty: # squeue is empty, so stop dispatching break - except: # if something else breaks while dispatching - job.fail( "failure dispatching job" ) - log.exception( "failure running job %d" % sjob.job_id ) + except Exception, e: # if something else breaks while dispatching + msg = "failure running job %d: %s" % ( sjob.job_id, str( e ) ) + job.fail( msg ) + log.exception( msg ) def put( self, job_id, tool ): """Add a job to the queue (by job identifier)""" @@ -301,7 +308,7 @@ self.extra_filenames = extra_filenames return extra_filenames - def fail( self, message, exception=False ): + def fail( self, message, state=None, exception=False ): """ Indicate job failure by setting state and message on all output datasets. @@ -309,25 +316,26 @@ job = model.Job.get( self.job_id ) job.refresh() # if the job was deleted, don't fail it - if job.state == job.states.DELETED: - self.cleanup() - return - for dataset_assoc in job.output_datasets: - dataset = dataset_assoc.dataset - dataset.refresh() - dataset.state = dataset.states.ERROR - dataset.blurb = 'tool error' - dataset.info = message - dataset.set_size() - dataset.flush() - job.state = model.Job.states.ERROR - job.command_line = self.command_line - job.info = message - # If the failure is due to a Galaxy framework exception, save - # the traceback - if exception: - job.traceback = traceback.format_exc() - job.flush() + if not job.state == job.states.DELETED: + for dataset_assoc in job.output_datasets: + dataset = dataset_assoc.dataset + dataset.refresh() + dataset.state = dataset.states.ERROR + dataset.blurb = 'tool error' + dataset.info = message + dataset.set_size() + dataset.flush() + if state is not None: + job.state = state + else: + job.state = model.Job.states.ERROR + job.command_line = self.command_line + job.info = message + # If the failure is due to a Galaxy framework exception, save the traceback + if exception: + job.traceback = 
traceback.format_exc() + job.flush() + # If the job was deleted, just clean up self.cleanup() def change_state( self, state, info = False ): @@ -371,16 +379,19 @@ job.refresh() for dataset_assoc in job.input_datasets: idata = dataset_assoc.dataset - if not idata: continue + if not idata: + continue idata.refresh() idata.dataset.refresh() #we need to refresh the base Dataset, since that is where 'state' is stored # don't run jobs for which the input dataset was deleted - if idata.deleted == True: - self.fail( "input data %d was deleted before this job ran" % idata.hid ) + if idata.deleted: + msg = "input data %d was deleted before this job started" % idata.hid + self.fail( msg, state=JOB_INPUT_DELETED ) return JOB_INPUT_DELETED # an error in the input data causes us to bail immediately elif idata.state == idata.states.ERROR: - self.fail( "error in input data %d" % idata.hid ) + msg = "input data %d is in an error state" % idata.hid + self.fail( msg, state=JOB_INPUT_ERROR ) return JOB_INPUT_ERROR elif idata.state != idata.states.OK: # need to requeue @@ -467,8 +478,8 @@ os.remove( fname ) if self.working_directory is not None: os.rmdir( self.working_directory ) - except: - log.exception( "Unable to cleanup job %s" % self.job_id ) + except Exception, e: + log.exception( "Unable to cleanup job %s, exception: %s" % ( str( self.job_id ), str( e ) ) ) def get_command_line( self ): return self.command_line @@ -617,7 +628,7 @@ job = model.Job.get( job_id ) job.refresh() job.state = job.states.DELETED - job.info = "Job deleted by user before it completed." + job.info = "Job output deleted by user before job completed." 
job.flush() for dataset_assoc in job.output_datasets: dataset = dataset_assoc.dataset @@ -630,7 +641,7 @@ dataset.deleted = True dataset.blurb = 'deleted' dataset.peek = 'Job deleted' - dataset.info = 'Job deleted by user before it completed' + dataset.info = 'Job output deleted by user before job completed' dataset.flush() def put( self, job ): diff -r 8eec48aaca6e -r 0f4fd4c20cd6 templates/dataset/errors.tmpl --- a/templates/dataset/errors.tmpl Mon Oct 27 17:03:50 2008 -0400 +++ b/templates/dataset/errors.tmpl Tue Oct 28 10:21:02 2008 -0400 @@ -1,79 +1,69 @@ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> <html> + <head> + <title>Dataset generation errors</title> + <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" /> + <link href="/static/style/base.css" rel="stylesheet" type="text/css" /> + <style> + pre + { + background: white; + color: black; + border: dotted black 1px; + overflow: auto; + padding: 10px; + } + </style> + </head> -<head> -<title>Dataset generation errors</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" /> -<link href="/static/style/base.css" rel="stylesheet" type="text/css" /> -<style> -pre -{ - background: white; - color: black; - border: dotted black 1px; - overflow: auto; - padding: 10px; -} -</style> -</head> + <body> + <h2>Dataset generation errors</h2> + <p><b>Dataset $dataset.hid: $dataset.display_name</b></p> -<body> - - <h2>Dataset generation errors</h2> - - <p><b>Dataset $dataset.hid: $dataset.display_name</b></p> - - #if $dataset.creating_job_associations - - #set job = $dataset.creating_job_associations[0].job - - #if job.traceback - The Galaxy framework encountered the following error while attempting - to run the tool: + #if $dataset.creating_job_associations + #set job = $dataset.creating_job_associations[0].job + #if job.traceback + The Galaxy framework encountered the following error while attempting to run the tool: + 
<pre>${job.traceback}</pre> + #end if + #if $job.stderr or $job.info + Tool execution generated the following error message: + #if $job.stderr + <pre>${job.stderr}</pre> + #elif $job.info + <pre>${job.info}</pre> + #end if + #else + Tool execution did not generate any error messages. + #end if + #if $job.stdout + The tool produced the following additional output: + <pre>${job.stdout}</pre> + #end if + #else + The tool did not create any additional job / error info. + #end if - <pre>${job.traceback}</pre> - - #end if - - #if $job.stderr - Tool execution generated the following error message: - <pre>${job.stderr}</pre> - #else - Tool execution did not generate any error messages. - #end if - - #if $job.stdout - The tool produced the following additional output: - <pre>${job.stdout}</pre> - #end if - - #else - - The tool did not create any additional job / error info. - - #end if - - <h2>Report this error to the Galaxy Team</h2> - - <p>The Galaxy team regularly reviews errors that occur in the application. - However, if you would like to provide additional information (such as - what you were trying to do when the error occurred) and a contact e-mail - address, we will be better able to investigate your problem and get back - to you.</p> - - <div class="toolForm"> - <div class="toolFormTitle">Error Report</div> - <div class="toolFormBody"> - <form name="report_error" action="${h.url_for( action='report_error')}" method="post" > - <input type="hidden" name="id" value="$dataset.id" /> - <table> - <tr valign="top"><td>Your Email:</td><td><input type="text" name="email" size="40" /></td></tr> - <tr valign="top"><td>Message:</td><td><textarea name="message", rows="10" cols="40" /></textarea></td></tr> - <tr><td></td><td><input type="submit" value="Report"> - </table> - </form> - </div> - </div> - -</body> + <h2>Report this error to the Galaxy Team</h2> + <p> + The Galaxy team regularly reviews errors that occur in the application. 
+ However, if you would like to provide additional information (such as + what you were trying to do when the error occurred) and a contact e-mail + address, we will be better able to investigate your problem and get back + to you. + </p> + <div class="toolForm"> + <div class="toolFormTitle">Error Report</div> + <div class="toolFormBody"> + <form name="report_error" action="${h.url_for( action='report_error')}" method="post" > + <input type="hidden" name="id" value="$dataset.id" /> + <table> + <tr valign="top"><td>Your Email:</td><td><input type="text" name="email" size="40" /></td></tr> + <tr valign="top"><td>Message:</td><td><textarea name="message", rows="10" cols="40" /></textarea></td></tr> + <tr><td></td><td><input type="submit" value="Report"> + </table> + </form> + </div> + </div> + </body> </html>
participants (1)
- Greg Von Kuster