commit/galaxy-central: jmchilton: Fixes for DRMAA submission failures.
1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/cc0fe62e54dc/
Changeset: cc0fe62e54dc
Branch: stable
User: jmchilton
Date: 2014-05-12 20:32:34
Summary: Fixes for DRMAA submission failures.

Mark job as failed once Galaxy is done attempting to submit the job, catch and retry drmaa.DeniedByDrmException like drmaa.InternalException.

Affected #: 1 file

diff -r 3534f67acd2a4f84899335326ac2cc8bcf05201d -r cc0fe62e54dc59607c26cd9152ef1ea7c85af625 lib/galaxy/jobs/runners/drmaa.py
--- a/lib/galaxy/jobs/runners/drmaa.py
+++ b/lib/galaxy/jobs/runners/drmaa.py
@@ -177,10 +177,16 @@
             while external_job_id is None and trynum < 5:
                 try:
                     external_job_id = self.ds.runJob(jt)
-                except drmaa.InternalException, e:
+                    break
+                except ( drmaa.InternalException, drmaa.DeniedByDrmException ), e:
                     trynum += 1
                     log.warning( '(%s) drmaa.Session.runJob() failed, will retry: %s', galaxy_id_tag, e )
                     time.sleep( 5 )
+            else:
+                log.error( "(%s) All attempts to submit job failed" % galaxy_id_tag )
+                job_wrapper.fail( "Unable to run this job due to a cluster error, please retry it later" )
+                self.ds.deleteJobTemplate( jt )
+                return
         else:
             job_wrapper.change_ownership_for_run()
             log.debug( '(%s) submitting with credentials: %s [uid: %s]' % ( galaxy_id_tag, job_wrapper.user_system_pwent[0], job_wrapper.user_system_pwent[2] ) )

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
commits-noreply@bitbucket.org