commit/galaxy-central: natefoo: Retry pbs_submit if it fails.
1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/changeset/083d480bf5e3/
changeset:   083d480bf5e3
user:        natefoo
date:        2012-10-19 18:56:10
summary:     Retry pbs_submit if it fails.
affected #:  1 file

diff -r 8bcc04c4b710dbf4173a5fba6d2a3111e7c2b511 -r 083d480bf5e35009f01e9439f11aa7e68716e429 lib/galaxy/jobs/runners/pbs.py
--- a/lib/galaxy/jobs/runners/pbs.py
+++ b/lib/galaxy/jobs/runners/pbs.py
@@ -310,13 +310,19 @@
         galaxy_job_id = job_wrapper.get_id_tag()
         log.debug("(%s) submitting file %s" % ( galaxy_job_id, job_file ) )
         log.debug("(%s) command is: %s" % ( galaxy_job_id, command_line ) )
-        job_id = pbs.pbs_submit(c, job_attrs, job_file, pbs_queue_name, None)
-        pbs.pbs_disconnect(c)
-        # check to see if it submitted
-        if not job_id:
+        tries = 0
+        while tries < 5:
+            job_id = pbs.pbs_submit(c, job_attrs, job_file, pbs_queue_name, None)
+            tries += 1
+            if job_id:
+                pbs.pbs_disconnect(c)
+                break
             errno, text = pbs.error()
-            log.debug( "(%s) pbs_submit failed, PBS error %d: %s" % (galaxy_job_id, errno, text) )
+            log.warning( "(%s) pbs_submit failed (try %d/5), PBS error %d: %s" % (galaxy_job_id, tries, errno, text) )
+            time.sleep(2)
+        else:
+            log.error( "(%s) All attempts to submit job failed" % galaxy_job_id )
             job_wrapper.fail( "Unable to run this job due to a cluster error, please retry it later" )
             return

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket