1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/changeset/2e0254547b09/
changeset: 2e0254547b09
user: smcmanus
date: 2012-06-25 23:16:49
summary: Recommit of PBS and DRMAA fixes
affected #: 3 files
diff -r 319b2b0e832c8ea5ef520c76722f7ad6270507c3 -r 2e0254547b09bfccbd0834524a0fb4919b6a6efd lib/galaxy/jobs/__init__.py
--- a/lib/galaxy/jobs/__init__.py
+++ b/lib/galaxy/jobs/__init__.py
@@ -291,8 +291,6 @@
the output datasets based on stderr and stdout from the command, and
the contents of the output files.
"""
- # TODO: Eliminate debugging code after testing all runners
- log.debug( "JobWrapper.finish: exit code:" + str(tool_exit_code) )
# default post job setup
self.sa_session.expunge_all()
job = self.get_job()
diff -r 319b2b0e832c8ea5ef520c76722f7ad6270507c3 -r 2e0254547b09bfccbd0834524a0fb4919b6a6efd lib/galaxy/jobs/runners/drmaa.py
--- a/lib/galaxy/jobs/runners/drmaa.py
+++ b/lib/galaxy/jobs/runners/drmaa.py
@@ -39,6 +39,11 @@
drmaa.JobState.FAILED: 'job finished, but failed',
}
+# The last four lines (following the last fi) will:
+# - setup the env
+# - move to the job wrapper's working directory
+# - execute the command
+# - take the command's exit code ($?) and write it to a file.
drm_template = """#!/bin/sh
GALAXY_LIB="%s"
if [ "$GALAXY_LIB" != "None" ]; then
@@ -52,6 +57,7 @@
%s
cd %s
%s
+echo $? > %s
"""
def __lineno__():
"""Returns the current line number in our program."""
@@ -77,7 +83,7 @@
self.job_file = None
self.ofile = None
self.efile = None
- self.rcfile = None
+ self.ecfile = None
self.runner_url = None
class DRMAAJobRunner( BaseJobRunner ):
@@ -169,7 +175,7 @@
# define job attributes
ofile = "%s.drmout" % os.path.join(job_wrapper.working_directory, job_wrapper.get_id_tag())
efile = "%s.drmerr" % os.path.join(job_wrapper.working_directory, job_wrapper.get_id_tag())
- rcfile = "%s.drmrc" % os.path.join(job_wrapper.working_directory, job_wrapper.get_id_tag())
+ ecfile = "%s.drmec" % os.path.join(job_wrapper.working_directory, job_wrapper.get_id_tag())
job_name = "g%s_%s_%s" % ( job_wrapper.job_id, job_wrapper.tool.id, job_wrapper.user )
job_name = ''.join( map( lambda x: x if x in ( string.letters + string.digits + '_' ) else '_', job_name ) )
@@ -178,7 +184,7 @@
jt.jobName = job_name
jt.outputPath = ":%s" % ofile
jt.errorPath = ":%s" % efile
- jt.returnCodePath = ":%s" % rcfile
+ # Avoid a jt.exitCodePath for now - it's only used when finishing.
native_spec = self.get_native_spec( runner_url )
if native_spec is not None:
jt.nativeSpecification = native_spec
@@ -187,7 +193,8 @@
script = drm_template % ( job_wrapper.galaxy_lib_dir,
job_wrapper.get_env_setup_clause(),
os.path.abspath( job_wrapper.working_directory ),
- command_line )
+ command_line,
+ ecfile )
try:
fh = file( jt.remoteCommand, "w" )
@@ -231,7 +238,7 @@
drm_job_state.job_id = job_id
drm_job_state.ofile = ofile
drm_job_state.efile = efile
- drm_job_state.rcfile = rcfile
+ drm_job_state.ecfile = ecfile
drm_job_state.job_file = jt.remoteCommand
drm_job_state.old_state = 'new'
drm_job_state.running = False
@@ -316,17 +323,22 @@
"""
ofile = drm_job_state.ofile
efile = drm_job_state.efile
- rcfile = drm_job_state.rcfile
+ ecfile = drm_job_state.ecfile
job_file = drm_job_state.job_file
# collect the output
# wait for the files to appear
which_try = 0
+ # By default, the exit code is 0, which typically indicates success.
+ exit_code = 0
while which_try < (self.app.config.retry_job_output_collection + 1):
try:
ofh = file(ofile, "r")
efh = file(efile, "r")
+ ecfh = file(ecfile, "r")
stdout = ofh.read( 32768 )
stderr = efh.read( 32768 )
+ # The exit code should only be 8 bits, but read more anyway
+ exit_code_str = ecfh.read(32)
which_try = (self.app.config.retry_job_output_collection + 1)
except:
if which_try == self.app.config.retry_job_output_collection:
@@ -337,8 +349,15 @@
time.sleep(1)
which_try += 1
+ # Decode the exit code. If it's bogus, then just use 0.
try:
- drm_job_state.job_wrapper.finish( stdout, stderr )
+ exit_code = int(exit_code_str)
+ except:
+ log.warning( "Exit code " + exit_code_str + " invalid. Using 0." )
+ exit_code = 0
+
+ try:
+ drm_job_state.job_wrapper.finish( stdout, stderr, exit_code )
except:
log.exception("Job wrapper finish method failed")
@@ -382,7 +401,7 @@
drm_job_state = DRMAAJobState()
drm_job_state.ofile = "%s.drmout" % os.path.join(os.getcwd(), job_wrapper.working_directory, job_wrapper.get_id_tag())
drm_job_state.efile = "%s.drmerr" % os.path.join(os.getcwd(), job_wrapper.working_directory, job_wrapper.get_id_tag())
- drm_job_state.rcfile = "%s.drmrc" % os.path.join(os.getcwd(), job_wrapper.working_directory, job_wrapper.get_id_tag())
+ drm_job_state.ecfile = "%s.drmec" % os.path.join(os.getcwd(), job_wrapper.working_directory, job_wrapper.get_id_tag())
drm_job_state.job_file = "%s/galaxy_%s.sh" % (self.app.config.cluster_files_directory, job.id)
drm_job_state.job_id = str( job.job_runner_external_id )
drm_job_state.runner_url = job_wrapper.get_job_runner()
diff -r 319b2b0e832c8ea5ef520c76722f7ad6270507c3 -r 2e0254547b09bfccbd0834524a0fb4919b6a6efd lib/galaxy/jobs/runners/pbs.py
--- a/lib/galaxy/jobs/runners/pbs.py
+++ b/lib/galaxy/jobs/runners/pbs.py
@@ -532,16 +532,22 @@
stdout = ofh.read( 32768 )
stderr = efh.read( 32768 )
# This should be an 8-bit exit code, but read ahead anyway:
- exit_code = ecfh.read(32)
+ exit_code_str = ecfh.read(32)
except:
stdout = ''
stderr = 'Job output not returned by PBS: the output datasets were deleted while the job was running, the job was manually dequeued or there was a cluster error.'
# By default, the exit code is 0, which usually indicates success
# (although clearly some error happened).
+ exit_code_str = ""
+
+ # Translate the exit code string to an integer; use 0 on failure.
+ try:
+ exit_code = int( exit_code_str )
+ except:
+ log.warning( "Exit code " + exit_code_str + " was invalid. Using 0." )
exit_code = 0
- log.debug(stderr)
- log.debug( "Job exit code: " + exit_code )
+ # Call on the job wrapper to complete the call:
try:
pbs_job_state.job_wrapper.finish( stdout, stderr, exit_code )
except:
Repository URL:
https://bitbucket.org/galaxy/galaxy-central/
--
This is a commit notification from
bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.