4 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/4daa56624e2a/ changeset: 4daa56624e2a branch: zero-merge user: peterjc date: 2012-10-19 15:02:54 summary: Error when attempt to merge zero files. Previously the code would try to call cat with no arguments, which would wait for stdin and so do nothing and stall. This error condition could be triggered with a cluster job when Galaxy didn't get any output back. affected #: 1 file diff -r 4cd4f8bef29d17a7bca74e235031060e2289ebdc -r 4daa56624e2a7dbe16c3024eb0871040d1f98b8d lib/galaxy/datatypes/data.py --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -536,7 +536,9 @@ TODO: Do we need to merge gzip files using gzjoin? cat seems to work, but might be brittle. Need to revisit this. """ - if len(split_files) == 1: + if not split_files: + raise ValueError('Asked to merge zero files as %s' % output_file) + elif len(split_files) == 1: cmd = 'mv -f %s %s' % ( split_files[0], output_file ) else: cmd = 'cat %s > %s' % ( ' '.join(split_files), output_file ) https://bitbucket.org/galaxy/galaxy-central/changeset/273c02b75ac1/ changeset: 273c02b75ac1 branch: zero-merge user: peterjc date: 2012-10-19 15:51:37 summary: Don't attempt to merge zero files affected #: 1 file diff -r 4daa56624e2a7dbe16c3024eb0871040d1f98b8d -r 273c02b75ac110937c3a0045785df41cef863fff lib/galaxy/jobs/splitters/multi.py --- a/lib/galaxy/jobs/splitters/multi.py +++ b/lib/galaxy/jobs/splitters/multi.py @@ -115,6 +115,7 @@ try: working_directory = job_wrapper.working_directory task_dirs = [os.path.join(working_directory, x) for x in os.listdir(working_directory) if x.startswith('task_')] + assert task_dirs, "Should be at least one sub-task!" # TODO: Output datasets can be very complex. 
This doesn't handle metadata files outputs = job_wrapper.get_output_hdas_and_fnames() pickone_done = [] @@ -129,10 +130,16 @@ # Just include those files f in the output list for which the # file f exists; some files may not exist if a task fails. output_files = [ f for f in output_files if os.path.exists(f) ] - log.debug('files %s ' % output_files) - output_type.merge(output_files, output_file_name) - log.debug('merge finished: %s' % output_file_name) - pass # TODO: merge all the files + if output_files: + log.debug('files %s ' % output_files) + if len(output_files) < len(task_dirs): + log.debug('merging only %i out of expected %i files for %s' + % (len(output_files), len(task_dirs), output_file_name)) + output_type.merge(output_files, output_file_name) + log.debug('merge finished: %s' % output_file_name) + else: + log.debug('nothing to merge for %s (expected %i files)' + % (output_file_name, len(task_dirs))) elif output in pickone_outputs: # just pick one of them if output not in pickone_done: https://bitbucket.org/galaxy/galaxy-central/changeset/dbd7336b62c9/ changeset: dbd7336b62c9 branch: zero-merge user: peterjc date: 2012-10-19 16:23:02 summary: Log nothing to merge to stderr affected #: 1 file diff -r 273c02b75ac110937c3a0045785df41cef863fff -r dbd7336b62c9abf939ea4081482c744b56838e31 lib/galaxy/jobs/splitters/multi.py --- a/lib/galaxy/jobs/splitters/multi.py +++ b/lib/galaxy/jobs/splitters/multi.py @@ -138,8 +138,10 @@ output_type.merge(output_files, output_file_name) log.debug('merge finished: %s' % output_file_name) else: - log.debug('nothing to merge for %s (expected %i files)' - % (output_file_name, len(task_dirs))) + msg = 'nothing to merge for %s (expected %i files)' \ + % (output_file_name, len(task_dirs)) + log.debug(msg) + stderr += msg + "\n" elif output in pickone_outputs: # just pick one of them if output not in pickone_done: https://bitbucket.org/galaxy/galaxy-central/changeset/ae9d794074bc/ changeset: ae9d794074bc user: smcmanus date: 
2012-10-19 21:53:49 summary: Merged in peterjc/galaxy-central/zero-merge (pull request #78) affected #: 2 files diff -r 6db344e2ce554f4332b048dd41bb158e26c49fed -r ae9d794074bcc4732601e480ff54d53b0b2ee6e7 lib/galaxy/datatypes/data.py --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -536,7 +536,9 @@ TODO: Do we need to merge gzip files using gzjoin? cat seems to work, but might be brittle. Need to revisit this. """ - if len(split_files) == 1: + if not split_files: + raise ValueError('Asked to merge zero files as %s' % output_file) + elif len(split_files) == 1: cmd = 'mv -f %s %s' % ( split_files[0], output_file ) else: cmd = 'cat %s > %s' % ( ' '.join(split_files), output_file ) diff -r 6db344e2ce554f4332b048dd41bb158e26c49fed -r ae9d794074bcc4732601e480ff54d53b0b2ee6e7 lib/galaxy/jobs/splitters/multi.py --- a/lib/galaxy/jobs/splitters/multi.py +++ b/lib/galaxy/jobs/splitters/multi.py @@ -115,6 +115,7 @@ try: working_directory = job_wrapper.working_directory task_dirs = [os.path.join(working_directory, x) for x in os.listdir(working_directory) if x.startswith('task_')] + assert task_dirs, "Should be at least one sub-task!" # TODO: Output datasets can be very complex. This doesn't handle metadata files outputs = job_wrapper.get_output_hdas_and_fnames() pickone_done = [] @@ -129,10 +130,18 @@ # Just include those files f in the output list for which the # file f exists; some files may not exist if a task fails. 
output_files = [ f for f in output_files if os.path.exists(f) ] - log.debug('files %s ' % output_files) - output_type.merge(output_files, output_file_name) - log.debug('merge finished: %s' % output_file_name) - pass # TODO: merge all the files + if output_files: + log.debug('files %s ' % output_files) + if len(output_files) < len(task_dirs): + log.debug('merging only %i out of expected %i files for %s' + % (len(output_files), len(task_dirs), output_file_name)) + output_type.merge(output_files, output_file_name) + log.debug('merge finished: %s' % output_file_name) + else: + msg = 'nothing to merge for %s (expected %i files)' \ + % (output_file_name, len(task_dirs)) + log.debug(msg) + stderr += msg + "\n" elif output in pickone_outputs: # just pick one of them if output not in pickone_done: Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because the commit notification service is enabled for the address that received this email.