details: http://www.bx.psu.edu/hg/galaxy/rev/c19605dca27d changeset: 3628:c19605dca27d user: Kelly Vincent <kpvincent@bx.psu.edu> date: Mon Apr 12 15:56:13 2010 -0400 description: Updated NGS tools' stderr handling to use file rather than pipe to allow for large stderr output diffstat: tools/metag_tools/megablast_wrapper.py | 13 +++++++++- tools/samtools/sam_merge.py | 15 ++++++++++-- tools/samtools/sam_pileup.py | 18 ++++++++++++--- tools/samtools/sam_to_bam.py | 27 ++++++++++++++++++----- tools/sr_mapping/bowtie_wrapper.py | 26 +++++++++++++++++----- tools/sr_mapping/bowtie_wrapper.xml | 4 +- tools/sr_mapping/bwa_wrapper.py | 38 +++++++++++++++++++++++++-------- 7 files changed, 109 insertions(+), 32 deletions(-) diffs (406 lines): diff -r f32497f2e001 -r c19605dca27d tools/metag_tools/megablast_wrapper.py --- a/tools/metag_tools/megablast_wrapper.py Mon Apr 12 10:30:25 2010 -0400 +++ b/tools/metag_tools/megablast_wrapper.py Mon Apr 12 15:56:13 2010 -0400 @@ -74,24 +74,33 @@ print megablast_command + tmp = tempfile.NamedTemporaryFile().name try: - proc = subprocess.Popen( args=megablast_command, shell=True, stderr=subprocess.PIPE ) + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=megablast_command, shell=True, stderr=tmp_stderr.fileno() ) returncode = proc.wait() + tmp_stderr.close() # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) stderr = '' buffsize = 1048576 try: while True: - stderr += proc.stderr.read( buffsize ) + stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass + tmp_stderr.close() if returncode != 0: raise Exception, stderr + if os.path.exists( tmp ): + os.unlink( tmp ) except Exception, e: if os.path.exists( mega_temp_output ): os.unlink( mega_temp_output ) + if os.path.exists( tmp ): + os.unlink( tmp ) stop_err( 'Error indexing reference sequence. ' + str( e ) ) output = open( output_filename, 'w' ) diff -r f32497f2e001 -r c19605dca27d tools/samtools/sam_merge.py --- a/tools/samtools/sam_merge.py Mon Apr 12 10:30:25 2010 -0400 +++ b/tools/samtools/sam_merge.py Mon Apr 12 15:56:13 2010 -0400 @@ -9,7 +9,7 @@ [input3[,input4[,input5[,...]]]] """ -import os, subprocess, sys +import os, subprocess, sys, tempfile def stop_err( msg ): sys.stderr.write( '%s\n' % msg ) @@ -22,22 +22,31 @@ stop_err( 'There are not enough files to merge' ) filenames = sys.argv[3:] cmd = 'samtools merge %s %s %s' % ( outfile, infile, ' '.join( filenames ) ) + tmp = tempfile.NamedTemporaryFile().name try: - proc = subprocess.Popen( args=cmd, shell=True, stderr=subprocess.PIPE ) + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() ) returncode = proc.wait() + tmp_stderr.close() # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) stderr = '' buffsize = 1048576 try: while True: - stderr += proc.stderr.read( buffsize ) + stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass + tmp_stderr.close() if returncode != 0: raise Exception, stderr + if os.path.exists( tmp ): + os.unlink( tmp ) except Exception, e: + if os.path.exists( tmp ): + os.unlink( tmp ) stop_err( 'Error running SAMtools merge tool\n' + str( e ) ) if os.path.getsize( outfile ) > 0: sys.stdout.write( '%s files merged.' % ( len( sys.argv ) - 2 ) ) diff -r f32497f2e001 -r c19605dca27d tools/samtools/sam_pileup.py --- a/tools/samtools/sam_pileup.py Mon Apr 12 10:30:25 2010 -0400 +++ b/tools/samtools/sam_pileup.py Mon Apr 12 15:56:13 2010 -0400 @@ -88,36 +88,46 @@ elif options.ref == 'history': os.symlink( options.ownFile, tmpf1_name ) cmdIndex = 'samtools faidx %s' % ( tmpf1_name ) - proc = subprocess.Popen( args=cmdIndex, shell=True, cwd=tmpDir, stderr=subprocess.PIPE ) + tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmdIndex, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() ) returncode = proc.wait() + tmp_stderr.close() # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) stderr = '' buffsize = 1048576 try: while True: - stderr += proc.stderr.read( buffsize ) + stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass + tmp_stderr.close() #did index succeed? if returncode != 0: raise Exception, 'Error creating index file\n' + stderr cmd = cmd % ( opts, tmpf1_name, tmpf0bam_name, options.output1 ) #perform pileup command - proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir, stderr=subprocess.PIPE ) + tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() ) returncode = proc.wait() + tmp_stderr.close() #did it succeed? # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) stderr = '' buffsize = 1048576 try: while True: - stderr += proc.stderr.read( buffsize ) + stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass + tmp_stderr.close() if returncode != 0: raise Exception, stderr except Exception, e: diff -r f32497f2e001 -r c19605dca27d tools/samtools/sam_to_bam.py --- a/tools/samtools/sam_to_bam.py Mon Apr 12 10:30:25 2010 -0400 +++ b/tools/samtools/sam_to_bam.py Mon Apr 12 15:56:13 2010 -0400 @@ -75,18 +75,23 @@ os.symlink( options.ref_file, fai_index_file_base ) fai_index_file_path = '%s.fai' % fai_index_file_base command = 'samtools faidx %s' % fai_index_file_base - proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=subprocess.PIPE ) + tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) returncode = proc.wait() + tmp_stderr.close() # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) stderr = '' buffsize = 1048576 try: while True: - stderr += proc.stderr.read( buffsize ) + stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass + tmp_stderr.close() if returncode != 0: raise Exception, stderr if len( open( fai_index_file_path ).read().strip() ) == 0: @@ -104,18 +109,23 @@ # IMPORTANT NOTE: for some reason the samtools view command gzips the resulting bam file without warning, # and the docs do not currently state that this occurs ( very bad ). command = 'samtools view -bt %s -o %s %s' % ( fai_index_file_path, tmp_aligns_file_name, options.input1 ) - proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=subprocess.PIPE ) + tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) returncode = proc.wait() + tmp_stderr.close() # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) stderr = '' buffsize = 1048576 try: while True: - stderr += proc.stderr.read( buffsize ) + stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass + tmp_stderr.close() if returncode != 0: raise Exception, stderr if len( open( tmp_aligns_file_name ).read() ) == 0: @@ -133,18 +143,23 @@ tmp_sorted_aligns_file_name = tmp_sorted_aligns_file.name tmp_sorted_aligns_file.close() command = 'samtools sort %s %s' % ( tmp_aligns_file_name, tmp_sorted_aligns_file_name ) - proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=subprocess.PIPE ) + tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) returncode = proc.wait() + tmp_stderr.close() # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) stderr = '' buffsize = 1048576 try: while True: - stderr += proc.stderr.read( buffsize ) + stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass + tmp_stderr.close() if returncode != 0: raise Exception, stderr except Exception, e: diff -r f32497f2e001 -r c19605dca27d tools/sr_mapping/bowtie_wrapper.py --- a/tools/sr_mapping/bowtie_wrapper.py Mon Apr 12 10:30:25 2010 -0400 +++ b/tools/sr_mapping/bowtie_wrapper.py Mon Apr 12 15:56:13 2010 -0400 @@ -186,25 +186,34 @@ ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc, inoref, options.ioffrate, iftab, intoa, iendian, iseed, icutoff, colorspace ) - except ValueError: - indexing_cmds = '%s' % colorspace + except ValueError, e: + # clean up temp dir + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + stop_err( 'Something is wrong with the indexing parameters and the indexing and alignment could not be run\n' + str( e ) ) ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir ) ref_file_name = ref_file.name ref_file.close() os.symlink( options.ref, ref_file_name ) cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name ) try: - proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=subprocess.PIPE, stdout=subprocess.PIPE ) + tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) stderr = '' buffsize = 1048576 try: while True: - stderr += proc.stderr.read( buffsize ) + stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass + tmp_stderr.close() if returncode != 0: raise Exception, stderr except Exception, e: @@ -358,18 +367,23 @@ else: cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output ) # align - proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=subprocess.PIPE ) + tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) returncode = proc.wait() + tmp_stderr.close() # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) stderr = '' buffsize = 1048576 try: while True: - stderr += proc.stderr.read( buffsize ) + stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass + tmp_stderr.close() if returncode != 0: raise Exception, stderr # check that there are results in the output file diff -r f32497f2e001 -r c19605dca27d tools/sr_mapping/bowtie_wrapper.xml --- a/tools/sr_mapping/bowtie_wrapper.xml Mon Apr 12 10:30:25 2010 -0400 +++ b/tools/sr_mapping/bowtie_wrapper.xml Mon Apr 12 15:56:13 2010 -0400 @@ -181,8 +181,8 @@ --maxAlignAttempt="None" --forwardAlign="None" --reverseAlign="None" - #end if - #end if + #end if + #end if </command> <inputs> <conditional name="refGenomeSource"> diff -r f32497f2e001 -r c19605dca27d tools/sr_mapping/bwa_wrapper.py --- a/tools/sr_mapping/bwa_wrapper.py Mon Apr 12 10:30:25 2010 -0400 +++ b/tools/sr_mapping/bwa_wrapper.py Mon Apr 12 15:56:13 2010 -0400 @@ -91,18 +91,23 @@ indexing_cmds = '-a %s' % indexingAlg cmd1 = 'bwa index %s %s' % ( indexing_cmds, ref_file_name ) try: - proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=subprocess.PIPE ) + tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) returncode = proc.wait() + tmp_stderr.close() # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) stderr = '' buffsize = 1048576 try: while True: - stderr += proc.stderr.read( buffsize ) + stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass + tmp_stderr.close() if returncode != 0: raise Exception, stderr except Exception, e: @@ -166,52 +171,67 @@ try: # align try: - proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_dir, stderr=subprocess.PIPE ) + tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) returncode = proc.wait() + tmp_stderr.close() # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) stderr = '' try: while True: - stderr += proc.stderr.read( buffsize ) + stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass + tmp_stderr.close() if returncode != 0: raise Exception, stderr except Exception, e: raise Exception, 'Error aligning sequence. ' + str( e ) # and again if paired data try: - if cmd2b: - proc = subprocess.Popen( args=cmd2b, shell=True, cwd=tmp_dir, stderr=subprocess.PIPE ) + if cmd2b: + tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd2b, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) returncode = proc.wait() + tmp_stderr.close() # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) stderr = '' try: while True: - stderr += proc.stderr.read( buffsize ) + stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass + tmp_stderr.close() if returncode != 0: raise Exception, stderr except Exception, e: raise Exception, 'Error aligning second sequence. ' + str( e ) # generate align try: - proc = subprocess.Popen( args=cmd3, shell=True, cwd=tmp_dir, stderr=subprocess.PIPE ) + tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd3, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) returncode = proc.wait() + tmp_stderr.close() # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) stderr = '' try: while True: - stderr += proc.stderr.read( buffsize ) + stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass + tmp_stderr.close() if returncode != 0: raise Exception, stderr except Exception, e: