[hg] galaxy 2859: Modified Bowtie wrapper to directly output SAM format, removing header as necessary
details: http://www.bx.psu.edu/hg/galaxy/rev/e02620ad3f41 changeset: 2859:e02620ad3f41 user: Kelly Vincent <kpvincent@bx.psu.edu> date: Thu Oct 08 18:07:02 2009 -0400 description: Modified Bowtie wrapper to directly output SAM format, removing header as necessary 5 file(s) affected in this change: test-data/bowtie_out1.sam test-data/bowtie_out2.sam test-data/bowtie_out3.sam tools/sr_mapping/bowtie_wrapper.py tools/sr_mapping/bowtie_wrapper.xml diffs (317 lines): diff -r 50e2d1a49815 -r e02620ad3f41 test-data/bowtie_out1.sam --- a/test-data/bowtie_out1.sam Thu Oct 08 15:53:11 2009 -0400 +++ b/test-data/bowtie_out1.sam Thu Oct 08 18:07:02 2009 -0400 @@ -1,1 +1,1 @@ -HWI-EAS91_1_30788AAXX:1:1:1513:715 16 chrM 9563 25 36M * 0 0 CTGACTACCACAACTAAACATCTATGCNNAAAAAAC I+-II?IDIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1513:715/1 16 chrM 9563 255 36M * 0 0 CTGACTACCACAACTAAACATCTATGCNNAAAAAAC I+-II?IDIIIIIIIIIIIIIIIIIII""IIIIIII XA:i:2 MD:Z:27A0G7 NM:i:2 diff -r 50e2d1a49815 -r e02620ad3f41 test-data/bowtie_out2.sam --- a/test-data/bowtie_out2.sam Thu Oct 08 15:53:11 2009 -0400 +++ b/test-data/bowtie_out2.sam Thu Oct 08 18:07:02 2009 -0400 @@ -1,2 +1,2 @@ -HWI-EAS91_1_30788AAXX:1:2:618:346 0 chrM 441 25 36M * 0 0 TAGACTACGAAAGTGACTTTAATACCTCTGACTACA IIIIIIIIIIIIIIIIIIIIIIIIIIIII%4II;I3 NM:i:0 X0:i:1 MD:Z:36 -HWI-EAS91_1_30788AAXX:1:2:618:346 16 chrM 652 25 36M * 0 0 CGATAAACCCCACCATCCATTGCTAATTCAGCCTAT )2I*IIIIIIFD6II5II.I9IIIIIIIIIIIIIII NM:i:1 X1:i:1 MD:Z:17A18 +HWI-EAS91_1_30788AAXX:1:2:618:346/1 99 chrM 441 255 36M = 652 247 TAGACTACGAAAGTGACTTTAATACCTCTGACTACA IIIIIIIIIIIIIIIIIIIIIIIIIIIII%4II;I3 XA:i:0 MD:Z:36 NM:i:0 +HWI-EAS91_1_30788AAXX:1:2:618:346/2 147 chrM 652 255 36M = 441 -247 CGATAAACCCCACCATCCATTGCTAATTCAGCCTAT )2I*IIIIIIFD6II5II.I9IIIIIIIIIIIIIII XA:i:1 MD:Z:18C17 NM:i:1 diff -r 50e2d1a49815 -r e02620ad3f41 test-data/bowtie_out3.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bowtie_out3.sam Thu Oct 08 
18:07:02 2009 -0400 @@ -0,0 +1,2 @@ +HWI-EAS91_1_30788AAXX:1:2:618:346/1 99 chrM 441 255 36M = 652 247 TAGACTACGAAAGTGACTTTAATACCTCTGACTACA IIIIIIIIIIIIIIIIIIIIIIIIIIIII%4II;I3 XA:i:0 MD:Z:36 NM:i:0 +HWI-EAS91_1_30788AAXX:1:2:618:346/2 147 chrM 652 255 36M = 441 -247 CGATAAACCCCACCATCCATTGCTAATTCAGCCTAT )2I*IIIIIIFD6II5II.I9IIIIIIIIIIIIIII XA:i:0 MD:Z:18C17 NM:i:1 diff -r 50e2d1a49815 -r e02620ad3f41 tools/sr_mapping/bowtie_wrapper.py --- a/tools/sr_mapping/bowtie_wrapper.py Thu Oct 08 15:53:11 2009 -0400 +++ b/tools/sr_mapping/bowtie_wrapper.py Thu Oct 08 18:07:02 2009 -0400 @@ -33,7 +33,6 @@ parser.add_option('', '--valAlign', dest='valAlign', help='Report up to n valid arguments per read') parser.add_option('', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read') parser.add_option('', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist') - parser.add_option('', '--offbase', dest='offbase', help='Number the first base of a reference sequence as n when outputting alignments') parser.add_option('', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions") parser.add_option('', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read') parser.add_option('', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable') @@ -43,7 +42,6 @@ parser.add_option('', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate') parser.add_option('', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand') 
parser.add_option('', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand') - parser.add_option('', '--phased', dest='phased', help='Whether or not it should alternate between using the forward and mirror indexes in a series of phases so that only half of the index is resident in memory at one time') parser.add_option('', '--offrate', dest='offrate', help='Override the offrate of the index to n') parser.add_option('', '--seed', dest='seed', help='Seed for pseudo-random number generator') parser.add_option('', '--dbkey', dest='dbkey', help='') @@ -61,8 +59,8 @@ parser.add_option('', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file') parser.add_option('', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator') parser.add_option('', '--icutoff', dest='icutoff', help='Number of first bases of the reference sequence to index') - parser.add_option('', '--ioldpmap', dest='ioldpmap', help='Use the scheme for mapping joined reference locations to original reference locations used in versions of Bowtie prior to 0.9.8') parser.add_option('', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set') + parser.add_option('', '--suppressHeader', dest='suppressHeader', help='Suppress header') (options, args) = parser.parse_args() # index if necessary @@ -72,7 +70,7 @@ indexing_cmds = '' else: try: - indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s %s' % \ + indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \ (('','--noauto')[options.iauto_b=='set'], ('','--packed')[options.ipacked=='packed'], ('','--bmax %s'%options.ibmax)[options.ibmax!='None' and options.ibmax>=1], @@ -84,8 +82,7 @@ ('','--ntoa')[options.intoa=='yes'], ('--little','--big')[options.iendian=='big'], ('','--seed %s'%options.iseed)[int(options.iseed)>0], - ('','--cutoff 
%s'%options.icutoff)[int(options.icutoff)>0], - ('','--oldpmap')[options.ioldpmap=='yes']) + ('','--cutoff %s'%options.icutoff)[int(options.icutoff)>0]) except ValueError: indexing_cmds = '' @@ -105,11 +102,11 @@ # set up aligning and generate aligning command options # automatically set threads to 8 in both cases if options.params == 'pre_set': - aligning_cmds = '-p %s' % options.threads + aligning_cmds = '-p %s -S' % options.threads else: try: aligning_cmds = '%s %s %s %s %s %s %s %s %s %s %s %s %s %s ' \ - '%s %s %s %s %s %s %s %s %s %s %s %s -p %s' % \ + '%s %s %s %s %s %s %s %s %s %s -p %s -S' % \ (('','-s %s'%options.skip)[options.skip!='None'], ('','-u %s'%options.alignLimit)[int(options.alignLimit)>0], ('','-5 %s'%options.trimH)[int(options.trimH)>=0], @@ -132,41 +129,47 @@ ('','-m %s'%options.suppressAlign)[int(options.suppressAlign)>=0], ('','--best')[options.best=='doBest'], ('','--strata')[options.strata=='doStrata'], - ('','-B %s'%options.offbase)[int(options.offbase)>=0], - ('','-z %s'%options.phased)[options.phased!='None'], ('','-o %s'%options.offrate)[int(options.offrate)>=0], ('','--seed %s'%options.seed)[int(options.seed)>=0], options.threads) except ValueError: aligning_cmds = '-p %s' % options.threads - tmp_out = tempfile.NamedTemporaryFile() - # prepare actual aligning commands if options.paired == 'paired': - cmd2 = 'bowtie %s %s -1 %s -2 %s > %s 2> /dev/null' % (aligning_cmds, options.ref, options.input1, options.input2, tmp_out.name) + cmd2 = 'bowtie %s %s -1 %s -2 %s > %s 2> /dev/null' % (aligning_cmds, options.ref, options.input1, options.input2, options.output) else: - cmd2 = 'bowtie %s %s %s > %s 2> /dev/null' % (aligning_cmds, options.ref, options.input1, tmp_out.name) - # prepare command to convert bowtie output to sam and alternative - cmd3 = 'bowtie2sam.pl %s > %s' % (tmp_out.name, options.output) - cmd4 = 'cp %s %s' % (tmp_out.name, options.output) + cmd2 = 'bowtie %s %s %s > %s 2> /dev/null' % (aligning_cmds, options.ref, 
options.input1, options.output) # align try: os.system(cmd2) except Exception, erf: stop_err("Error aligning sequence\n" + str(erf)) - if len(file(tmp_out.name,'r').read()) > 0: - #convert + + # remove header if necessary + if options.suppressHeader == 'true': + tmp_out = tempfile.NamedTemporaryFile() + cmd3 = 'cp %s %s' % (options.output, tmp_out.name) try: os.system(cmd3) except Exception, erf: - stop_err('Error converting output to sam format\n' + str(erf)) - else: - try: - os.system(cmd4) - sys.stdout.write('Alignment file contained no data') - except Exception, erf: - stop_err('Error producing alignment file. File contained no data.\n' + str(erf)) + stop_err("Error copying output file before removing headers\n" + str(erf)) + output = file(tmp_out.name, 'r') + fout = file(options.output, 'w') + header = True + line = output.readline() + while line.strip() != '': + if header: + if line.startswith('@HD') or line.startswith('@SQ') or line.startswith('@RG') or line.startswith('@PG') or line.startswith('@CO'): + pass + else: + header = False + fout.write(line) + else: + fout.write(line) + line = output.readline() + fout.close() + tmp_out.close() if __name__=="__main__": __main__() diff -r 50e2d1a49815 -r e02620ad3f41 tools/sr_mapping/bowtie_wrapper.xml --- a/tools/sr_mapping/bowtie_wrapper.xml Thu Oct 08 15:53:11 2009 -0400 +++ b/tools/sr_mapping/bowtie_wrapper.xml Thu Oct 08 18:07:02 2009 -0400 @@ -32,22 +32,15 @@ --valAlign=$singlePaired.params.valAlign --allValAligns=$singlePaired.params.allValAligns --suppressAlign=$singlePaired.params.suppressAlign - --offbase=$singlePaired.params.offbase --offrate=$singlePaired.params.offrate --seed=$singlePaired.params.seed --best=$singlePaired.params.bestOption.best #if $singlePaired.params.bestOption.best == "doBest": --maxBacktracks=$singlePaired.params.bestOption.maxBacktracks --strata=$singlePaired.params.bestOption.strata - --phased="None" #else: --maxBacktracks="None" --strata="None" - #if $singlePaired.sPaired 
=="single": - --phased=$singlePaired.params.bestOption.phased - #else: - --phased="None" - #end if #end if #if $singlePaired.sPaired == "single": --minInsert="None" @@ -78,7 +71,6 @@ --valAlign="None" --allValAligns="None" --suppressAlign="None" - --offbase="None" --best="None" --maxBacktracks="None" --strata="None" @@ -88,7 +80,6 @@ --maxAlignAttempt="None" --forwardAlign="None" --reverseAlign="None" - --phased="None" --offrate="None" --seed="None" #end if @@ -123,7 +114,6 @@ --iendian=$refGenomeSource.indexParams.endian --iseed=$refGenomeSource.indexParams.seed --icutoff=$refGenomeSource.indexParams.cutoff - --ioldpmap=$refGenomeSource.indexParams.oldpmap #else: --iauto_b="None" --ipacked="None" @@ -138,8 +128,8 @@ --iendian="None" --iseed="None" --icutoff="None" - --ioldpmap="None" #end if + --suppressHeader=$suppressHeader </command> <inputs> <conditional name="refGenomeSource"> @@ -200,10 +190,6 @@ </param> <param name="seed" type="integer" value="-1" label="Seed for the pseudorandom number generator (--seed)" help="Use -1 to use default" /> <param name="cutoff" type="integer" value="-1" label="Number of first bases of the reference sequence to index (--cutoff)" help="Use -1 to use default" /> - <param name="oldpmap" type="select" label="Use the scheme for mapping joined reference locations to original reference locations used in versions of Bowtie prior to 0.9.8 (--oldpmap)" help="The old scheme uses padding and the new one doesn't"> - <option value="no">Use the new scheme</option> - <option value="yes">Use the old scheme</option> - </param> </when> <!-- index_full --> </conditional> </when> @@ -244,7 +230,6 @@ <option value="doAllValAligns">Report all valid alignments</option> </param> <param name="suppressAlign" type="integer" value="-1" label="Suppress all alignments for a read if more than n reportable alignments exist (-m)" help="-1 for no limit" /> - <param name="offbase" type="integer" value="0" label="Number the first base of a reference sequence as n 
when outputting alignments (-B)" /> <conditional name="bestOption"> <param name="best" type="select" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best)" help="Removes all strand bias. Only affects which alignments are reported by Bowtie. Runs slower with best option"> <option value="noBest">Do not use best</option> @@ -252,10 +237,6 @@ </param> <when value="noBest"> <param name="maxBacktracks" type="integer" value="125" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" /> - <param name="phased" type="select" label="Whether or not it should alternate between using the forward and mirror indexes in a series of phases so that only half of the index is resident in memory at one time (-z)"> - <option value="noPhased">Don't alternate</option> - <option value="doPhased">Do alternate</option> - </param> </when> <when value="doBest"> <param name="maxBacktracks" type="integer" value="800" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" /> @@ -318,7 +299,6 @@ <option value="doAllValAligns">Report all valid alignments</option> </param> <param name="suppressAlign" type="integer" value="-1" label="Suppress all alignments for a pair if more than n reportable alignments exist (-m)" help="-1 for no limit" /> - <param name="offbase" type="integer" value="0" label="Number the first base of a reference sequence as n when outputting alignments (-B)" /> <conditional name="bestOption"> <param name="best" type="select" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best)" help="Removes all strand bias. Only affects which alignments are reported by Bowtie. 
Runs slower with best option"> <option value="noBest">Do not use best</option> @@ -341,6 +321,7 @@ </conditional> <!-- params --> </when> <!-- paired --> </conditional> <!-- singlePaired --> + <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Suppress the header in the output SAM file" help="Bowtie produces SAM with several lines of header information" /> </inputs> <outputs> <data format="sam" name="output" /> @@ -352,6 +333,7 @@ <param name="sPaired" value="single" /> <param name="input1" ftype="fastqsanger" value="bowtie_in1.fastq" /> <param name="settings_type" value="pre_set" /> + <param name="suppressHeader" value="true" /> <output name="output" ftype="sam" file="bowtie_out1.sam" /> </test> <test> @@ -362,6 +344,7 @@ <param name="input1" ftype="fastqsanger" value="bowtie_in2.fastq" /> <param name="input2" ftype="fastqsanger" value="bowtie_in3.fastq" /> <param name="settings_type" value="pre_set" /> + <param name="suppressHeader" value="true" /> <output name="output" ftype="sam" file="bowtie_out2.sam" /> </test> <test> @@ -381,10 +364,10 @@ <param name="endian" value="little" /> <param name="seed" value="-1" /> <param name="cutoff" value="-1" /> - <param name="oldpmap" value="no" /> <param name="sPaired" value="single" /> <param name="input1" ftype="fastqsanger" value="bowtie_in1.fastq" /> <param name="settings_type" value="pre_set" /> + <param name="suppressHeader" value="true" /> <output name="output" ftype="sam" file="bowtie_out1.sam" /> </test> <test> @@ -413,13 +396,13 @@ <param name="valAlign" value="1" /> <param name="allValAligns" value="noAllValAligns" /> <param name="suppressAlign" value="-1" /> - <param name="offbase" value="0" /> <param name="best" value="doBest" /> <param name="maxBacktracks" value="800" /> <param name="strata" value="noStrata" /> <param name="offrate" value="-1" /> <param name="seed" value="403" /> - <output name="output" ftype="sam" file="bowtie_out2.sam" /> + <param 
name="suppressHeader" value="true" /> + <output name="output" ftype="sam" file="bowtie_out3.sam" /> </test> </tests> <help> @@ -514,7 +497,6 @@ --little Endianness. [--little] --seed <int> Random seed. Use <int> as the seed for the pseudo-random number generator. [off] --cutoff <int> Cutoff. Index only the first <int> bases of the reference sequences (cumulative across sequences) and ignore the rest. [off] - --oldpmap Use old mapping scheme. Use the padding-based scheme from Bowtie versions before 0.9.8 instead of the current scheme. [off] For aligning (bowtie):: -s <int> Skip. Do not align the first <int> reads or pairs in the input. [off] @@ -542,10 +524,7 @@ -m <int> Suppress alignments. Suppress all alignments for a particular read or pair if more than <int> reportable alignments exist for it. [no limit] --best Best mode. Make Bowtie guarantee that reported singleton alignments are "best" in terms of stratum (the number of mismatches) and quality values at mismatched position. [off] --strata Best strata. When running in best mode, report alignments that fall into the best stratum if there are ones falling into more than one. [off] - -B <int> First base number. When outputting alignments, number the first base of a reference sequence as <int>. [0] - -z <int> Phased. Alternate between using the forward and mirror indexes in a series of phases such that only one half of the index is resident in memory at one time. Cannot be used with paired-end alignment. [off] -o <int> Offrate override. Override the offrate of the index with <int>. Some row markings are discarded when index read into memory. <int> must be greater than the value used to build the index (default: 5). [off] - --mm I/O for index loading. Choosing this option means that memory-mapped I/O will be used to load the index instead of the normal POSIX/C file I/O. Allows memory-efficient parallelization where using -p is not desirable. [off] --seed <int> Random seed. 
Use <int> as the seed for the pseudo-random number generator. [off] </help>
participants (1)
-
Greg Von Kuster