details: http://www.bx.psu.edu/hg/galaxy/rev/0564441c5043 changeset: 2499:0564441c5043 user: Kelly Vincent <kpvincent@bx.psu.edu> date: Fri Jul 24 15:13:11 2009 -0400 description: Added BWA wrapper tool to Short Read Mapping Tools 5 file(s) affected in this change: tool-data/sequence_index_base.loc.sample tool-data/sequence_index_color.loc.sample tool_conf.xml.sample tools/sr_mapping/bwa_wrapper.py tools/sr_mapping/bwa_wrapper.xml diffs (614 lines): diff -r 643e3cd86e0b -r 0564441c5043 tool-data/sequence_index_base.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/sequence_index_base.loc.sample Fri Jul 24 15:13:11 2009 -0400 @@ -0,0 +1,28 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of BWA-indexed sequence data files. You will need +#to create these data files and then create a sequence_index_base.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The sequence_index_base.loc +#file has this format (white space characters are TAB characters): +# +#<build> <file_base> +# +#So, for example, if you had the phiX index stored in +#/depot/data2/galaxy/phiX/base/, +#then the sequence_index_base.loc entry would look like this: +# +#phiX /depot/data2/galaxy/phiX/base/phiX.fa +# +#and your /depot/data2/galaxy/phiX/base/ directory +#would contain phiX.fa.* files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 phiX.fa.amb +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 phiX.fa.ann +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 phiX.fa.bwt +#...etc... +# +#Your sequence_index_base.loc file should include an entry per line for +#each index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. 
For example: +# +#phiX /depot/data2/galaxy/phiX/base/phiX.fa diff -r 643e3cd86e0b -r 0564441c5043 tool-data/sequence_index_color.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/sequence_index_color.loc.sample Fri Jul 24 15:13:11 2009 -0400 @@ -0,0 +1,28 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of BWA-indexed sequence data files. You will need +#to create these data files and then create a sequence_index_color.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The sequence_index_color.loc +#file has this format (white space characters are TAB characters): +# +#<build> <file_base> +# +#So, for example, if you had the phiX index stored in +#/depot/data2/galaxy/phiX/color/, +#then the sequence_index_color.loc entry would look like this: +# +#phiX /depot/data2/galaxy/phiX/color/phiX.fa +# +#and your /depot/data2/galaxy/phiX/color/ directory +#would contain phiX.fa.* files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 phiX.fa.amb +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 phiX.fa.ann +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 phiX.fa.bwt +#...etc... +# +#Your sequence_index_color.loc file should include an entry per line for +#each index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. 
For example: +# +#phiX /depot/data2/galaxy/phiX/color/phiX.fa diff -r 643e3cd86e0b -r 0564441c5043 tool_conf.xml.sample --- a/tool_conf.xml.sample Fri Jul 24 12:16:32 2009 -0400 +++ b/tool_conf.xml.sample Fri Jul 24 15:13:11 2009 -0400 @@ -329,8 +329,9 @@ <tool file="metag_tools/megablast_xml_parser.xml" /> <tool file="metag_tools/blat_wrapper.xml" /> <tool file="metag_tools/mapping_to_ucsc.xml" /> + <tool file="sr_mapping/bwa_wrapper.xml" /> </section> <section name="Tracks" id="tracks"> <tool file="visualization/genetrack.xml" /> - </section> + </section> </toolbox> diff -r 643e3cd86e0b -r 0564441c5043 tools/sr_mapping/bwa_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/sr_mapping/bwa_wrapper.py Fri Jul 24 15:13:11 2009 -0400 @@ -0,0 +1,119 @@ +#! /usr/bin/python + +""" +Runs BWA on single-end or paired-end data. +Produces a SAM file containing the mappings. + +usage: python bwa_wrapper.py reference_sequence indexing_algorithm(is_or_bwtsw) forward_fastq_file reverse_fastq_file(or_None) output alignment_type(single_or_paired) parameters(pre_set_or_full) file_type(solexa_or_solid) file_source(indexed_or_history) maxEditDist fracMissingAligns maxGapOpens maxGapExtens disallowLongDel disallowIndel seed maxEditDistSeed numThreads mismatchPenalty gapOpenPenalty gapExtensPenalty colorSpaceRev suboptAlign noIterSearch outputTopN maxInsertSize maxOccurPairing\nThe last eighteen need to all be specified, or all be None +""" + +import optparse, os, sys, tempfile + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + #Parse Command Line + parser = optparse.OptionParser() + parser.add_option('', '--ref', dest='ref', help='The reference genome to use or index') + parser.add_option('', '--indexingAlg', dest='indexingAlg', help='The algorithm to use while indexing') + parser.add_option('', '--fastq', dest='fastq', help='The (forward) fastq file to use for the mapping') + parser.add_option('', '--rfastq', dest='rfastq', 
help='The reverse fastq file to use for mapping if paired-end data') + parser.add_option('', '--output', dest='output', help='The file to save the output (SAM format)') + parser.add_option('', '--genAlignType', dest='genAlignType', help='The type of pairing (single or paired)') + parser.add_option('', '--params', dest='params', help='Parameter setting to use (pre_set or full)') + parser.add_option('', '--fileType', dest='fileType', help='Type of reference sequence file (solid or solexa)') + parser.add_option('', '--fileSource', dest='fileSource', help='Whether to use a previously indexed reference sequence or one form history (indexed or history)') + parser.add_option('-n', '--maxEditDist', dest='maxEditDist', help='Maximum edit distance if integer') + parser.add_option('', '--fracMissingAligns', dest='fracMissingAligns', help='Fraction of missing alignments given 2% uniform base error rate if fraction') + parser.add_option('-o', '--maxGapOpens', dest='maxGapOpens', help='Maximum number of gap opens') + parser.add_option('-e', '--maxGapExtens', dest='maxGapExtens', help='Maximum number of gap extensions') + parser.add_option('-d', '--disallowLongDel', dest='disallowLongDel', help='Disallow a long deletion within specified bps') + parser.add_option('-i', '--disallowIndel', dest='disallowIndel', help='Disallow indel within specified bps') + parser.add_option('-l', '--seed', dest='seed', help='Take the first specified subsequences') + parser.add_option('-k', '--maxEditDistSeed', dest='maxEditDistSeed', help='Maximum edit distance to the seed') + parser.add_option('-t', '--numThreads', dest='numThreads', help='Number of threads') + parser.add_option('-M', '--mismatchPenalty', dest='mismatchPenalty', help='Mismatch penalty') + parser.add_option('-O', '--gapOpenPenalty', dest='gapOpenPenalty', help='Gap open penalty') + parser.add_option('-E', '--gapExtensPenalty', dest='gapExtensPenalty', help='Gap extension penalty') + parser.add_option('-c', '--colorSpaceRev', 
dest='colorSpaceRev', help="Reverse query but don't complement it") + parser.add_option('-R', '--suboptAlign', dest='suboptAlign', help='Proceed with suboptimal alignments even if the top hit is a repeat') + parser.add_option('-N', '--noIterSearch', dest='noIterSearch', help='Disable iterative search') + parser.add_option('', '--outputTopN', dest='outputTopN', help='Output top specified hits') + parser.add_option('', '--maxInsertSize', dest='maxInsertSize', help='Maximum insert size for a read pair to be considered mapped good') + parser.add_option('', '--maxOccurPairing', dest='maxOccurPairing', help='Maximum occurrences of a read for pairings') + (options, args) = parser.parse_args() + + # index if necessary + if options.fileSource == 'history': + # make temp directory for placement of indices and copy reference file there + tmp_dir = tempfile.gettempdir() + try: + os.system('cp %s %s' % (options.ref, tmp_dir)) + except Exception, erf: + stop_err('Error creating temp directory for indexing purposes\n' + str(erf)) + if options.fileType == 'solid': + indexing_cmds = '-c -a %s' % options.indexingAlg + else: + indexing_cmds = '-a %s' % options.indexingAlg + options.ref = os.path.join(tmp_dir,os.path.split(options.ref)[1]) + cmd1 = 'bwa index %s %s 2> /dev/null' % (indexing_cmds, options.ref) + try: + os.system(cmd1) + except Exception, erf: + stop_err('Error indexing reference sequence\n' + str(erf)) + + # set up aligning and generate aligning command options + if options.params == 'pre_set': + if options.fileType == 'solid': + aligning_cmds = '-c' + else: + aligning_cmds = '' + gen_alignment_cmds = '' + else: + aligning_cmds = '-n %s -o %s -e %s -d %s -i %s %s -k %s -t %s -M %s -O %s -E %s %s %s %s' % \ + ((options.fracMissingAligns, options.maxEditDist)[options.maxEditDist != '0'], + options.maxGapOpens, options.maxGapExtens, options.disallowLongDel, + options.disallowIndel, ('',' -l %s'%options.seed)[options.seed!=-1], + options.maxEditDistSeed, 
options.numThreads, options.mismatchPenalty, + options.gapOpenPenalty, options.gapExtensPenalty, ('',' -c')[options.colorSpaceRev=='true'], + ('',' -R')[options.suboptAlign=='true'], ('',' -N')[options.noIterSearch=='true']) + if options.genAlignType == 'single': + gen_alignment_cmds = '-n %s' % options.outputTopN + elif options.genAlignType == 'paired': + gen_alignment_cmds = '-a %s -o %s' % (options.maxInsertSize, options.maxOccurPairing) + + # set up output file + file(options.output,'w').write('QNAME\tFLAG\tRNAME\tPOS\tMAPQ\tCIGAR\tMRNM\tMPOS\tISIZE\tSEQ\tQUAL\tOPT\n') + tmp_align_out = tempfile.NamedTemporaryFile() + + # prepare actual aligning and generate aligning commands + cmd2 = 'bwa aln %s %s %s > %s 2> /dev/null' % (aligning_cmds, options.ref, options.fastq, tmp_align_out.name) + cmd2b = '' + if options.genAlignType == 'paired': + tmp_align_out2 = tempfile.NamedTemporaryFile() + cmd2b = 'bwa aln %s %s %s > %s 2> /dev/null' % (aligning_cmds, options.ref, options.rfastq, tmp_align_out2.name) + cmd3 = 'bwa sampe %s %s %s %s %s %s >> %s 2> /dev/null' % (gen_alignment_cmds, options.ref, tmp_align_out.name, tmp_align_out2.name, options.fastq, options.rfastq, options.output) + else: + cmd3 = 'bwa samse %s %s %s %s >> %s 2> /dev/null' % (gen_alignment_cmds, options.ref, tmp_align_out.name, options.fastq, options.output) + + # align + try: + os.system(cmd2) + except Exception, erf: + stop_err("Error aligning sequence\n" + str(erf)) + # and again if paired data + try: + if cmd2b: + os.system(cmd2b) + except Exception, erf: + stop_err("Error aligning second sequence\n" + str(erf)) + + # generate align + try: + os.system(cmd3) + except Exception, erf: + stop_err("Error sequence aligning sequence\n" + str(erf)) + +if __name__=="__main__": __main__() diff -r 643e3cd86e0b -r 0564441c5043 tools/sr_mapping/bwa_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/sr_mapping/bwa_wrapper.xml Fri Jul 24 15:13:11 2009 -0400 @@ -0,0 +1,409 @@ +<tool 
id="bwa_wrapper" name="BWA" version="1.0.0"> + <description> fast mapping of reads against reference sequence</description> + <command interpreter="python"> + bwa_wrapper.py + #if $solidOrSolexa.solidRefGenomeSource.refGenomeSource == "history": + --ref=$solidOrSolexa.solidRefGenomeSource.ownFile + --indexingAlg=$solidOrSolexa.solidRefGenomeSource.algorithm + #else: + --ref=$solidOrSolexa.solidRefGenomeSource.indices.value + --indexingAlg="None" + #end if + --fastq=$paired.input1 + #if $paired.sPaired == "paired": + --rfastq=$paired.input2 + #else: + --rfastq="None" + #end if + --output=$output + --genAlignType=$paired.sPaired + --params=$params.source_select + --fileType=$solidOrSolexa.solidSolexa + --fileSource=$solidOrSolexa.solidRefGenomeSource.refGenomeSource + #if $params.source_select == "pre_set": + --maxEditDist="None" + --fracMissingAligns="None" + --maxGapOpens="None" + --maxGapExtens="None" + --disallowLongDel="None" + --disallowIndel="None" + --seed="None" + --maxEditDistSeed="None" + --numThreads="None" + --mismatchPenalty="None" + --gapOpenPenalty="None" + --gapExtensPenalty="None" + --colorSpaceRev="None" + --suboptAlign="None" + --noIterSearch="None" + --outputTopN="None" + --maxInsertSize="None" + --maxOccurPairing="None" + #else: + --maxEditDist=$params.maxEditDist + --fracMissingAligns=$params.fracMissingAligns + --maxGapOpens=$params.maxGapOpens + --maxGapExtens=$params.maxGapExtens + --disallowLongDel=$params.disallowLongDel + --disallowIndel=$params.disallowIndel + --seed=$params.seed + --maxEditDistSeed=$params.maxEditDistSeed + --numThreads=$params.numThreads + --mismatchPenalty=$params.mismatchPenalty + --gapOpenPenalty=$params.gapOpenPenalty + --gapExtensPenalty=$params.gapExtensPenalty + --colorSpaceRev=$params.colorSpaceRev + --suboptAlign=$params.suboptAlign + --noIterSearch=$params.noIterSearch + --outputTopN=$params.outputTopN + --maxInsertSize=$params.maxInsertSize + --maxOccurPairing=$params.maxOccurPairing + #end if + </command> + 
<inputs> + <conditional name="solidOrSolexa"> + <param name="solidSolexa" type="select" label="Select SOLiD or Solexa format for the original dataset"> + <option value="solexa">Solexa</option> + <option value="solid">SOLiD</option> + </param> + <when value="solid"> + <conditional name="solidRefGenomeSource"> + <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="history"> + <param name="ownFile" type="data" label="Select a reference genome" /> + <param name="algorithm" type="select" label="Select an indexing algorithm" help="IS works on databses 2GB or less, and is linear-time. BWT-SW works on database 10MB and larger, and trades speed for memory."> + <option value="is">IS</option> + <option value="bwtsw">BWT-SW</option> + </param> + </when> + <when value="indexed"> + <param name="indices" type="select" label="Select a reference genome"> + <options from_file="sequence_index_color.loc"> + <column name="value" index="1" /> + <column name="name" index="0" /> + <filter type="sort_by" column="0" /> + </options> + </param> + </when> + </conditional> + </when> + <when value="solexa"> + <conditional name="solidRefGenomeSource"> + <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="history"> + <param name="ownFile" type="data" label="Select a reference genome" /> + <param name="algorithm" type="select" label="Select an indexing algorithm" help="IS works on databses 2GB or less, and is linear-time. 
BWT-SW works on database 10MB and larger, and trades speed for memory."> + <option value="is">IS</option> + <option value="bwtsw">BWT-SW</option> + </param> + </when> + <when value="indexed"> + <param name="indices" type="select" label="Select a reference genome"> + <options from_file="sequence_index_base.loc"> + <column name="value" index="1" /> + <column name="name" index="0" /> + <filter type="sort_by" column="0" /> + </options> + </param> + </when> + </conditional> + </when> + </conditional> + <conditional name="paired"> + <param name="sPaired" type="select" label="Is this library mate-paired?"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <when value="single"> + <param name="input1" type="data" label="FASTQ file" /> + </when> + <when value="paired"> + <param name="input1" type="data" label="Forward FASTQ file" /> + <param name="input2" type="data" label="Reverse FASTQ file" /> + </when> + </conditional> + <conditional name="params"> + <param name="source_select" type="select" label="BWA settings to use" help="For most mapping needs use Commonly used settings. 
If you want full control use Full List"> + <option value="pre_set">Commonly used</option> + <option value="full">Full Parameter List</option> + </param> + <when value="pre_set" /> + <when value="full"> + <param name="maxEditDist" type="integer" value="0" label="Maximum edit distance" help="Enter this value OR a fraction of missing alignments, not both" /> + <param name="fracMissingAligns" type="float" value="0.04" label="Fraction of missing alignments given 2% uniform base error rate" help="Enter this value OR maximum edit distance, not both" /> + <param name="maxGapOpens" type="integer" value="1" label="Maximum number of gap opens" /> + <param name="maxGapExtens" type="integer" value="-1" label="Maximum number of gap extensions" help="-1 for k-difference mode (disallowing long gaps)" /> + <param name="disallowLongDel" type="integer" value="16" label="Disallow long deletion within [value] towards the 3'-end" /> + <param name="disallowIndel" type="integer" value="5" label="Disallow insertion/deletion within [value] bp towards the end" /> + <param name="seed" type="integer" value="-1" label="Number of first subsequences to take as seed" help="Enter -1 for infinity" /> + <param name="maxEditDistSeed" type="integer" value="2" label="Maximum edit distance in the seed" /> + <param name="numThreads" type="integer" value="1" label="Number of threads, in multi-threading mode" /> + <param name="mismatchPenalty" type="integer" value="3" label="Mismatch penalty" help="BWA will not search for suboptimal hits with a score lower than [value]" /> + <param name="gapOpenPenalty" type="integer" value="1" label="Gap open penalty" /> + <param name="gapExtensPenalty" type="integer" value="4" label="Gap extension penalty" /> + <param name="colorSpaceRev" type="select" label="Reverse query but don't compement it" help="Reverse query for all alignment in color space"> + <option value="false">Don't reverse query</option> + <option value="true">Reverse query</option> + </param> + <param 
name="suboptAlign" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Proceed with suboptimal alignments even if the top hit is a repeat" help="By default, BWA only searches for suboptimal alignments if the top hit is unique. Using this option has no effect on accuracy for single-end reads. It is mainly designed for improving the alignment accuracy of paired-end reads. However, the pairing procedure will be slowed down, especially for very short reads (~32bp)" /> + <param name="noIterSearch" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Disable iterative search" help="All hits with no more than maxDiff differences will be found. This mode is much slower than the default." /> + <param name="outputTopN" type="integer" value="-1" label="Output top [value] hits" help="For single-end reads only. Enter -1 to disable outputting multiple hits" /> + <param name="maxInsertSize" type="integer" value="500" label="Maximum insert size for a read pair to be considered as being mapped properly" help="For paired-end reads only. Only used when there are not enough good alignment to infer the distribution of insert sizes" /> + <param name="maxOccurPairing" type="integer" value="100000" label="Maximum occurrences of a read for pairing" help="For paired-end reads only. A read with more occurrences will be treated as a single-end read. 
Reducing this parameter helps faster pairing" /> + </when> + </conditional> + </inputs> + <outputs> + <data format="tabular" name="output" /> + </outputs> +<!-- Tests all fail because of problem with nested conditionals in test framework + <tests> + <test> + <param name="solidSolexa" value="solexa" /> + <param name="refGenomeSource" value="indexed" /> + <param name="indices" value="phiX" /> + <param name="sPaired" value="single" /> + <param name="input1" value="bwa_phiX_sanger.fastq" /> + <param name="source_select" value="pre_set" /> + <output name="output" file="bwa_wrapper_out0.tabular" /> + </test> + <test> + <param name="solidSolexa" value="solid" /> + <param name="refGenomeSource" value="history" /> + <param name="ownFile" value="phiX.fa" /> + <param name="algorithm" value="is" /> + <param name="sPaired" value="single" /> + <param name="input1" value="bwa_phiX_sanger.fastq" /> + <param name="source_select" value="pre_set" /> + <output name="output" file="bwa_wrapper_out0b.tabular" /> + </test> + <test> + <param name="solidSolexa" value="solid" /> + <param name="refGenomeSource" value="indexed" /> + <param name="indices" value="phiX" /> + <param name="sPaired" value="single" /> + <param name="input1" value="bwa_solid.fastq" /> + <param name="source_select" value="full" /> + <param name="maxEditDist" value="0" /> + <param name="fracMissingAligns" value="0.04" /> + <param name="maxGapOpens" value="1" /> + <param name="maxGapExtens" value="-1" /> + <param name="disallowLongDel" value="16" /> + <param name="disallowIndel" value="5" /> + <param name="seed" value="-1" /> + <param name="maxEditDistSeed" value="2" /> + <param name="numThreads" value="1" /> + <param name="mismatchPenalty" value="3" /> + <param name="gapOpenPenalty" value="1" /> + <param name="gapExtensPenalty" value="4" /> + <param name="colorSpaceRev" value="true" /> + <param name="suboptAlign" value="true" /> + <param name="noIterSearch" value="true" /> + <param name="outputTopN" value="-1" /> + 
<param name="maxInsertSize" value="500" /> + <param name="maxOccurPairing" value="100000" /> + <output name="output" file="bwa_wrapper_out1.tabular" /> + </test> + <test> + <param name="solidSolexa" value="solid" /> + <param name="refGenomeSource" value="indexed" /> + <param name="indices" value="phiX" /> + <param name="sPaired" value="paired" /> + <param name="input1" value="bwa_solid_f.fastq" /> + <param name="input2" value="bwa_solid_r.fastq" /> + <param name="source_select" value="full" /> + <param name="maxEditDist" value="0" /> + <param name="fracMissingAligns" value="0.04" /> + <param name="maxGapOpens" value="1" /> + <param name="maxGapExtens" value="-1" /> + <param name="disallowLongDel" value="16" /> + <param name="disallowIndel" value="5" /> + <param name="seed" value="-1" /> + <param name="maxEditDistSeed" value="2" /> + <param name="numThreads" value="1" /> + <param name="mismatchPenalty" value="3" /> + <param name="gapOpenPenalty" value="1" /> + <param name="gapExtensPenalty" value="4" /> + <param name="colorSpaceRev" value="true" /> + <param name="suboptAlign" value="true" /> + <param name="noIterSearch" value="true" /> + <param name="outputTopN" value="-1" /> + <param name="maxInsertSize" value="500" /> + <param name="maxOccurPairing" value="100000" /> + <output name="output" file="bwa_wrapper_out2.tabular" /> + </test> + <test> + <param name="solidSolexa" value="solexa" /> + <param name="refGenomeSource" value="indexed" /> + <param name="indices" value="phiX" /> + <param name="sPaired" value="single" /> + <param name="input1" value="bwa_phiX_sanger.fastq" /> + <param name="source_select" value="full" /> + <param name="maxEditDist" value="0" /> + <param name="fracMissingAligns" value="0.04" /> + <param name="maxGapOpens" value="1" /> + <param name="maxGapExtens" value="-1" /> + <param name="disallowLongDel" value="16" /> + <param name="disallowIndel" value="5" /> + <param name="seed" value="-1" /> + <param name="maxEditDistSeed" value="2" /> + 
<param name="numThreads" value="1" /> + <param name="mismatchPenalty" value="3" /> + <param name="gapOpenPenalty" value="1" /> + <param name="gapExtensPenalty" value="4" /> + <param name="colorSpaceRev" value="false" /> + <param name="suboptAlign" value="true" /> + <param name="noIterSearch" value="true" /> + <param name="outputTopN" value="-1" /> + <param name="maxInsertSize" value="500" /> + <param name="maxOccurPairing" value="100000" /> + <output name="output" file="bwa_wrapper_out3.tabular" /> + </test> + <test> + <param name="solidSolexa" value="solexa" /> + <param name="refGenomeSource" value="indexed" /> + <param name="indices" value="phiX" /> + <param name="sPaired" value="paired" /> + <param name="input1" value="bwa_phiX_sanger_f.fastq" /> + <param name="input2" value="bwa_phiX_sanger_r.fastq" /> + <param name="source_select" value="full" /> + <param name="maxEditDist" value="0" /> + <param name="fracMissingAligns" value="0.04" /> + <param name="maxGapOpens" value="1" /> + <param name="maxGapExtens" value="-1" /> + <param name="disallowLongDel" value="16" /> + <param name="disallowIndel" value="5" /> + <param name="seed" value="-1" /> + <param name="maxEditDistSeed" value="2" /> + <param name="numThreads" value="1" /> + <param name="mismatchPenalty" value="3" /> + <param name="gapOpenPenalty" value="1" /> + <param name="gapExtensPenalty" value="4" /> + <param name="colorSpaceRev" value="false" /> + <param name="suboptAlign" value="true" /> + <param name="noIterSearch" value="true" /> + <param name="outputTopN" value="-1" /> + <param name="maxInsertSize" value="500" /> + <param name="maxOccurPairing" value="100000" /> + <output name="output" file="bwa_wrapper_out4.tabular" /> + </test> + </tests> +--> + <help> + +**What it does** + +**BWA** is a high performance sequence aligner that succeeds MAQ. It is based on BWT-SW but uses a completely different algorithm, and it is aimed toward short read alignments. 
It is fast--it can map the human genome in only 15-25 minutes. Heng Li of the Sanger Institute wrote the majority of the code, with contributions by Chi-Kwong Wong at the University of Hong Kong, Nong Ge at Sun Yat-Sen University, and Yuta Mori. + +------ + +**Input formats** + +BWA accepts files in FASTQ format. + +------ + +**Outputs** + +The output is in SAM format, and has the following columns:: + + 1 QNAME - Query (pair) NAME + 2 FLAG - bitwise FLAG + 3 RNAME - Reference sequence NAME + 4 POS - 1-based leftmost POSition/coordinate of clipped sequence + 5 MAPQ - MAPping Quality (Phred-scaled) + 6 CIGAR - extended CIGAR string + 7 MRNM - Mate Reference sequence NaMe ('=' if same as RNAME) + 8 MPOS - 1-based Mate POSition + 9 ISIZE - Inferred insert SIZE + 10 SEQ - query SEQuence on the same strand as the reference + 11 QUAL - query QUALity (ASCII-33 gives the Phred base quality) + 12 OPT - variable OPTional fields in the format TAG:VTYPE:VALUE + +The flags are as follows:: + + Flag - Description + 0x0001 - the read is paired in sequencing + 0x0002 - the read is mapped in a proper pair + 0x0004 - the query sequence itself is unmapped + 0x0008 - the mate is unmapped + 0x0010 - strand of the query (1 for reverse) + 0x0020 - strand of the mate + 0x0040 - the read is the first read in a pair + 0x0080 - the read is the second read in a pair + 0x0100 - the alignment is not primary + +It looks like this (scroll sideways to see the entire example):: + + QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT + HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh + HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh + +------- + +**BWA Settings** + +All of the options have a default 
value. You can change any of them. All of the options in BWA have been implemented here. + +------ + +**BWA parameter list** + +This is an exhaustive list of BWA options: + +For **aln**:: + + -n NUM Maximum edit distance if the value is INT, or the fraction of missing + alignments given 2% uniform base error rate if FLOAT. In the latter + case, the maximum edit distance is automatically chosen for different + read lengths. [0.04] + -o INT Maximum number of gap opens [1] + -e INT Maximum number of gap extensions, -1 for k-difference mode + (disallowing long gaps) [-1] + -d INT Disallow a long deletion within INT bp towards the 3'-end [16] + -i INT Disallow an indel within INT bp towards the ends [5] + -l INT Take the first INT subsequence as seed. If INT is larger than the + query sequence, seeding will be disabled. For long reads, this option + is typically ranged from 25 to 35 for '-k 2'. [inf] + -k INT Maximum edit distance in the seed [2] + -t INT Number of threads (multi-threading mode) [1] + -M INT Mismatch penalty. BWA will not search for suboptimal hits with a score + lower than (bestScore-misMsc). [3] + -O INT Gap open penalty [11] + -E INT Gap extension penalty [4] + -c Reverse query but not complement it, which is required for alignment + in the color space. + -R Proceed with suboptimal alignments even if the top hit is a repeat. By + default, BWA only searches for suboptimal alignments if the top hit is + unique. Using this option has no effect on accuracy for single-end + reads. It is mainly designed for improving the alignment accuracy of + paired-end reads. However, the pairing procedure will be slowed down, + especially for very short reads (~32bp). + -N Disable iterative search. All hits with no more than maxDiff + differences will be found. This mode is much slower than the default. + +For **samse**:: + + -n INT Output up to INT top hits. Value -1 to disable outputting multiple + hits. 
[-1] + +For **sampe**:: + + -a INT Maximum insert size for a read pair to be considered as being mapped + properly. Since 0.4.5, this option is only used when there are not + enough good alignments to infer the distribution of insert sizes. [500] + -o INT Maximum occurrences of a read for pairing. A read with more + occurrences will be treated as a single-end read. Reducing this + parameter helps faster pairing. [100000] + + + </help> +</tool> + +