galaxy-dev
Threads by month
- ----- 2025 -----
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- 10007 discussions
16 Sep '09
details: http://www.bx.psu.edu/hg/galaxy/rev/ba884f1a6b81
changeset: 2692:ba884f1a6b81
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Tue Sep 15 10:15:42 2009 -0400
description:
Fix maf_utilities compatibility with python 2.4; STRING.startswith() accepting a tuple was introduced in python 2.5.
1 file(s) affected in this change:
lib/galaxy/tools/util/maf_utilities.py
diffs (23 lines):
diff -r 40c5e1853a66 -r ba884f1a6b81 lib/galaxy/tools/util/maf_utilities.py
--- a/lib/galaxy/tools/util/maf_utilities.py Mon Sep 14 17:03:17 2009 -0400
+++ b/lib/galaxy/tools/util/maf_utilities.py Tue Sep 15 10:15:42 2009 -0400
@@ -403,7 +403,7 @@
else:
blocks.append( ( score, idx, offset ) )
- gap_chars_tuple = tuple( GAP_CHARS )
+ #gap_chars_tuple = tuple( GAP_CHARS )
gap_chars_str = ''.join( GAP_CHARS )
#Loop through ordered blocks and layer by increasing score
for block_dict in blocks:
for block in iter_blocks_split_by_species( block_dict[1].get_at_offset( block_dict[2] ) ): #need to handle each occurance of sequence in block seperately
@@ -415,7 +415,8 @@
#we should trim gaps from both sides, since these are not positions in this species genome (sequence)
text = text.rstrip( gap_chars_str )
gap_offset = 0
- while text.startswith( gap_chars_tuple ):
+ while True in [ text.startswith( gap_char ) for gap_char in GAP_CHARS ]: #python2.4 doesn't accept a tuple for .startswith()
+ #while text.startswith( gap_chars_tuple ):
gap_offset += 1
text = text[1:]
if not text:
1
0
16 Sep '09
details: http://www.bx.psu.edu/hg/galaxy/rev/1a56a541f9f9
changeset: 2697:1a56a541f9f9
user: Kelly Vincent <kpvincent(a)bx.psu.edu>
date: Tue Sep 15 14:26:20 2009 -0400
description:
In Bowtie wrapper tool, removed --quiet option and redirected stderr to /dev/null
1 file(s) affected in this change:
tools/sr_mapping/bowtie_wrapper.py
diffs (67 lines):
diff -r bd160d40ee15 -r 1a56a541f9f9 tools/sr_mapping/bowtie_wrapper.py
--- a/tools/sr_mapping/bowtie_wrapper.py Tue Sep 15 12:57:17 2009 -0400
+++ b/tools/sr_mapping/bowtie_wrapper.py Tue Sep 15 14:26:20 2009 -0400
@@ -69,10 +69,10 @@
if options.genomeSource == 'history':
# set up commands
if options.index_settings =='index_pre_set':
- indexing_cmds = '--quiet'
+ indexing_cmds = ''
else:
try:
- indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s %s --quiet' % \
+ indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s %s' % \
(('','--noauto')[options.iauto_b=='set'],
('','--packed')[options.ipacked=='packed'],
('','--bmax %s'%options.ibmax)[options.ibmax!='None' and options.ibmax>=1],
@@ -87,7 +87,7 @@
('','--cutoff %s'%options.icutoff)[int(options.icutoff)>0],
('','--oldpmap')[options.ioldpmap=='yes'])
except ValueError:
- indexing_cmds = '--quiet'
+ indexing_cmds = ''
# make temp directory for placement of indices and copy reference file there
tmp_dir = tempfile.gettempdir()
@@ -96,7 +96,7 @@
except Exception, erf:
stop_err('Error creating temp directory for indexing purposes\n' + str(erf))
options.ref = os.path.join(tmp_dir,os.path.split(options.ref)[1])
- cmd1 = 'cd %s; bowtie-build %s -f %s %s' % (tmp_dir, indexing_cmds, options.ref, options.ref)
+ cmd1 = 'cd %s; bowtie-build %s -f %s %s 2> /dev/null' % (tmp_dir, indexing_cmds, options.ref, options.ref)
try:
os.system(cmd1)
except Exception, erf:
@@ -105,11 +105,11 @@
# set up aligning and generate aligning command options
# automatically set threads to 8 in both cases
if options.params == 'pre_set':
- aligning_cmds = '-p %s --quiet' % options.threads
+ aligning_cmds = '-p %s' % options.threads
else:
try:
aligning_cmds = '%s %s %s %s %s %s %s %s %s %s %s %s %s %s ' \
- '%s %s %s %s %s %s %s %s %s %s %s %s -p %s --quiet' % \
+ '%s %s %s %s %s %s %s %s %s %s %s %s -p %s' % \
(('','-s %s'%options.skip)[options.skip!='None'],
('','-u %s'%options.alignLimit)[int(options.alignLimit)>0],
('','-5 %s'%options.trimH)[int(options.trimH)>=0],
@@ -138,15 +138,15 @@
('','--seed %s'%options.seed)[int(options.seed)>=0],
options.threads)
except ValueError:
- aligning_cmds = '-p %s --quiet' % options.threads
+ aligning_cmds = '-p %s' % options.threads
tmp_out = tempfile.NamedTemporaryFile()
# prepare actual aligning commands
if options.paired == 'paired':
- cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % (aligning_cmds, options.ref, options.input1, options.input2, tmp_out.name)
+ cmd2 = 'bowtie %s %s -1 %s -2 %s > %s 2> /dev/null' % (aligning_cmds, options.ref, options.input1, options.input2, tmp_out.name)
else:
- cmd2 = 'bowtie %s %s %s > %s' % (aligning_cmds, options.ref, options.input1, tmp_out.name)
+ cmd2 = 'bowtie %s %s %s > %s 2> /dev/null' % (aligning_cmds, options.ref, options.input1, tmp_out.name)
# prepare command to convert bowtie output to sam and alternative
cmd3 = 'bowtie2sam.pl %s > %s' % (tmp_out.name, options.output)
cmd4 = 'cp %s %s' % (tmp_out.name, options.output)
1
0
details: http://www.bx.psu.edu/hg/galaxy/rev/bd160d40ee15
changeset: 2696:bd160d40ee15
user: Nate Coraor <nate(a)bx.psu.edu>
date: Tue Sep 15 12:57:17 2009 -0400
description:
More fixes for cleanup_datasets.py
1 file(s) affected in this change:
scripts/cleanup_datasets/cleanup_datasets.py
diffs (19 lines):
diff -r b25489f4fb26 -r bd160d40ee15 scripts/cleanup_datasets/cleanup_datasets.py
--- a/scripts/cleanup_datasets/cleanup_datasets.py Tue Sep 15 12:04:14 2009 -0400
+++ b/scripts/cleanup_datasets/cleanup_datasets.py Tue Sep 15 12:57:17 2009 -0400
@@ -1,4 +1,6 @@
#!/usr/bin/env python
+
+import os, sys
new_path = [ os.path.join( os.getcwd(), "lib" ) ]
new_path.extend( sys.path[1:] ) # remove scripts/ from the path
@@ -8,7 +10,7 @@
import pkg_resources
pkg_resources.require( "SQLAlchemy >= 0.4" )
-import sys, os, time, ConfigParser, shutil
+import time, ConfigParser, shutil
from datetime import datetime, timedelta
from time import strftime
from optparse import OptionParser
1
0
16 Sep '09
details: http://www.bx.psu.edu/hg/galaxy/rev/14d54c9dbd59
changeset: 2698:14d54c9dbd59
user: Kelly Vincent <kpvincent(a)bx.psu.edu>
date: Wed Sep 16 11:14:39 2009 -0400
description:
Updated BWA wrapper tool: replaced threads option to hard-coded value, changed output and test file output datatype to sam, changed test file names to standard, better cleanup of temp files, added dbkey parameter and exec_before_job method to set it. Also changed name of a FASTQ Conversions tool test file
17 file(s) affected in this change:
test-data/bwa_wrapper_in1.fastq
test-data/bwa_wrapper_in2.fastq
test-data/bwa_wrapper_in3.fastq
test-data/bwa_wrapper_in4.fastq
test-data/bwa_wrapper_in5.fastq
test-data/bwa_wrapper_in6.fastq
test-data/bwa_wrapper_out0.sam
test-data/bwa_wrapper_out0b.sam
test-data/bwa_wrapper_out1.sam
test-data/bwa_wrapper_out2.sam
test-data/bwa_wrapper_out3.sam
test-data/bwa_wrapper_out4.sam
test-data/fastq_conv_in1.fastq
tools/next_gen_conversion/fastq_conversions.xml
tools/sr_mapping/bwa_wrapper.py
tools/sr_mapping/bwa_wrapper.xml
tools/sr_mapping/bwa_wrapper_code.py
diffs (1880 lines):
diff -r 1a56a541f9f9 -r 14d54c9dbd59 test-data/bwa_wrapper_in1.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bwa_wrapper_in1.fastq Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,4 @@
+@081017-and-081020:1:1:1715:1759
+GGACTCAGATAGTAATCCACGCTCCTTTAAAATATC
++
+II#IIIIIII$5+.(9IIIIIII$%*$G$A31I&&B
diff -r 1a56a541f9f9 -r 14d54c9dbd59 test-data/bwa_wrapper_in2.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bwa_wrapper_in2.fastq Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,576 @@
+@1831_573_1004/1
+AATACTTTCGGCGCCCTAAACCAGCTCACTGGGG
++
+><C&&9952+C>5<.?<79,=42<292:<(9/-7
+@1831_573_1050/1
+TTTATGGGTATGGCCGCTCACAGGCCAGCGGCCT
++
+;@@17?@=>7??@A8?==@4A?A4)&+.'&+'1,
+@1831_573_1067/1
+AAGTATGAGTCATTTACCGGGATGCGAAGAAAAG
++
+++#%(',%/$,+&1#&),)&+'*'*%#$&#%('$
+@1831_573_1219/1
+CGCCCTATAATAATACAGCGCGTTAGACCGCTCA
++
+@@=4/+)5)408?'665>*/5?<61';<3,:,5-
+@1831_573_1242/1
+GCTGAATCGCACCTAGCAACTATAGCCGGGCCGC
++
+>>::>17=A5?@@=;7A=;2.60>82<8=74+;;
+@1831_573_1333/1
+AGAATCGTTACCACACACTGCGTCTGGGTATCCG
++
+='@#%3=.>)/34*117,,/6-4+.9742456<)
+@1831_573_1362/1
+CGATCTCAACCAGGTCCGCGCCCACCCCTGCCTC
++
+B@@?@@?@C@BA?@>@<@8A@?@'.8'?17:,+?
+@1831_573_1448/1
+TCACGCCGGTCCTTGACTGGCGTTCTCTTCGAGG
++
+7=A)?,.@A@@:@@<=/7)@<-#8662%9613&+
+@1831_573_1490/1
+CTCGTCATGTTACGCAAAGGCACGTCACAGCACC
++
+@@@?,@??@$><=>+%/*>*1,)?26&12'9%7.
+@1831_573_1523/1
+ATGGAACGGAACGGGTGAGGAGGGGAACGTAGGG
++
+,//*,#8''.6''6'2&27/80)%�'%#*,2'
+@1831_573_1578/1
+CGAGTAGCAAACAAGACGCCAATCCAGGCGACCC
++
+?=@<@C@?@@??=>@;7<<8<694)3511(+1<.
+@1831_573_1647/1
+AGGGGTTTACACTATTCGACTGGGTGAGAGGCGT
++
+?@@<@@???;A==@@;9?@?3=;6.4/;6:=/;4
+@1831_573_1684/1
+TTCAACTGCGTCGACGTAGCGCACAGGCGTCCGT
++
+>1:8<@39@@??9=;@@<>?@@<;<?7>76?9>?
+@1831_573_1769/1
+TGGACGTATAGTGGCGATGAGCATGTAGGTTCTC
++
+54749&&767%/7;3$-7;3#,3//#,45/#&06
+@1831_573_1853/1
+CAAAACGCCCGGGGCCTCACATGCGCGGCAGTTC
++
+4./B@@72B+/4?@?7?+@9/+99.')2<2&)2&
+@1831_573_1943/1
+ATAACGTATGGCAGTGAACGGGCGGAACCTGCCC
++
+>/*1,),@-)'0*>5'$/?6@/.&?8/(')A43,
+@1831_573_1977/1
+GGCGTAGGGCTCATTGTGCAAGTATCCGACCTCC
++
+@?6@8@@9A@=4633A7?9?<3:+7>'@%@?%32
+@1831_574_109/1
+TCGGTTGCGTTACTTCATGGGAGGGCTTTACATT
++
+7651#4817>@65<2:84716=788<1995.7?6
+@1831_574_148/1
+CGAACCTCGTATAACGGAGTAGTCGGAAACAGTC
++
+;->3;;3>//>0;;7=,=4-4/+>029<9<624;
+@1831_574_185/1
+CCGTTTTGCCTAGTAATGCTCGGCGCAGCGTCGC
++
+<4;%%@7>#*>97%6;+8$#<07%1372$%<54#
+@1831_574_243/1
+AGGCACCGTAACTCAGGACATTCTCGATTAGTTA
++
+@@@95@=9?*><45$<@<4%@9?7#=?>7&>?97
+@1831_574_257/1
+ATACCTTCCAAAGCAATAGAATAAAAAACAGTAC
++
+>>B>)@?>B@B?@1%*%,#+00.'(+&5&%#$3&
+@1831_574_293/1
+TGCTGCAAATAAACATACAGCCTTCTAATGACTA
++
+')7'.*%#&$'/.$##$.$&*)+*$#8%&%#&&%
+@1831_574_389/1
+CATGGCTATGCACCGGTTTGTAGCGTACTCGAGA
++
+)$>/>8):2@:213;;.1)@3%64%+)<7)+$92
+@1831_574_575/1
+TTCTTGGCAAGCGCAGATTATGCGTTCCGCCTAG
++
+?@>=?@>8@@@=@6*%&<='5@<<',0<=4*5/<
+@1831_574_592/1
+TCATTTACCACGTCAGGGTCGGAGTCATTCATTA
++
+3-5)19,,<@>4067<2.-864346;9<1/;212
+@1831_574_617/1
+AAGCATCGGCGGGAGCGCAAGCTGGGAAGGTGCC
++
+>@?;>?=@==@>@=5?@+@:@40@><:0)')64-
+@1831_574_725/1
+GACAAGATGGCTATTATTTACAATCCGATCTGCA
++
+'>?52+5=;4$6;<8.1<47*9+?7(+5;-).)4
+@1831_574_734/1
+CCTGTACGAAAGAACGTAGGCATGGGCTGGGGGG
++
+-&.$2&<*(-%):/)%5:)/%7+,9034*A.?70
+@1831_574_824/1
+AGCGCAAATTATGCGTTCCGCCTAGCGGAGAACT
++
+@@@;;),+@<-<7;<&89??&29?<$2:A<);8?
+@1831_574_959/1
+CGCGCTAGGACTCGGCCCCAAGAGACGTTCCGCC
++
+A=;+?@9;/<@;<8#&&/*%(%.0$0$2)+1$-1
+@1831_574_1062/1
+ACCGGTAATATAAGGCAACATGATTACGGCCACG
++
+@821<//:136/8<1/5.427.;>288/0<1$)/
+@1831_574_1092/1
+GACTGGCGAAATCATCGCGGAAAAACCCCTATCA
++
+8?.,98)03%#,,,(,++$'69'#9'7#';6.%2
+@1831_574_1103/1
+ATCTCCTGATTAGACATATCTCCGTAGCTCACGC
++
+??>?@=>@@A<>?=@==<:?<;58'&&):+35'1
+@1831_574_1116/1
+CACCTCACGTGAGTATAGCAGCCCGAGCGTCACC
++
+@++,)/)?./)%<)2>.==:8?&;44&/&,)/*4
+@1831_574_1194/1
+TTATCACATTTGGGGATCGGAAGGGACTACTTCG
++
+5>;,??9@9@?@1@@=@3=@>@4?@@94?75/,6
+@1831_574_1204/1
+CTTACTGGTCTGCTGGACATATAGTGGCGATGAA
++
+A5A5B@5:;@@,.9?A1?#.?;3),>82%)='7*
+@1831_574_1306/1
+ATTGCTTAGATCCAGTGGCGCTCAATACAACGGA
++
+2.7&%''.$&#'%##$#'#%')')$'$%$,)'.1
+@1831_574_1387/1
+GTACTTCTCAATGCTGCACTACTATGTACGCCCC
++
+?A??:@?@@9?@@?9C;:>&).,,);&'<&7(/'
+@1831_574_1431/1
+GACCAGTTTCAGGGCTAACCGTCCCTACTCGACC
++
+7?>9BA897;+8*=6#6.+5&-#26$0.,5&'$1
+@1831_574_1560/1
+GGCGTCTTAGGATTGAAGAGGGCCTCCCACCCCC
++
+))&65<4=>99@:597(1&;+&(%&4#,#&&&*$
+@1831_574_1591/1
+TGAGCACTTATGGCTAGGCGTAGGGGACCGTGAG
++
+=<A><?@78@<=;@&@6>8:?,><&&<7>8(8+9
+@1831_574_1624/1
+ACGGGAAGGGCTGGAATCTACCCAGTAGGCATTG
++
+<6.&(,7-+))7..'/;43.)927.(<85&13-)
+@1831_574_1826/1
+TACGTCGCGACCGAGCGTTATATAGTCTGACCCC
++
+@<<,))459<?.+57'-'&6?1)(;1;0&%4,#$
+@1831_574_1903/1
+AGTGCAACATCTGCTTTGCTTATCAGCACACGGC
++
+956?@>;7@@@8:@>8><=9>9>@2=71?1:6;/
+@1831_574_1961/1
+GTTTCACTTCGGTTATTAAGAAACCCAAATGGAA
++
+>-07-@>(2'@<#064@%5%@@6.&<<(9)=:47
+@1831_575_54/1
+TTTCTTATGGGTAGAACAGCTGCCACTGACTGAA
++
+=@@68<2;<8;?@<>+9>7,,6972&57:4&9<8
+@1831_575_80/1
+TCTTTGGGTTTGGGGCAATTTGGTATGTTCGTCT
++
+?5><@?/=5%(&.5)+#/0'/'>#$/'37#()6&
+@1831_575_192/1
+AACTACGCCCCTTAATTACACAGCGCGTTAGACC
++
+:8<,9(.=/)#%8$,4'<,:)3/.'.5<,>-;7'
+@1831_575_197/1
+TTCGCCTACACTTAGATACCTCTTAAACTCAATG
++
+-:?463@A:88?6?437<=.2&1.;@67.4@%2,
+@1831_575_223/1
+ACGCACAAAGGAGCTCGGCGCATAGCAACGCAGA
++
+?>?8?@:?;>+$'%&,+#$.--#%$1'&#-7$&'
+@1831_575_420/1
+CCCACATGGAAAACACTCACCGCCGAACAGAGCG
++
+2?.>?(-''2+..@),$%1@;')1)?+)7.')..
+@1831_575_434/1
+ATCGCTGCGAGGTCACCCTGGTTACGCCCCTTCC
++
+@@><@B@>=@@?>><@?;)@?;6.?=4,'';/3)
+@1831_575_444/1
+TAGGCGACCGTGAGGACAAGAGCTGTTGCCTTGA
++
+2@@;<.=?@<+=:A<<@?<</>@><(<8>4&5;>
+@1831_575_459/1
+CTTATCAGCACACGGTTTACCAGTCCGACTCCAA
++
+@@C@?A@@A@??A@@'6@@B'=;C@'4<=9,@1.
+@1831_575_506/1
+CGCAGAACCCGCACGCTTGTGCTCACCACTGTAC
++
+?)2*5@3+.7=?A:8@,/&):@37/<89'+4*)*
+@1831_575_569/1
+AGCAGACTGCTGTAACACGGTGTGGTGTAAGGAT
++
+'9-,@29+479%&>9#&08=>7#9>,69/+'*3)
+@1831_575_622/1
+CCAAATCCGGCCCAGTAAGTGTCCTGTCGCACCC
++
+A8B=@@?<@?<>>=?96<497)<4/<':4+;74+
+@1831_575_644/1
+CTTGAATGGCGATCTCGTCGAGGAAATACCACTA
++
+<8>.>&,+)8.10':5(/-62&*.=/%(.<&&18
+@1831_575_663/1
+GGCAACTTATCCGCATTGGTCCCACTGGAGCGCA
++
+@@@@A</;A>@6>0792<5966:?;25+:><1,+
+@1831_575_681/1
+TCTCCTGATTAGACATATCACTGTTGAACACAGC
++
+72?.)25;96-14'*.03'#4#++$)/8+%/((&
+@1831_575_711/1
+TATGTTCGTCCACGTCAGACGCGCAAAGTTGCGC
++
+%+:2(#'1+.)&*45-#,<A-%4;>+))<;##*&
+@1831_575_730/1
+CACACAGGAACCATAGCGTATGTTATTCACCCCC
++
+%8<%-')5,*1&=#)(1*1)->7$.,4'4'&'*&
+@1831_575_904/1
+ACCCGCTTAAAGACGTGAATTTTGCCTCCGCGCC
++
+<><.689@@7@<54:4=@>69=:+6-=42;8<:#
+@1831_575_938/1
+TCATCAGGGAAGGCTAGGGGTTTACACTATTCGA
++
+:@@:?@;=8;>=;;:=<93<4.;<6:2<9;4)87
+@1831_575_970/1
+TGACTCCTACAGTCTTTATAGTACCGAGGGAGGC
++
+@=@=;@@><@@:9<:=?;>@=?9>>?@:2===<5
+@1831_575_991/1
+TTCGGCGATCCCCCCCACGGCGCGATGCCGCGCA
++
+>>;7???/4<:::,#&*%'$--5-#(7)&&,-)%
+@1831_575_1138/1
+TTGAAAGACCTGAACGCGATTTCATGTTTACTGC
++
+=0;%1936+?96?,?5<:2>@6@<:;4>.7;*:1
+@1831_575_1157/1
+TCGCTGTTTAGATTTCGGGAGGGTACTTCAGTGC
++
+B@B>?A?A@?9'?;?&8?@@6;<@=+<:99+965
+@1831_575_1180/1
+GAATTCACGGCAGTGTTATCACCGTTTCCTTCCA
++
+<B>5?':/78?;1;55:997@@>94=7<186->7
+@1831_575_1283/1
+GGTGGAATACTAAGGACTAATGTGCTGTCTCTTT
++
+C@A54><?A78@<?2>:#>=?@2>80:'?69.'7
+@1831_575_1302/1
+CGACTATTCGTTTCGTCTAGAACGTGACACTAGC
++
+12>/#2'&-2&%'7+$%*#$/1(&&12/$,2,,*
+@1831_575_1310/1
+CTTGCTCTCGAGCTATGCCTCAGGACACGCCCTT
++
+<<>663?99@(@7=11@:@@/7??<@<;>'3-7;
+@1831_575_1321/1
+CAACACAGTTGAACGGCGGAGGAGTAAAGATGCG
++
+;B@?@>=A@<A?>@>?6>@5>'4@9?)7@7?/89
+@1831_575_1373/1
+CGCTACCGGTTCCAACGGCTGCCTGACTCGCGGA
++
+@@@>@;=9<@??7@>@:@;@;17?630/7=4,2<
+@1831_575_1419/1
+TGGGGAATATAACAGCGTAGCGTTGAACACTAGA
++
+//3->&-.->%11/@&/&7:<,-1=,+2<6'%&<
+@1831_575_1436/1
+TGCATCTAGCGCGTATTGCGAGCCTTACGGATTC
++
+2@=;39*3<*4<:>52>>9'7:<='0=>1'136<
+@1831_575_1442/1
+TCTGACAAGGTTCCTGCACCTGCGTCTGAGAGGG
++
+>58<?7>+1B6-/9<<:5?>A:1@?539.@4/1>
+@1831_575_1454/1
+CCTCCTAACCACGAGCCGAGGGGTCTCTGCCCCT
++
+4=?8=6589=0:A=:?>6<;@/,4+-6+/'.,(,
+@1831_575_1500/1
+CACAAAAGGTCCCTACCTGTCTACCCTACATAGC
++
+?@@?A@?8<6<:985=99999==B3<5<<,7?>9
+@1831_575_1535/1
+CTCGACGATATGACCGCCAGCCACTTAACTCCGC
++
+B=@<@@B?:A@@=>@?68@=?@5?7;:6<<;>5<
+@1831_575_1724/1
+TCGTAAGTGTTAAGGAGCTGTGTACAAAACAACA
++
+<B.+@),2*.%)))4%2@;7#%(%+$8))85%&5
+@1831_575_1829/1
+CATTTGCTGACCCTGCGTAGTTTAGTCTCACAGC
++
+2(*'%.3$+7)@&%$'3*+*#/#/*+0.=&#)+0
+@1831_575_1898/1
+CTTACCATATCATCTCAACCCATAACAGCACTTA
++
+B>@@&1/))'40)%#8/.%#8$((#;4'$'63,,
+@1831_575_1964/1
+GACAGACCATGAGGCTGAAGACTACTAAGTGCGT
++
+>39)@2<2/@+9?2=&)>>@*62=5&2<42.'?+
+@1831_576_32/1
+TACGCAACGATTTATGGCCTTATAATTGAGGCCA
++
+<>?>?=7<2)522;><<@40@>704<>5=23@+&
+@1831_576_74/1
+ACATTCTGCAGTGGGACAGAGCGGTACGCCGCAA
++
+=@@?@>(??<B5?@@@9<@><+><;@';>6961?
+@1831_576_86/1
+ATGAAAACGCATTAGGACAACCATAATGGCCTCA
++
+811)2:*.++5</:3+43924*))/:,6&29)2/
+@1831_576_89/1
+GCTGTTTGATTTGAGAAGAGGAATTAAGCGCCGA
++
+)69+,'.4=-,>/>(*$#)3030*'(,%)2##$,
+@1831_576_266/1
+ATGGGGTCACTCGACCTAATCCCGCGGCTTTGGT
++
+@@?=;<?7=@>9,>@1$&&89$/:>7'3178%&6
+@1831_576_327/1
+GCCGTTCTACTCTAGCTGCAACTTGCGATTGCTA
++
+@?@6@@;<@?><@>==3@:==<3@@>53<9><6=
+@1831_576_331/1
+GACGCTTTACTCCGGTAGTACCGTGCCGTTTATA
++
+-2)+(*.*1/;5%.-9&#/1'+($*$##()%/$.
+@1831_576_387/1
+ACACGCCATGATCCGATAAGAAGGGAACGTAAGG
++
+8).$5#1#*%.$##.*#$%##-%,+,1#&%.%))
+@1831_576_406/1
+AGGTCTTACAGCACGGGGCTGAGCGCATCTGACC
++
+4<>%%14:*4656)&<251&2+3#&19,6&4>5(
+@1831_576_449/1
+CTCGAACCGCGGGGTCCAAAGACTGCTGCAAGGA
++
++'&'.,,$/+.)$$8&%#+?&,#)-&###7,+#*
+@1831_576_519/1
+TACCTGCCTACTACTTGCTCTCGAGCTATGCCTC
++
+=46<97@>2/6?;2<4A881>9121+<1/4.9+7
+@1831_576_603/1
+CAATATGTCTTAGTCGTGACTCGGCAACTTATCC
++
+@46=@C.??<A79@@;-<@@>29B?>55<B7598
+@1831_576_655/1
+GAACAGTCTATAGTGGCGGGAATCTCGTCGTCAG
++
+@AA9@@<6*>@@5/<@>9'=;>7+@?9>/9;+,%
+@1831_576_677/1
+TTTACTCAGTTGATACATCACTGTAGCACATAGG
++
+290&/*0#&'&,.2'#&*$&('#-%($*#%$)#%
+@1831_576_718/1
+CGTGCCTTTCAGGGTCTTTTCTGGTCTGGTCGCT
++
+4###$0###%(#########,####%####$###
+@1831_576_722/1
+CGTATGATGGCGAGTCTTTATAATCCAATCTGAA
++
+*,-,##.)*&(*1%*(%(-2#+)-#.&-#%%$')
+@1831_576_754/1
+AGGCGTCCTGCATCGACCGTTCTATCCGCTTAGA
++
+4//#(.$)'',>($<,##%((,#5?#0*%1*
+@1831_576_815/1
+TAGGCCTGATATGACACGATCACAGTGCAACATC
++
+3>@?9>@?B=>=;>A???=>:25=4.25?6<57.
+@1831_576_882/1
+TGTAAGACGGTGAGGTGTAAGGATCAGACCACGG
++
+26?8?@:4>@>96??<<=5'1<>9846=<9<1>8
+@1831_576_898/1
+AGTACTGTCGCGCATTGGGGTCCTGGTCGTTGCT
++
+@@C2?C>?<>@886B?;?.??87=B<8<15??=.
+@1831_576_923/1
+CTGGACATGAGAGACTGCACGCGGTACACGTCGG
++
+6.51=;.699<96>;;49<;;11;<@59:9=647
+@1831_576_930/1
+CTGGCATGTACGTCCATGTCAGACGAGCAGAACT
++
+#*0.2&.:((#'14'##-)#%$$2%#$/1&#%/#
+@1831_576_1019/1
+GATGCGCGCTGTCATGGCATCGAACCATCGGTCG
++
+<$>$,1,&++&@,.)'+/+#9'69/6'2(+-'9-
+@1831_576_1068/1
+AAGAGTGACTCACTTACCGGGATGCGATGGAGCC
++
+>><=4?;@7=??9?;9>5@9?:8@:=5';7;'#4
+@1831_576_1131/1
+AGTTCGGGAAGGGCTGGAATCTACCCAGTAGGCA
++
+04@#@/@,4>9?2+?1571@',>=;(759;*92<
+@1831_576_1168/1
+CACTTAACTCCGCTGTCGGAAGCCTTACAAGACA
++
+@CC6@@2?9=>7?;76<;467@;9,0%26'',4$
+@1831_576_1207/1
+CAACCTGACTAAACGGGGATACGCTGGCGCTACA
++
+$>2,(&?4?(,@:<&,@>?$&:8A%%=0.%,597
+@1831_576_1289/1
+TAGCGCAAGTCCAGAATGTTCATAGACTCGCGAT
++
+9<>1B@@;7@@663==28,5':8<<,.=2>>.50
+@1831_576_1329/1
+CCAATAGCAGAGACCTAATAGGAAACGAAAGCAA
++
+>&0,870/A@@/5.;=;:'&@3'&$$%,+#($&2
+@1831_576_1367/1
+GGTCTCATCCGTTCCAATCGGGACTTTGACCAGT
++
+?@@<>@@=@<@<=@>@??9:?<=>=<8;59@787
+@1831_576_1416/1
+TAGCGTTCAACGTCGATCTCATCTTGCCGATGGC
++
+';?..1.<@'.=+6.5?7<0-?7;(%=>56.98@
+@1831_576_1461/1
+GAGGGGCGGCCCGGTTCAAGCAGGTAAGCAACAA
++
+1&74#(#(&##$#,'###%#%%#*#&%%##%%%'
+@1831_576_1605/1
+AGTGCAACATCTGCTTTGCTTATCAGCACACGGC
++
+<@C@@>1;@:;<<@@9@???9<3?5-21=4877,
+@1831_576_1664/1
+CGCGCACAACTCGCCATGATACGACAAGACCCGA
++
+@?>?@=A:@@>@='=<>=).>=9*8)(7#/++;)
+@1831_576_1671/1
+GTCTTTGTAAGCCTGGCCTGGTCAGGTCTGGTCT
++
+@?@5>@@8&8?25#&)&5&&)15&&,&4%&,&6%
+@1831_576_1729/1
+CGTTTCGTCTACAACGTGACACTAGCACAGTATA
++
+?@?<?<@>A?/==?>?7>?@8<?@>99;><+0=>
+@1831_576_1880/1
+TATGCGCTGTTGAGCTTACAACTCACTAGCGAAT
++
+515/792,:,7/%/05,%$):+#8%2(1754))3
+@1831_576_1982/1
+AATGTCGTCAGACGACTTTGGCGCGAAAACCATA
++
+@@?@,A><@9@=?@9B=8<6@@6@2<A?>.7<+@
+@1831_576_1987/1
+AAGGTCTTCTGTCGGCGCTGGACTGAACACCTGA
++
+##()(#&##)%)/-#%$11#%3>'##&$,#$$'+
+@1831_576_2014/1
+CCGTGACACACAATGCCGGCCCCAGCCTAGCAAT
++
+@@9,B29-5>'?,+?79+/A';'2@'5&/9,6&1
+@1831_576_2028/1
+ACTCGCCGCATCCCCGAGTGACGCTCGAGACCAA
++
+<>9>5'2(&707.8#&&39,0%7/#(#,*%&5*+
+@1831_577_40/1
+CCCCGCGTTACGAACGAGAGAAATCTCTTATAAT
++
+2/4('.')(<*?#$)%&<,/39<(.2,+<=@611
+@1831_577_119/1
+TCCCACAAGCCATTGACATGCTCGCTCTAAACAG
++
+>?C11)-1/)#;#/,850*+.+$$5550+%-.40
+@1831_577_133/1
+TGCTTGTACGGTCTAACGGGGTATGGGTTTCTGG
++
+4@>>?=1?:+>@07@@><>@*<+1@15)96'2$)
+@1831_577_255/1
+ATTGAGGCCAAGATAATTGAGGAGAGAGAAGGTG
++
+<=87@>69%**#&#-+$.#&&#+$-+%&%.,%5#
+@1831_577_281/1
+TATGTACGTCGCGTACACTCCGGGGCCCGCAAAA
++
+-:B;8@=;>7@>4?=?+659?;5<7?;9@8(>:?
+@1831_577_288/1
+CATCCGAGGCTATCAAGGCGTAAGCACTGACCTA
++
+;/79??&=B::298*6.7/+4&21,7,6?.7#'6
+@1831_577_322/1
+GAATGCTGGAGTACATTATGACAAACTATCGGAG
++
+=;>4506;255464-<#7+194&2<?65968)7/
+@1831_577_362/1
+CGATTAGTTACCACTCGTATTCGCACGCCCAGGA
++
+2B?:@?@<?958=,3:,90:&'-99,6<5.($+6
+@1831_577_382/1
+GTCGCGTATTCCCCGATGCTATGTAGACTTGCAA
++
+2957/:1))=76(*24;,3+:<.&.&-=1=2/5*
+@1831_577_464/1
+TAGAGGCACCCTAACTCAGGGCTTTCTCGATTAG
++
+=<7/<826)>#.'&4204+5#/041.7*91&756
+@1831_577_488/1
+TGAATAGTTATGGCCAGAATGTCTGCACCGATAC
++
+48.,*>6566<?8=<=<2>6;94>;=9>@8924@
+@1831_577_511/1
+GGTGCTTATCAGTTCTTTCTCGGGACTTGTATTT
++
+7/+&$:<7%6,$$%'%/+)#$7((&*3>16'0/+
+@1831_577_545/1
+ACCGCTCTTTGGGTATGGGGCAATCTGGCATGTT
++
+><<9?<>?A<481@<@8==@76/61<95.5988-
+@1831_577_559/1
+GTGCCACTATGTTCGACAGACCCTAAGGCGGAAG
++
+<@;??@>@?/2<.@1=>12=61/;=?.&2+92)'
+@1831_577_562/1
+GTTCCACTACGTTCCACGCAAAGGAATCCGAATC
++
+((&*&0%)1%)#($2-,***%/-,,))&,-.1'*
+@1831_577_637/1
+GCCTTCGCGGGAGCATATCAGTCGATACATCCCA
++
+-,4#>:-6+:8,&(5;3=0>7=68&1/9&'?;4,
+@1831_577_641/1
+TATCTACCACCGCGGTGGCGCGAGAATGCTCCCT
++
+47;/':A;;5?:72,(=),#*?+.#&7$8#%7/'
+@1831_577_692/1
+CCGGTGAGAATTACATCGCGAGTACGCCCAAGGA
++
+B,@?@B(@A?@+@@>@+?=>@'=<@<=<9=?75<
diff -r 1a56a541f9f9 -r 14d54c9dbd59 test-data/bwa_wrapper_in3.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bwa_wrapper_in3.fastq Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,192 @@
+@1378_11_329/2
+GTTCGTGGCCGGTGGGTGTTTGGG
++
+###$$#$#$&#####$'$#$###$
+@1378_17_1788/2
+TGGGTGGATGTGTTGTCGTTCATG
++
+#$#$###$#$#######$#$####
+@1378_25_2035/2
+GTGCGTCGGGGAGGGTGCTGTCGG
++
+######%#$%#$$###($###&&%
+@1378_28_770/2
+GGTTGGGAGTTGGCGCGCGTGGTT
++
+###$##%%$####%#%$#%%#($$
+@1378_33_1945/2
+GGGGGGGGGGGGGGGGGGGGGGGG
++
+.6;?;55697>8779457986264
+@1378_34_789/2
+GGGCTTGCGTTAGTGAGAGGTTGT
++
+###%$%$%%###$####$###$#&
+@1378_35_263/2
+GGCGGGGGGGGGGGGGGGGGGGGG
++
+$%%&,)3/)%$-6'$&5&#$''5.
+@1378_43_186/2
+GCTAGGGTTTGGGTTTGCGGTGGG
++
+$%#$########%##%#$###'#'
+@1378_51_1671/2
+GGTGTTGTTCGGATGTAGCGTGGG
++
+##%$$####$########$##0##
+@1378_56_324/2
+GCTGGGTGATCTAAGGAATGTAAT
++
+#$#####$$$#$$$$##%###*(#
+@1378_56_773/2
+TCTGGTCGGTTTCGGGGAGTGGAA
++
+##%%#&$###$#$##%$####%%$
+@1378_62_2027/2
+GTTGGCCTGGCCTGCCGTGCTGCG
++
+*##),/%##$)#%##1$#'%.#&#
+@1378_62_2029/2
+GGCGGTGTGTGGTGCGGCTGTGCG
++
+/$$$=(####%####)$$%$-&%#
+@1378_67_1795/2
+CGTGCCAACGACCGTCCTACGGAG
++
+@@.?7@@@.@:@A<<>>=>70<@7
+@1378_68_466/2
+GTGTCATCTGAGGTAAAGCATTGT
++
+/##$09#$#.=$#$76+$%1'###
+@1378_68_1692/2
+TGTTCGATTTAGGTCTTGCCTCGC
++
+###%%$%%##%$#$##$###$$#$
+@1378_80_664/2
+TGTCTGCGTTGTATCTCTGGTGTA
++
+%##%,%$$#&$$###$#$%##'%#
+@1378_85_1786/2
+CCCTAGGAGCGTATACCGGACGAG
++
+,'&/%/@,&1,&'/)&,6&&1)((
+@1378_86_1011/2
+AGGCGATGGGATATTATTTTACTT
++
+:$###)%##$9$###1$$#$2###
+@1378_86_1789/2
+GGCCAGGTACTAATAAATTCAACA
++
+1.51/&%#'$,*?%#6&0$$5,20
+@1378_91_1596/2
+GCTTTTTCATTCGGTGCCTTTGGA
++
+'>%/3%=()8'#.%?50$&5>%)%
+@1378_94_1595/2
+GCGGGTTAACCCAGTACCTCTGGC
++
+,8@6'>8<76>@@(->9)7'*941
+@1378_95_1039/2
+GTTCTGTGCCAGGTGAGGTACGGA
++
+&##,./#$&)6##+,'#$$0(##$
+@1378_95_1767/2
+CTAATTGACCGGGCAAGCTATTAA
++
+',,2'(10/>:=/7).3&%6&)&6
+@1378_96_1037/2
+CTGCTGGGCCATTTGACTTACTCA
++
+'$#+#(##-%5##+*&###-.$$$
+@1378_96_1764/2
+TCTTTGTAACCCACTTAGTATTTC
++
+4='28>.@4881B807:822>%)1
+@1378_98_1574/2
+AGGCGAGTGTGGGGGTTGTTTGAG
++
++%%$#)##%##$####%###$%$#
+@1378_107_1647/2
+GGTCTGGTTCTATGTTGGTCGACT
++
+###'$$#$$$(#%###(#$##$%#
+@1378_111_829/2
+GGTCTTCTATGGTGCTAAATTAGT
++
+$##'&###$##)#%($#$%#$'%#
+@1378_111_1900/2
+GCACGCCTTTGGGCTAAGCCGTAA
++
+)$)'#%$########$'#&%$#(#
+@1378_112_1483/2
+TGGAGTGGTGTGTTTGCTGAGCCA
++
+#$#)#############$#%#%'%
+@1378_125_1287/2
+TGACGTGGGTTGTCCCGTGAGATT
++
+##$%%#$###$##$$#&%##$(%%
+@1378_126_468/2
+AAAAAACGGGCCGGGCGGACTAGG
++
+@@B@@@>@2>1+?:92)>@8&&<+
+@1378_127_664/2
+TCGCTTTGCCTATGTTTGTTCGGA
++
+#%$%#&##$%#%%###$$###)-'
+@1378_129_463/2
+AAAAAAAAAAAAAAAAAAAAAAAA
++
+,)&20%8'(&35-***1-2&+510
+@1378_129_875/2
+GACCTTTACGTATTGGGGGTTGGC
++
+###)###+###$##$#&%##$,#$
+@1378_140_1251/2
+TTTCCTTCGTGTGCGTGCGGAGTG
++
+#%#%$##$$$######.$$$%#%(
+@1378_141_809/2
+TCTCGTGGTTTCTTTTTTATGTGT
++
+##%)##$$#####%$#$#%%#'##
+@1378_144_983/2
+GTTCGTTCGTGGTGTACGAGGGTG
++
+#(#%#####($#%##$$#%##%#)
+@1378_153_270/2
+GGCCGTGTGCGGGTGTAGATTGGA
++
+%$##($######&##$&$$$$%##
+@1378_155_1689/2
+CGCGTTCGGACAAAGCTAGCACCT
++
+@4@@=6=@@<4:@@681@@@.24.
+@1378_157_1580/2
+GGGATTGAAGGGATGTATGCTAGG
++
+#%$&%#$$'%$%#$##*#%$$$$#
+@1378_161_317/2
+AATCCATACCCACAAAAGCAGGCC
++
+.&%','(@''?7//+&)+2.+)0)
+@1378_177_735/2
+CGAGCCCTAAACCATGAGATCGGA
++
+@@B>@AC9@A<B8@=9>7@5)>,0
+@1378_181_1684/2
+TTTCTGTTGTGGTTTTGTTGGGGT
++
+$##'$%'##%##$%$#$$####$*
+@1378_187_1407/2
+TTGGGTGAAATCTTGTCGAGTGGA
++
+####&##$$###$#####%##%%)
+@1378_203_721/2
+CCTTTACGATCATAAACCATGACC
++
+9<&./1&@;12')?<1',?/)&/.
+@1378_206_2039/2
+CCCCCCCCCCCCCCCCCCCCCCCC
++
+?=>>?=:;<>>4::<=56199(05
diff -r 1a56a541f9f9 -r 14d54c9dbd59 test-data/bwa_wrapper_in4.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bwa_wrapper_in4.fastq Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,192 @@
+@1378_11_329/1
+AGACCGGGCGGGGTGGCGTTCGGT
++
+%##+'#######%###$#$##$(#
+@1378_17_1788/1
+TGCCGTGTCTTGCTAACGCCGATT
++
+#'#$$#$###%%##$$$$######
+@1378_25_2035/1
+CTGCGTGTTGGTGTCTACTGGGGT
++
+#%#'##$#$##&%#%$$$%#%#'#
+@1378_28_770/1
+CTGCTATGCCGGCCGCCTGCCCCT
++
+%##(#$$#%$#$%###$%%$##$#
+@1378_33_1945/1
+GGGGGGGGGGGGGGGGGGGGGGGG
++
++9;64287788752796/576352
+@1378_34_789/1
+ATGGTGGCTGACGCGTTTGACTGT
++
+#$##%#$##$&$#%##$##$###$
+@1378_35_263/1
+GGGCGGGGGGCGGCGGGGGGGGGG
++
+),,#%'$)'-(%&%'%',%%&&(&
+@1378_43_186/1
+ATACTAGTTGGGACGCGTTGTGCT
++
+#$(4%$########$#$###$$$#
+@1378_51_1671/1
+TGGGTAGGTGCGGCTCTTGCGGTC
++
+##$#$%###%%#%#$##$$#####
+@1378_56_324/1
+AGGCTTGGATTGTTGCGCTGACCT
++
+/+#**###%%##%#####$$$#$#
+@1378_56_773/1
+TGTCGTGAGGTCACTTATCCCCAT
++
+&%#%##%%#####&#$%##$%##$
+@1378_62_2027/1
+CTTCCACGATCTGCTCGCTGTGGT
++
+(#&&$##$$#$%#%$$$#$###'#
+@1378_62_2029/1
+TCTGGGCTGTCTTCGGGTCGGTGT
++
+$%$$####$##$$#)##%%#$###
+@1378_67_1795/1
+TAGTGGGCTAGTCCGGGATCTACC
++
+B@C2@@?;-@@1>'A@@91@.@1'
+@1378_68_466/1
+GTGATCGTCGGTGCCAGTCCCTGT
++
+#(%)+##$#$#%#+$%##$#####
+@1378_68_1692/1
+GGTCTTCTGGGCACGGGCCAAGTT
++
+###)%'&%##$###%#######$#
+@1378_80_664/1
+CTGCTTTGATCCCCGGTGGAGCAC
++
+7#%###$$6#######$##$$$##
+@1378_85_1786/1
+ATACTATGTCGATCTGTAAAAAAA
++
+)&.)#3%@$&%-,2#&+.-%0&./
+@1378_86_1011/1
+CTACGTTATTGCTCTGTTTGTCCT
++
+######$%##$$$%###%#$####
+@1378_86_1789/1
+CGAATATCCCCTGGTTTAGAAGTA
++
+61?1;@/>@<=/(.7'(<5@=%$=
+@1378_91_1596/1
+TTAGCGGTTGACTATCTGCTGACA
++
+*&+'#9'(%*'#//,&<),/)'*#
+@1378_94_1595/1
+CGTGCGACAGCCCATGTTTTCAGA
++
+-=..5,3826&*+.+#+#%%6;%#
+@1378_95_1039/1
+CGGCGTCCATCTTCGCCTTGAGAT
++
+$##.#$##$$#%$#$%%$###$)$
+@1378_95_1767/1
+ACACGCACCATCTGGCGGCTAACC
++
+86<65<4<C27/$5+#;-$+#2%$
+@1378_96_1037/1
+ATCCCCCAAGATGCCTGTTGATTG
++
+$#$'##$$$#%$$#%###+##$#$
+@1378_96_1764/1
+TACACCGGAACCTTGCACTAAAGC
++
+6-9'<#+81?+*<8-=69&6;*3)
+@1378_98_1574/1
+GTTCTGCCGGTGTCTGTGGCGGGC
++
+$$#+&$$####%$$$###$%#%%#
+@1378_107_1647/1
+AGGCCTACTACGCGTCATTGATAG
++
+&#$$#$(.#%#$$####&$%##($
+@1378_111_829/1
+TGCGGCACTTGCTTCTTCGTATTT
++
+%#%##%#$%#$#%###$$##&#$$
+@1378_111_1900/1
+TCCCCTCGCTCGGCTCTGTGCTGT
++
+$&%*$#(#)##$#'##%(##$#$%
+@1378_112_1483/1
+TGTCCAGCTATGCGGCTTCCTCCT
++
+%#$+#%#&#$#####%####%$##
+@1378_125_1287/1
+TGTCTCTGGGGGGCCTGGTTAGGT
++
+$##13$'%#$###$$###$$$#&#
+@1378_126_468/1
+TCTCCTGATTAGACATATCTCCGT
++
+7>@318?=,=8@49579?..7,7)
+@1378_127_664/1
+AGAGGTTGGTGTCTTGTCGCAGCT
++
+##'#$######$$%######$$$#
+@1378_129_463/1
+CCTGTGGGCCAAGCCCAATGAAAG
++
+8),,<6(--)<()5&.&/8+;.2+
+@1378_129_875/1
+TTTCTATGGCTTACGCTGTCTGCC
++
+#$($##%####%$#$#####$###
+@1378_140_1251/1
+ATCCTAGCGCGGTGTCTTGGGGAC
++
+#$%1#$$$##$##$#$#$##$%$$
+@1378_141_809/1
+TGTCCTCCAGTGTCTGTTGGGTGT
++
+%&,-##$$#(%###$#$$'###'#
+@1378_144_983/1
+AGCGCCCGGTTGGTGCGGCTCGTC
++
+-$(&%*$#*#))#$$$#%%$#$##
+@1378_153_270/1
+AGTCCTTGTCCCCTGGGTTTTCCC
++
++''$#&%$%#$##&$$($#&#$$#
+@1378_155_1689/1
+TTGGGAGGGAGAGAGACTAGACCG
++
+?4@@4<<??<?-@4@=4<5<?:/>
+@1378_157_1580/1
+TGGGCCTCGGTGCCCTTGGTCTGT
++
+#%)$##'#$$$&#####%#$#$##
+@1378_161_317/1
+TTGGCCGGCAACCCCGGTACCTAA
++
+7<,<'@)@>.)2@/')'&(?/-<(
+@1378_177_735/1
+AGTACGCCATGTATTTGCGACCAG
++
+=?@?C<88@=>:7>@55/.,416,
+@1378_181_1684/1
+CGACTCCCGCATTCACGGTCAAGT
++
+&*#,##$#&$*$$#$#$$$#%$##
+@1378_187_1407/1
+TGGCGTCCACTCGTGGGTCTATCG
++
+$#$'%#$%$%&$%#####$#$#%#
+@1378_203_721/1
+CCTAAATAACCCAGGGTAAAAGAT
++
+9<1+2+1@=259@+;2.71&@+2&
+@1378_206_2039/1
+CCCCCCCCCCCCCCCCCCCCCCCC
++
+9@?8>;>>><<994;=895895-8
diff -r 1a56a541f9f9 -r 14d54c9dbd59 test-data/bwa_wrapper_in5.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bwa_wrapper_in5.fastq Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,4 @@
+@081017-and-081020:1:1:1715:1759/2
+ACGCTCCTTTAAAATATC
++/2
+IIIII$%*$G$A31I&&B
diff -r 1a56a541f9f9 -r 14d54c9dbd59 test-data/bwa_wrapper_in6.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bwa_wrapper_in6.fastq Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,4 @@
+@081017-and-081020:1:1:1715:1759/1
+GGACTCAGATAGTAATCC
++/1
+II#IIIIIII$5+.(9II
diff -r 1a56a541f9f9 -r 14d54c9dbd59 test-data/bwa_wrapper_out0.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bwa_wrapper_out0.sam Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,2 @@
+@SQ SN:PHIX174 LN:5386
+081017-and-081020:1:1:1715:1759 16 PHIX174 322 25 36M * 0 0 GATATTTTAAAGGAGCGTGGATTACTATCTGAGTCC B&&I13A$G$*%$IIIIIII9(.+5$IIIIIII#II XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:2C8A24
diff -r 1a56a541f9f9 -r 14d54c9dbd59 test-data/bwa_wrapper_out0b.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bwa_wrapper_out0b.sam Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,145 @@
+@SQ SN:phiX LN:5386
+1831_573_1004 0 phiX 278 25 33M1S * 0 0 TTAGATATGAGTCACATTTTGTTCATGGTAGAGG ]]>!4[WPFW]\ZSV]\YNRZOWWTTU]MJQEM XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1050 4 * 0 0 * * 0 0 TTTATGGGTATGGCCGCTCACAGGCCAGCGGCCT ;@@17?@=>7??@A8?==@4A?A4)&+.'&+'1,
+1831_573_1067 4 * 0 0 * * 0 0 AAGTATGAGTCATTTACCGGGATGCGAAGAAAAG ++#%(',%/$,+&1#&),)&+'*'*%#$&#%('$
+1831_573_1219 16 phiX 2202 37 1S33M * 0 0 TACGTCACCTTATCAGTCCAATTTAAATTACACT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1242 0 phiX 648 37 33M1S * 0 0 CATCCCGTCAACATTCAAACGGCCTGTCTCATCC ]]]]XQ]]]]]]]][]]]VIMOW]SW]]]T*1] XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1333 4 * 0 0 * * 0 0 AGAATCGTTACCACACACTGCGTCTGGGTATCCG ='@#%3=.>)/34*117,,/6-4+.9742456<)
+1831_573_1362 4 * 0 0 * * 0 0 CGATCTCAACCAGGTCCGCGCCCACCCCTGCCTC B@@?@@?@C@BA?@>@<@8A@?@'.8'?17:,+?
+1831_573_1448 16 phiX 2563 25 1S33M * 0 0 CGAAGTAACGTAAGTCTACCTATGTAGACAGTTG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1490 0 phiX 1767 25 33M1S * 0 0 CGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACC ]]]TU]]]=;]]]R9=BQQDF>QZQ1,LBIGEN XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1523 4 * 0 0 * * 0 0 ATGGAACGGAACGGGTGAGGAGGGGAACGTAGGG ,//*,#8''.6''6'2&27/80)%�'%#*,2'
+1831_573_1578 4 * 0 0 * * 0 0 CGAGTAGCAAACAAGACGCCAATCCAGGCGACCC ?=@<@C@?@@??=>@;7<<8<694)3511(+1<.
+1831_573_1647 16 phiX 1118 37 1S33M * 0 0 ACTGAGGAAGCTCTACCTGCGGCAACCGCGAGAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1684 16 phiX 3696 37 1S33M * 0 0 ATCACGACTCCAACTGAATCAAGTAGTCGTTTGC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1769 4 * 0 0 * * 0 0 TGGACGTATAGTGGCGATGAGCATGTAGGTTCTC 54749&&767%/7;3$-7;3#,3//#,45/#&06
+1831_573_1853 4 * 0 0 * * 0 0 CAAAACGCCCGGGGCCTCACATGCGCGGCAGTTC 4./B@@72B+/4?@?7?+@9/+99.')2<2&)2&
+1831_573_1943 4 * 0 0 * * 0 0 ATAACGTATGGCAGTGAACGGGCGGAACCTGCCC >/*1,),@-)'0*>5'$/?6(a)/.&?8/(')A43,
+1831_573_1977 4 * 0 0 * * 0 0 GGCGTAGGGCTCATTGTGCAAGTATCCGACCTCC @?6@8@@9A@=4633A7?9?<3:+7>'@%@?%32
+1831_574_109 0 phiX 3729 37 33M1S * 0 0 CAGAATCAGCGGTATGGCTCTTCTCATATTGGCT VTO/2URQ]]]TZWU[UTQP\]XY]VS[WLN]] XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_574_148 16 phiX 2369 37 1S33M * 0 0 GATCCAAAAGACGAATCCTCAAATTAGTACAAAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_574_185 4 * 0 0 * * 0 0 CCGTTTTGCCTAGTAATGCTCGGCGCAGCGTCGC <4;%%@7>#*>97%6;+8$#<07%1372$%<54#
+1831_574_243 0 phiX 2129 37 33M1S * 0 0 TCTGGTGATTTGCAAGAACGCGTACTTATTCGCA ]]]W]]]]RQ]YRBI]]Y0<]]]CI]]]FM]]Y XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_574_257 4 * 0 0 * * 0 0 ATACCTTCCAAAGCAATAGAATAAAAAACAGTAC >>B>)@?>B@B?@1%*%,#+00.'(+&5&%#$3&
+1831_574_293 4 * 0 0 * * 0 0 TGCTGCAAATAAACATACAGCCTTCTAATGACTA ')7'.*%#&$'/.$##$.$&*)+*$#8%&%#&&%
+1831_574_389 4 * 0 0 * * 0 0 CATGGCTATGCACCGGTTTGTAGCGTACTCGAGA )$>/>8):2@:213;;.1)@3%64%+)<7)+$92
+1831_574_575 16 phiX 4006 37 1S33M * 0 0 CTTACAGTGCGACTAATAAAACTGAAACTCGCAT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_574_592 16 phiX 2950 37 1S33M * 0 0 TCGCCATAACGAAGACGAGAACGACCACCGCGGT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_574_617 4 * 0 0 * * 0 0 AAGCATCGGCGGGAGCGCAAGCTGGGAAGGTGCC >@?;>?=@==@>@=5?@+@:@40@><:0)')64-
+1831_574_725 4 * 0 0 * * 0 0 GACAAGATGGCTATTATTTACAATCCGATCTGCA '>?52+5=;4$6;<8.1<47*9+?7(+5;-).)4
+1831_574_734 16 phiX 5263 25 1S33M * 0 0 CCTCTCTACTCTAACTCCGACCCTTTTCAATGAC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_574_824 16 phiX 3997 37 1S33M * 0 0 ATGGGAAGACTTACAGTGCGACTAATAAAACTGA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_574_959 4 * 0 0 * * 0 0 CGCGCTAGGACTCGGCCCCAAGAGACGTTCCGCC A=;+?@9;/<@;<8#&&/*%(%.0$0$2)+1$-1
+1831_574_1062 4 * 0 0 * * 0 0 ACCGGTAATATAAGGCAACATGATTACGGCCACG @821<//:136/8<1/5.427.;>288/0<1$)/
+1831_574_1092 4 * 0 0 * * 0 0 GACTGGCGAAATCATCGCGGAAAAACCCCTATCA 8?.,98)03%#,,,(,++$'69'#9'7#';6.%2
+1831_574_1103 4 * 0 0 * * 0 0 ATCTCCTGATTAGACATATCTCCGTAGCTCACGC ??>?@=>@@A<>?=@==<:?<;58'&&):+35'1
+1831_574_1116 4 * 0 0 * * 0 0 CACCTCACGTGAGTATAGCAGCCCGAGCGTCACC @++,)/)?./)%<)2>.==:8?&;44&/&,)/*4
+1831_574_1194 0 phiX 1136 37 33M1S * 0 0 GCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTG \]PT]]]]]]]ZZ]]]\Y]]]]\]]]V\]UMDK XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_574_1204 16 phiX 1445 25 1S33M * 0 0 TGGATTACTCGAATTAGTTCTACTACGAGCAATA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_574_1306 4 * 0 0 * * 0 0 ATTGCTTAGATCCAGTGGCGCTCAATACAACGGA 2.7&%''.$&#'%##$#'#%')')$'$%$,)'.1
+1831_574_1387 4 * 0 0 * * 0 0 GTACTTCTCAATGCTGCACTACTATGTACGCCCC ?A??:@?@@9?@@?9C;:>&).,,);&'<&7(/'
+1831_574_1431 4 * 0 0 * * 0 0 GACCAGTTTCAGGGCTAACCGTCCCTACTCGACC 7?>9BA897;+8*=6#6.+5&-#26$0.,5&'$1
+1831_574_1560 4 * 0 0 * * 0 0 GGCGTCTTAGGATTGAAGAGGGCCTCCCACCCCC ))&65<4=>99@:597(1&;+&(%&4#,#&&&*$
+1831_574_1591 16 phiX 5118 37 1S33M * 0 0 CTTCGACAAGTCTTAGTCTTACTCGGCGTTGAAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_574_1624 16 phiX 4058 37 1S33M * 0 0 CATAACTCCGAACACCGTAAAGATGAGAAAGAGT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_574_1826 4 * 0 0 * * 0 0 TACGTCGCGACCGAGCGTTATATAGTCTGACCCC @<<,))459<?.+57'-'&6?1)(;1;0&%4,#$
+1831_574_1903 0 phiX 5209 37 33M1S * 0 0 AGCTGGGTTACGACGCGACGCCGTTCAACCAGAC WT]]]][]]]][]]]]]]]]]]][X]QYYTYZ- XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_574_1961 0 phiX 3555 25 33M1S * 0 0 CGCGTTGCGTCTATTATGGAAAACACCAATCTTA TFPMV]OCBP]H<OS]<1CN]]M=K]52KO]WT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_575_54 16 phiX 3587 37 1S33M * 0 0 TAAGGTTCGTTGTCGTCCAAAGGCTCTAATACGC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_80 4 * 0 0 * * 0 0 TCTTTGGGTTTGGGGCAATTTGGTATGTTCGTCT ?5><@?/=5%(&.5)+#/0'/'>#$/'37#()6&
+1831_575_192 4 * 0 0 * * 0 0 AACTACGCCCCTTAATTACACAGCGCGTTAGACC :8<,9(.=/)#%8$,4'<,:)3/.'.5<,>-;7'
+1831_575_197 16 phiX 4861 37 1S33M * 0 0 CCGAACGTTTTATGCACCGGAATACCAATGTCAT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_223 4 * 0 0 * * 0 0 ACGCACAAAGGAGCTCGGCGCATAGCAACGCAGA ?>?8?@:?;>+$'%&,+#$.--#%$1'&#-7$&'
+1831_575_420 4 * 0 0 * * 0 0 CCCACATGGAAAACACTCACCGCCGAACAGAGCG 2?.>?(-''2+..@),$%1@;')1)?+)7.')..
+1831_575_434 4 * 0 0 * * 0 0 ATCGCTGCGAGGTCACCCTGGTTACGCCCCTTCC @@><@B@>=@@?>><@?;)@?;6.?=4,'';/3)
+1831_575_444 16 phiX 5098 37 1S33M * 0 0 TTCGACAGCGATGAAGGGTTCTTCGACAAGTCTT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_459 4 * 0 0 * * 0 0 CTTATCAGCACACGGTTTACCAGTCCGACTCCAA @@C@?A@@A@??A@@'6@@B'=;C@'4<=9,@1.
+1831_575_506 4 * 0 0 * * 0 0 CGCAGAACCCGCACGCTTGTGCTCACCACTGTAC ?)2*5@3+.7=?A:8@,/&):@37/<89'+4*)*
+1831_575_569 0 phiX 5043 25 33M1S * 0 0 GACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAAT IOBU[TMHTYG4M]7$?Q]]]57]SKXQC;:FE XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_575_622 0 phiX 2485 37 33M1S * 0 0 GTTTTACAGACACCTAAAGCTACATCGTCAACGC ]]]]]]]]]]]]]]]X[YVYINYLTLJWHO[$! XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_644 4 * 0 0 * * 0 0 CTTGAATGGCGATCTCGTCGAGGAAATACCACTA <8>.>&,+)8.10':5(/-62&*.=/%(.<&&18
+1831_575_663 16 phiX 1554 25 1S33M * 0 0 TGCTGAAGATGGTGTAGATAACTGTAATACCCAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_575_681 4 * 0 0 * * 0 0 TCTCCTGATTAGACATATCACTGTTGAACACAGC 72?.)25;96-14'*.03'#4#++$)/8+%/((&
+1831_575_711 4 * 0 0 * * 0 0 TATGTTCGTCCACGTCAGACGCGCAAAGTTGCGC %+:2(#'1+.)&*45-#,<A-%4;>+))<;##*&
+1831_575_730 4 * 0 0 * * 0 0 CACACAGGAACCATAGCGTATGTTATTCACCCCC %8<%-')5,*1&=#)(1*1)->7$.,4'4'&'*&
+1831_575_904 4 * 0 0 * * 0 0 ACCCGCTTAAAGACGTGAATTTTGCCTCCGCGCC <><.689@@7@<54:4=@>69=:+6-=42;8<:#
+1831_575_938 16 phiX 1133 37 1S33M * 0 0 TCTGCGGCAACCGCGAGAGGCAGAAAGAGGTAAC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_970 16 phiX 232 37 1S33M * 0 0 GCTCCTCTTCACCGAATTATACGAACCGTGCAAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_991 4 * 0 0 * * 0 0 TTCGGCGATCCCCCCCACGGCGCGATGCCGCGCA >>;7???/4<:::,#&*%'$--5-#(7)&&,-)%
+1831_575_1138 0 phiX 1913 37 33M1S * 0 0 GCTAAAGGTGCTTTGACTTATACCGATATTGCTC VT7-SURJS]X]TT]Z]UY]]]]]]X[UN[NMT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1157 16 phiX 3736 25 1S33M * 0 0 GTCGCCATACCGAGAAGAGTATAACCGCGATGAC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1180 0 phiX 4169 37 33M1S * 0 0 GGGCGTTGAGTTCGATAATGGTGATATGTATGTA ]]\]OJROX]]UUYSX\[Y]]]]VZ]\VRWLT] XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1283 4 * 0 0 * * 0 0 GGTGGAATACTAAGGACTAATGTGCTGTCTCTTT C@A54><?A78@<?2>:#>=?@2>80:'?69.'7
+1831_575_1302 16 phiX 5202 37 1S33M * 0 0 GGAATGGTTCGACCCAATGCTGCGCTGCGGCAAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1310 16 phiX 1612 37 1S33M * 0 0 AATGACTGGTTCTTGCACTAATGAAGTACGTCGC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1321 0 phiX 2664 37 33M1S * 0 0 TTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACG ]]]]]]]]]]]]]]]]]]]\ND]]]7/]]]WPZ XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1373 16 phiX 4221 37 1S33M * 0 0 TAGACTGCAAGCACTACTCAAACATAGACAATGA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1419 4 * 0 0 * * 0 0 TGGGGAATATAACAGCGTAGCGTTGAACACTAGA //3->&-.->%11/@&/&7:<,-1=,+2<6'%&<
+1831_575_1436 0 phiX 3460 37 33M1S * 0 0 CTGGCATTCAGTCGGCGACTTCACGCCAGAATAC []]WULFXOGY]]\PY]]IGZ]]M@V]XAAMR[ XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1442 0 phiX 3317 37 33M1S * 0 0 TGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCG \V]]]]RE\]LEQ]]]X]]]]TZ]]QUPW]LIX XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1454 4 * 0 0 * * 0 0 CCTCCTAACCACGAGCCGAGGGGTCTCTGCCCCT 4=?8=6589=0:A=:?>6<;@/,4+-6+/'.,(,
+1831_575_1500 16 phiX 4269 37 1S33M * 0 0 GCTTAACCGTGTTACGATGTTACACGAGGGGGTT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1535 16 phiX 1225 37 1S33M * 0 0 GAGTGCAAATACCACTTGTCACCTAATTCAAGTA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1724 4 * 0 0 * * 0 0 TCGTAAGTGTTAAGGAGCTGTGTACAAAACAACA <B.+@),2*.%)))4%2@;7#%(%+$8))85%&5
+1831_575_1829 4 * 0 0 * * 0 0 CATTTGCTGACCCTGCGTAGTTTAGTCTCACAGC 2(*'%.3$+7)@&%$'3*+*#/#/*+0.=&#)+0
+1831_575_1898 4 * 0 0 * * 0 0 CTTACCATATCATCTCAACCCATAACAGCACTTA B>@@&1/))'40)%#8/.%#8$((#;4'$'63,,
+1831_575_1964 4 * 0 0 * * 0 0 GACAGACCATGAGGCTGAAGACTACTAAGTGCGT >39)@2<2/@+9?2=&)>>@*62=5&2<42.'?+
+1831_576_32 16 phiX 988 25 1S33M * 0 0 TAACTCCTATTTAATACAGATTATAAGTTTGACC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_576_74 4 * 0 0 * * 0 0 ACATTCTGCAGTGGGACAGAGCGGTACGCCGCAA =@@?@>(??<B5?@@@9<@><+><;@';>6961?
+1831_576_86 16 phiX 2981 37 1S33M * 0 0 TGTACAGATTTAACAAACCTCCGCCAGTTTTTCG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_89 4 * 0 0 * * 0 0 GCTGTTTGATTTGAGAAGAGGAATTAAGCGCCGA )69+,'.4=-,>/>(*$#)3030*'(,%)2##$,
+1831_576_266 16 phiX 280 37 1S33M * 0 0 ATCTATACTCAGTGTAAAACAAGTACCATCTCTA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_576_327 16 phiX 4517 25 1S33M * 0 0 TATGATAAGTCGCAAACTACTTACGTTACGCTGT XT:A:U CM:i:1 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:1G31
+1831_576_331 4 * 0 0 * * 0 0 GACGCTTTACTCCGGTAGTACCGTGCCGTTTATA -2)+(*.*1/;5%.-9&#/1'+($*$##()%/$.
+1831_576_387 4 * 0 0 * * 0 0 ACACGCCATGATCCGATAAGAAGGGAACGTAAGG 8).$5#1#*%.$##.*#$%##-%,+,1#&%.%))
+1831_576_406 4 * 0 0 * * 0 0 AGGTCTTACAGCACGGGGCTGAGCGCATCTGACC 4<>%%14:*4656)&<251&2+3#&19,6&4>5(
+1831_576_449 4 * 0 0 * * 0 0 CTCGAACCGCGGGGTCCAAAGACTGCTGCAAGGA +'&'.,,$/+.)$$8&%#+?&,#)-&###7,+#*
+1831_576_519 16 phiX 1625 25 1S33M * 0 0 GGCACTAATGAAGTACGTCGCAATGGCACTACAA XT:A:U CM:i:1 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:25A7
+1831_576_603 16 phiX 1576 37 1S33M * 0 0 GGTAATACCCAGACGTTCGACGAATACGATTAAA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_655 16 phiX 3292 37 1S33M * 0 0 CAACGACGACGTAAAGGACTCGAATTACGAACCC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_576_677 4 * 0 0 * * 0 0 TTTACTCAGTTGATACATCACTGTAGCACATAGG 290&/*0#&'&,.2'#&*$&('#-%($*#%$)#%
+1831_576_718 4 * 0 0 * * 0 0 CGTGCCTTTCAGGGTCTTTTCTGGTCTGGTCGCT 4###$0###%(#########,####%####$###
+1831_576_722 4 * 0 0 * * 0 0 CGTATGATGGCGAGTCTTTATAATCCAATCTGAA *,-,##.)*&(*1%*(%(-2#+)-#.&-#%%$')
+1831_576_754 4 * 0 0 * * 0 0 AGGCGTCCTGCATCGACCGTTCTATCCGCTTAGA 4//#(.$)'',>($<,##%((,#5?#0*%1*
+1831_576_815 0 phiX 5186 37 33M1S * 0 0 GGAGTGCTTAATCCAACTTACCAAGCTGGGTTAC Z]]]]]]]]]]]]]]]]]]]UP[ZKIP]][ZUN XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_882 0 phiX 5053 37 33M1S * 0 0 CTAAAGGTCTAGGAGCTAAAGAATGGAACAACTG Q]]]]]W[]]]X]]]]][EAV]]ZUS\]]]VX] XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_898 0 phiX 518 37 33M1S * 0 0 GATTGCTACTGACCGCTCTCGTGCTCGTCGCTGT ]]]Z]]]]]]]YW]]]]VV]]X]]]]]VO]]]T XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_923 0 phiX 1050 37 33M1S * 0 0 ATCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCG MLOW]RMX[]]X]]]XV]]]UKU]]]W\\]\ST XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_930 4 * 0 0 * * 0 0 CTGGCATGTACGTCCATGTCAGACGAGCAGAACT #*0.2&.:((#'14'##-)#%$$2%#$/1&#%/#
+1831_576_1019 4 * 0 0 * * 0 0 GATGCGCGCTGTCATGGCATCGAACCATCGGTCG <$>$,1,&++&@,.)'+/+#9'69/6'2(+-'9-
+1831_576_1068 4 * 0 0 * * 0 0 AAGAGTGACTCACTTACCGGGATGCGATGGAGCC >><=4?;@7=??9?;9>5@9?:8@:=5';7;'#4
+1831_576_1131 16 phiX 4061 37 1S33M * 0 0 TACTCCGAACACCGTAAAGATGAGAAAGAGTTAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1168 4 * 0 0 * * 0 0 CACTTAACTCCGCTGTCGGAAGCCTTACAAGACA @CC6@@2?9=>7?;76<;467@;9,0%26'',4$
+1831_576_1207 4 * 0 0 * * 0 0 CAACCTGACTAAACGGGGATACGCTGGCGCTACA $>2,(&?4?(,@:<&,@>?$&:8A%%=0.%,597
+1831_576_1289 16 phiX 1899 37 1S33M * 0 0 AAAGTCATGGAATTGCGATTTCCACGAAACTGAA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1329 4 * 0 0 * * 0 0 CCAATAGCAGAGACCTAATAGGAAACGAAAGCAA >&0,870/A@@/5.;=;:'&@3'&$$%,+#($&2
+1831_576_1367 0 phiX 1839 37 33M1S * 0 0 AGCATGGCACTATGTTTACTCTTGCGCTTGTTCT ]]]]]]]]]]]]]]]]]]\]]]]]]]\YW]]XX XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1416 0 phiX 4522 37 33M1S * 0 0 TTCAGCGTTTGATGAATGCAATGCGACAGGCTCC K]VEHHS]P>TQJML]]\UFU][L6K]\TMPZ] XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1461 4 * 0 0 * * 0 0 GAGGGGCGGCCCGGTTCAAGCAGGTAAGCAACAA 1&74#(#(&##$#,'###%#%%#*#&%%##%%%'
+1831_576_1605 0 phiX 5209 37 33M1S * 0 0 AGCTGGGTTACGACGCGACGCCGTTCAACCAGAC ]]]]]XU]]]]]]]]]]]]]]X[]KHLWZUXW, XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1664 4 * 0 0 * * 0 0 CGCGCACAACTCGCCATGATACGACAAGACCCGA @?>?@=A:@@>@='=<>=).>=9*8)(7#/++;)
+1831_576_1671 4 * 0 0 * * 0 0 GTCTTTGTAAGCCTGGCCTGGTCAGGTCTGGTCT @?@5>@@8&8?25#&)&5&&)15&&,&4%&,&6%
+1831_576_1729 16 phiX 5194 37 1S33M * 0 0 TATTAGGTTGAATGGTTCGACCCAATGCTGCGCT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1880 4 * 0 0 * * 0 0 TATGCGCTGTTGAGCTTACAACTCACTAGCGAAT 515/792,:,7/%/05,%$):+#8%2(1754))3
+1831_576_1982 16 phiX 5024 37 1S33M * 0 0 TATTGTTTTTCAGTCTATACCTGGAACGACGATT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1987 4 * 0 0 * * 0 0 AAGGTCTTCTGTCGGCGCTGGACTGAACACCTGA ##()(#&##)%)/-#%$11#%3>'##&$,#$$'+
+1831_576_2014 4 * 0 0 * * 0 0 CCGTGACACACAATGCCGGCCCCAGCCTAGCAAT @@9,B29-5>'?,+?79+/A';'2@'5&/9,6&1
+1831_576_2028 4 * 0 0 * * 0 0 ACTCGCCGCATCCCCGAGTGACGCTCGAGACCAA <>9>5'2(&707.8#&&39,0%7/#(#,*%&5*+
+1831_577_40 0 phiX 2772 25 33M1S * 0 0 TGTGACTATTGACGTCCTTCCCCGTACGCCGGGT JLE8>>9:MOR=!&74KQDKU]M?IG@P]]]PK XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_577_119 4 * 0 0 * * 0 0 TCCCACAAGCCATTGACATGCTCGCTCTAAACAG >?C11)-1/)#;#/,850*+.+$$5550+%-.40
+1831_577_133 16 phiX 1714 37 1S33M * 0 0 CGTACGCGAGATTAGAGACCCGTAGACCGATACT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_577_255 4 * 0 0 * * 0 0 ATTGAGGCCAAGATAATTGAGGAGAGAGAAGGTG <=87@>69%**#&#-+$.#&&#+$-+%&%.,%5#
+1831_577_281 16 phiX 4577 37 1S33M * 0 0 TAAAACTGTGAGAGTGCAACCGACTGCTGGCTAA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_577_288 0 phiX 3201 25 33M1S * 0 0 GGCACTTCTGCCGTTTCTGATAAGTTGCTTGATA SOY]]NL]]]UTZKIMNOCH/-LFLLK]VN5%F XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_577_322 16 phiX 458 37 1S33M * 0 0 CAAGACGGCAAAACCTAAATTGGCTTCTACTAAA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_577_362 0 phiX 2153 37 33M1S * 0 0 CTTATTCGCCACCATGATTATGACCAGTGTTTCA ]]]]]]]]]WV]RHVONRSI6=O[NK[ZL?%(J XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_577_382 16 phiX 4686 37 1S33M * 0 0 TCCAGCGTTCCGATTACTAAGTGTGCGGCTGACG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_577_464 0 phiX 2126 25 33M1S * 0 0 CCTTCTGGTGATTTGCAAGAACGCGTACTTATTG ]\OT]SQHP<,>6COKMHI3-HMNHNJLS@FUT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_577_488 16 phiX 4948 37 1S33M * 0 0 GCCGGACAACTACGATTTCCACTCGGCGAATTTC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_577_511 4 * 0 0 * * 0 0 GGTGCTTATCAGTTCTTTCTCGGGACTTGTATTT 7/+&$:<7%6,$$%'%/+)#$7((&*3>16'0/+
+1831_577_545 0 phiX 1711 37 33M1S * 0 0 TGTCATGCGCTCTAATCTCTGGGCATCTGGCTAT ]]]]]]]]]YURZ]]]]]]]VNNPV]WLLWZYN XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_577_559 0 phiX 3840 25 33M1S * 0 0 GCTGTTGCCGATACTTGGAACAATTTCTGGAAAG ]]]]]]]]WJWSWZW]XLX\P#-]]V=A(/TD9 XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_577_562 4 * 0 0 * * 0 0 GTTCCACTACGTTCCACGCAAAGGAATCCGAATC ((&*&0%)1%)#($2-,***%/-,,))&,-.1'*
+1831_577_637 0 phiX 4247 37 33M1S * 0 0 TGTTACTGAGAAGTTAATGGATGAATTGGCACAA BI2<]PLJN[M;7FYWYVW]]\WG@IQH6O]XI XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_577_641 4 * 0 0 * * 0 0 TATCTACCACCGCGGTGGCGCGAGAATGCTCCCT 47;/':A;;5?:72,(=),#*?+.#&7$8#%7/'
+1831_577_692 0 phiX 3905 37 33M1S * 0 0 GTCTAGGAAATAACCGTCAGGATTGACACCCTCA WU]]]SQ]]]TT]]]TS]]]PM]]]]]]]]]UZ XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
diff -r 1a56a541f9f9 -r 14d54c9dbd59 test-data/bwa_wrapper_out1.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bwa_wrapper_out1.sam Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,145 @@
+@SQ SN:PHIX174 LN:5386
+1831_573_1004 0 PHIX174 278 25 33M1S * 0 0 TTAGATATGAGTCACATTTTGTTCATGGTAGAGG ]]>!4[WPFW]\ZSV]\YNRZOWWTTU]MJQEM XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1050 4 * 0 0 * * 0 0 TTTATGGGTATGGCCGCTCACAGGCCAGCGGCCT ;@@17?@=>7??@A8?==@4A?A4)&+.'&+'1,
+1831_573_1067 4 * 0 0 * * 0 0 AAGTATGAGTCATTTACCGGGATGCGAAGAAAAG ++#%(',%/$,+&1#&),)&+'*'*%#$&#%('$
+1831_573_1219 16 PHIX174 2202 37 1S33M * 0 0 TACGTCACCTTATCAGTCCAATTTAAATTACACT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1242 0 PHIX174 648 37 33M1S * 0 0 CATCCCGTCAACATTCAAACGGCCTGTCTCATCC ]]]]XQ]]]]]]]][]]]VIMOW]SW]]]T*1] XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1333 4 * 0 0 * * 0 0 AGAATCGTTACCACACACTGCGTCTGGGTATCCG ='@#%3=.>)/34*117,,/6-4+.9742456<)
+1831_573_1362 4 * 0 0 * * 0 0 CGATCTCAACCAGGTCCGCGCCCACCCCTGCCTC B@@?@@?@C@BA?@>@<@8A@?@'.8'?17:,+?
+1831_573_1448 16 PHIX174 2563 25 1S33M * 0 0 CGAAGTAACGTAAGTCTACCTATGTAGACAGTTG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1490 0 PHIX174 1767 25 33M1S * 0 0 CGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACC ]]]TU]]]=;]]]R9=BQQDF>QZQ1,LBIGEN XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1523 4 * 0 0 * * 0 0 ATGGAACGGAACGGGTGAGGAGGGGAACGTAGGG ,//*,#8''.6''6'2&27/80)%�'%#*,2'
+1831_573_1578 4 * 0 0 * * 0 0 CGAGTAGCAAACAAGACGCCAATCCAGGCGACCC ?=@<@C@?@@??=>@;7<<8<694)3511(+1<.
+1831_573_1647 16 PHIX174 1118 37 1S33M * 0 0 ACTGAGGAAGCTCTACCTGCGGCAACCGCGAGAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1684 16 PHIX174 3696 37 1S33M * 0 0 ATCACGACTCCAACTGAATCAAGTAGTCGTTTGC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_573_1769 4 * 0 0 * * 0 0 TGGACGTATAGTGGCGATGAGCATGTAGGTTCTC 54749&&767%/7;3$-7;3#,3//#,45/#&06
+1831_573_1853 4 * 0 0 * * 0 0 CAAAACGCCCGGGGCCTCACATGCGCGGCAGTTC 4./B@@72B+/4?@?7?+@9/+99.')2<2&)2&
+1831_573_1943 4 * 0 0 * * 0 0 ATAACGTATGGCAGTGAACGGGCGGAACCTGCCC >/*1,),@-)'0*>5'$/?6(a)/.&?8/(')A43,
+1831_573_1977 4 * 0 0 * * 0 0 GGCGTAGGGCTCATTGTGCAAGTATCCGACCTCC @?6@8@@9A@=4633A7?9?<3:+7>'@%@?%32
+1831_574_109 0 PHIX174 3729 37 33M1S * 0 0 CAGAATCAGCGGTATGGCTCTTCTCATATTGGCT VTO/2URQ]]]TZWU[UTQP\]XY]VS[WLN]] XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_574_148 16 PHIX174 2369 37 1S33M * 0 0 GATCCAAAAGACGAATCCTCAAATTAGTACAAAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_574_185 4 * 0 0 * * 0 0 CCGTTTTGCCTAGTAATGCTCGGCGCAGCGTCGC <4;%%@7>#*>97%6;+8$#<07%1372$%<54#
+1831_574_243 0 PHIX174 2129 37 33M1S * 0 0 TCTGGTGATTTGCAAGAACGCGTACTTATTCGCA ]]]W]]]]RQ]YRBI]]Y0<]]]CI]]]FM]]Y XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_574_257 4 * 0 0 * * 0 0 ATACCTTCCAAAGCAATAGAATAAAAAACAGTAC >>B>)@?>B@B?@1%*%,#+00.'(+&5&%#$3&
+1831_574_293 4 * 0 0 * * 0 0 TGCTGCAAATAAACATACAGCCTTCTAATGACTA ')7'.*%#&$'/.$##$.$&*)+*$#8%&%#&&%
+1831_574_389 4 * 0 0 * * 0 0 CATGGCTATGCACCGGTTTGTAGCGTACTCGAGA )$>/>8):2@:213;;.1)@3%64%+)<7)+$92
+1831_574_575 16 PHIX174 4006 37 1S33M * 0 0 CTTACAGTGCGACTAATAAAACTGAAACTCGCAT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_574_592 16 PHIX174 2950 37 1S33M * 0 0 TCGCCATAACGAAGACGAGAACGACCACCGCGGT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_574_617 4 * 0 0 * * 0 0 AAGCATCGGCGGGAGCGCAAGCTGGGAAGGTGCC >@?;>?=@==@>@=5?@+@:@40@><:0)')64-
+1831_574_725 4 * 0 0 * * 0 0 GACAAGATGGCTATTATTTACAATCCGATCTGCA '>?52+5=;4$6;<8.1<47*9+?7(+5;-).)4
+1831_574_734 16 PHIX174 5263 25 1S33M * 0 0 CCTCTCTACTCTAACTCCGACCCTTTTCAATGAC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_574_824 16 PHIX174 3997 37 1S33M * 0 0 ATGGGAAGACTTACAGTGCGACTAATAAAACTGA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_574_959 4 * 0 0 * * 0 0 CGCGCTAGGACTCGGCCCCAAGAGACGTTCCGCC A=;+?@9;/<@;<8#&&/*%(%.0$0$2)+1$-1
+1831_574_1062 4 * 0 0 * * 0 0 ACCGGTAATATAAGGCAACATGATTACGGCCACG @821<//:136/8<1/5.427.;>288/0<1$)/
+1831_574_1092 4 * 0 0 * * 0 0 GACTGGCGAAATCATCGCGGAAAAACCCCTATCA 8?.,98)03%#,,,(,++$'69'#9'7#';6.%2
+1831_574_1103 4 * 0 0 * * 0 0 ATCTCCTGATTAGACATATCTCCGTAGCTCACGC ??>?@=>@@A<>?=@==<:?<;58'&&):+35'1
+1831_574_1116 4 * 0 0 * * 0 0 CACCTCACGTGAGTATAGCAGCCCGAGCGTCACC @++,)/)?./)%<)2>.==:8?&;44&/&,)/*4
+1831_574_1194 0 PHIX174 1136 37 33M1S * 0 0 GCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTG \]PT]]]]]]]ZZ]]]\Y]]]]\]]]V\]UMDK XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_574_1204 16 PHIX174 1445 25 1S33M * 0 0 TGGATTACTCGAATTAGTTCTACTACGAGCAATA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_574_1306 4 * 0 0 * * 0 0 ATTGCTTAGATCCAGTGGCGCTCAATACAACGGA 2.7&%''.$&#'%##$#'#%')')$'$%$,)'.1
+1831_574_1387 4 * 0 0 * * 0 0 GTACTTCTCAATGCTGCACTACTATGTACGCCCC ?A??:@?@@9?@@?9C;:>&).,,);&'<&7(/'
+1831_574_1431 4 * 0 0 * * 0 0 GACCAGTTTCAGGGCTAACCGTCCCTACTCGACC 7?>9BA897;+8*=6#6.+5&-#26$0.,5&'$1
+1831_574_1560 4 * 0 0 * * 0 0 GGCGTCTTAGGATTGAAGAGGGCCTCCCACCCCC ))&65<4=>99@:597(1&;+&(%&4#,#&&&*$
+1831_574_1591 16 PHIX174 5118 37 1S33M * 0 0 CTTCGACAAGTCTTAGTCTTACTCGGCGTTGAAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_574_1624 16 PHIX174 4058 37 1S33M * 0 0 CATAACTCCGAACACCGTAAAGATGAGAAAGAGT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_574_1826 4 * 0 0 * * 0 0 TACGTCGCGACCGAGCGTTATATAGTCTGACCCC @<<,))459<?.+57'-'&6?1)(;1;0&%4,#$
+1831_574_1903 0 PHIX174 5209 37 33M1S * 0 0 AGCTGGGTTACGACGCGACGCCGTTCAACCAGAC WT]]]][]]]][]]]]]]]]]]][X]QYYTYZ- XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_574_1961 0 PHIX174 3555 25 33M1S * 0 0 CGCGTTGCGTCTATTATGGAAAACACCAATCTTA TFPMV]OCBP]H<OS]<1CN]]M=K]52KO]WT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_575_54 16 PHIX174 3587 37 1S33M * 0 0 TAAGGTTCGTTGTCGTCCAAAGGCTCTAATACGC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_80 4 * 0 0 * * 0 0 TCTTTGGGTTTGGGGCAATTTGGTATGTTCGTCT ?5><@?/=5%(&.5)+#/0'/'>#$/'37#()6&
+1831_575_192 4 * 0 0 * * 0 0 AACTACGCCCCTTAATTACACAGCGCGTTAGACC :8<,9(.=/)#%8$,4'<,:)3/.'.5<,>-;7'
+1831_575_197 16 PHIX174 4861 37 1S33M * 0 0 CCGAACGTTTTATGCACCGGAATACCAATGTCAT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_223 4 * 0 0 * * 0 0 ACGCACAAAGGAGCTCGGCGCATAGCAACGCAGA ?>?8?@:?;>+$'%&,+#$.--#%$1'&#-7$&'
+1831_575_420 4 * 0 0 * * 0 0 CCCACATGGAAAACACTCACCGCCGAACAGAGCG 2?.>?(-''2+..@),$%1@;')1)?+)7.')..
+1831_575_434 4 * 0 0 * * 0 0 ATCGCTGCGAGGTCACCCTGGTTACGCCCCTTCC @@><@B@>=@@?>><@?;)@?;6.?=4,'';/3)
+1831_575_444 16 PHIX174 5098 37 1S33M * 0 0 TTCGACAGCGATGAAGGGTTCTTCGACAAGTCTT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_459 4 * 0 0 * * 0 0 CTTATCAGCACACGGTTTACCAGTCCGACTCCAA @@C@?A@@A@??A@@'6@@B'=;C@'4<=9,@1.
+1831_575_506 4 * 0 0 * * 0 0 CGCAGAACCCGCACGCTTGTGCTCACCACTGTAC ?)2*5@3+.7=?A:8@,/&):@37/<89'+4*)*
+1831_575_569 0 PHIX174 5043 25 33M1S * 0 0 GACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAAT IOBU[TMHTYG4M]7$?Q]]]57]SKXQC;:FE XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_575_622 0 PHIX174 2485 37 33M1S * 0 0 GTTTTACAGACACCTAAAGCTACATCGTCAACGC ]]]]]]]]]]]]]]]X[YVYINYLTLJWHO[$! XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_644 4 * 0 0 * * 0 0 CTTGAATGGCGATCTCGTCGAGGAAATACCACTA <8>.>&,+)8.10':5(/-62&*.=/%(.<&&18
+1831_575_663 16 PHIX174 1554 25 1S33M * 0 0 TGCTGAAGATGGTGTAGATAACTGTAATACCCAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_575_681 4 * 0 0 * * 0 0 TCTCCTGATTAGACATATCACTGTTGAACACAGC 72?.)25;96-14'*.03'#4#++$)/8+%/((&
+1831_575_711 4 * 0 0 * * 0 0 TATGTTCGTCCACGTCAGACGCGCAAAGTTGCGC %+:2(#'1+.)&*45-#,<A-%4;>+))<;##*&
+1831_575_730 4 * 0 0 * * 0 0 CACACAGGAACCATAGCGTATGTTATTCACCCCC %8<%-')5,*1&=#)(1*1)->7$.,4'4'&'*&
+1831_575_904 4 * 0 0 * * 0 0 ACCCGCTTAAAGACGTGAATTTTGCCTCCGCGCC <><.689@@7@<54:4=@>69=:+6-=42;8<:#
+1831_575_938 16 PHIX174 1133 37 1S33M * 0 0 TCTGCGGCAACCGCGAGAGGCAGAAAGAGGTAAC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_970 16 PHIX174 232 37 1S33M * 0 0 GCTCCTCTTCACCGAATTATACGAACCGTGCAAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_991 4 * 0 0 * * 0 0 TTCGGCGATCCCCCCCACGGCGCGATGCCGCGCA >>;7???/4<:::,#&*%'$--5-#(7)&&,-)%
+1831_575_1138 0 PHIX174 1913 37 33M1S * 0 0 GCTAAAGGTGCTTTGACTTATACCGATATTGCTC VT7-SURJS]X]TT]Z]UY]]]]]]X[UN[NMT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1157 16 PHIX174 3736 25 1S33M * 0 0 GTCGCCATACCGAGAAGAGTATAACCGCGATGAC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1180 0 PHIX174 4169 37 33M1S * 0 0 GGGCGTTGAGTTCGATAATGGTGATATGTATGTA ]]\]OJROX]]UUYSX\[Y]]]]VZ]\VRWLT] XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1283 4 * 0 0 * * 0 0 GGTGGAATACTAAGGACTAATGTGCTGTCTCTTT C@A54><?A78@<?2>:#>=?@2>80:'?69.'7
+1831_575_1302 16 PHIX174 5202 37 1S33M * 0 0 GGAATGGTTCGACCCAATGCTGCGCTGCGGCAAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1310 16 PHIX174 1612 37 1S33M * 0 0 AATGACTGGTTCTTGCACTAATGAAGTACGTCGC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1321 0 PHIX174 2664 37 33M1S * 0 0 TTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACG ]]]]]]]]]]]]]]]]]]]\ND]]]7/]]]WPZ XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1373 16 PHIX174 4221 37 1S33M * 0 0 TAGACTGCAAGCACTACTCAAACATAGACAATGA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1419 4 * 0 0 * * 0 0 TGGGGAATATAACAGCGTAGCGTTGAACACTAGA //3->&-.->%11/@&/&7:<,-1=,+2<6'%&<
+1831_575_1436 0 PHIX174 3460 37 33M1S * 0 0 CTGGCATTCAGTCGGCGACTTCACGCCAGAATAC []]WULFXOGY]]\PY]]IGZ]]M@V]XAAMR[ XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1442 0 PHIX174 3317 37 33M1S * 0 0 TGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCG \V]]]]RE\]LEQ]]]X]]]]TZ]]QUPW]LIX XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1454 4 * 0 0 * * 0 0 CCTCCTAACCACGAGCCGAGGGGTCTCTGCCCCT 4=?8=6589=0:A=:?>6<;@/,4+-6+/'.,(,
+1831_575_1500 16 PHIX174 4269 37 1S33M * 0 0 GCTTAACCGTGTTACGATGTTACACGAGGGGGTT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1535 16 PHIX174 1225 37 1S33M * 0 0 GAGTGCAAATACCACTTGTCACCTAATTCAAGTA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_575_1724 4 * 0 0 * * 0 0 TCGTAAGTGTTAAGGAGCTGTGTACAAAACAACA <B.+@),2*.%)))4%2@;7#%(%+$8))85%&5
+1831_575_1829 4 * 0 0 * * 0 0 CATTTGCTGACCCTGCGTAGTTTAGTCTCACAGC 2(*'%.3$+7)@&%$'3*+*#/#/*+0.=&#)+0
+1831_575_1898 4 * 0 0 * * 0 0 CTTACCATATCATCTCAACCCATAACAGCACTTA B>@@&1/))'40)%#8/.%#8$((#;4'$'63,,
+1831_575_1964 4 * 0 0 * * 0 0 GACAGACCATGAGGCTGAAGACTACTAAGTGCGT >39)@2<2/@+9?2=&)>>@*62=5&2<42.'?+
+1831_576_32 16 PHIX174 988 25 1S33M * 0 0 TAACTCCTATTTAATACAGATTATAAGTTTGACC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_576_74 4 * 0 0 * * 0 0 ACATTCTGCAGTGGGACAGAGCGGTACGCCGCAA =@@?@>(??<B5?@@@9<@><+><;@';>6961?
+1831_576_86 16 PHIX174 2981 37 1S33M * 0 0 TGTACAGATTTAACAAACCTCCGCCAGTTTTTCG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_89 4 * 0 0 * * 0 0 GCTGTTTGATTTGAGAAGAGGAATTAAGCGCCGA )69+,'.4=-,>/>(*$#)3030*'(,%)2##$,
+1831_576_266 16 PHIX174 280 37 1S33M * 0 0 ATCTATACTCAGTGTAAAACAAGTACCATCTCTA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_576_327 16 PHIX174 4517 37 1S33M * 0 0 TATGATAAGTCGCAAACTACTTACGTTACGCTGT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_331 4 * 0 0 * * 0 0 GACGCTTTACTCCGGTAGTACCGTGCCGTTTATA -2)+(*.*1/;5%.-9&#/1'+($*$##()%/$.
+1831_576_387 4 * 0 0 * * 0 0 ACACGCCATGATCCGATAAGAAGGGAACGTAAGG 8).$5#1#*%.$##.*#$%##-%,+,1#&%.%))
+1831_576_406 4 * 0 0 * * 0 0 AGGTCTTACAGCACGGGGCTGAGCGCATCTGACC 4<>%%14:*4656)&<251&2+3#&19,6&4>5(
+1831_576_449 4 * 0 0 * * 0 0 CTCGAACCGCGGGGTCCAAAGACTGCTGCAAGGA +'&'.,,$/+.)$$8&%#+?&,#)-&###7,+#*
+1831_576_519 16 PHIX174 1625 37 1S33M * 0 0 GGCACTAATGAAGTACGTCGCAATGGCACTACAA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_603 16 PHIX174 1576 37 1S33M * 0 0 GGTAATACCCAGACGTTCGACGAATACGATTAAA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_655 16 PHIX174 3292 37 1S33M * 0 0 CAACGACGACGTAAAGGACTCGAATTACGAACCC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_576_677 4 * 0 0 * * 0 0 TTTACTCAGTTGATACATCACTGTAGCACATAGG 290&/*0#&'&,.2'#&*$&('#-%($*#%$)#%
+1831_576_718 4 * 0 0 * * 0 0 CGTGCCTTTCAGGGTCTTTTCTGGTCTGGTCGCT 4###$0###%(#########,####%####$###
+1831_576_722 4 * 0 0 * * 0 0 CGTATGATGGCGAGTCTTTATAATCCAATCTGAA *,-,##.)*&(*1%*(%(-2#+)-#.&-#%%$')
+1831_576_754 4 * 0 0 * * 0 0 AGGCGTCCTGCATCGACCGTTCTATCCGCTTAGA 4//#(.$)'',>($<,##%((,#5?#0*%1*
+1831_576_815 0 PHIX174 5186 37 33M1S * 0 0 GGAGTGCTTAATCCAACTTACCAAGCTGGGTTAC Z]]]]]]]]]]]]]]]]]]]UP[ZKIP]][ZUN XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_882 0 PHIX174 5053 37 33M1S * 0 0 CTAAAGGTCTAGGAGCTAAAGAATGGAACAACTG Q]]]]]W[]]]X]]]]][EAV]]ZUS\]]]VX] XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_898 0 PHIX174 518 37 33M1S * 0 0 GATTGCTACTGACCGCTCTCGTGCTCGTCGCTGT ]]]Z]]]]]]]YW]]]]VV]]X]]]]]VO]]]T XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_923 0 PHIX174 1050 37 33M1S * 0 0 ATCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCG MLOW]RMX[]]X]]]XV]]]UKU]]]W\\]\ST XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_930 4 * 0 0 * * 0 0 CTGGCATGTACGTCCATGTCAGACGAGCAGAACT #*0.2&.:((#'14'##-)#%$$2%#$/1&#%/#
+1831_576_1019 4 * 0 0 * * 0 0 GATGCGCGCTGTCATGGCATCGAACCATCGGTCG <$>$,1,&++&@,.)'+/+#9'69/6'2(+-'9-
+1831_576_1068 4 * 0 0 * * 0 0 AAGAGTGACTCACTTACCGGGATGCGATGGAGCC >><=4?;@7=??9?;9>5@9?:8@:=5';7;'#4
+1831_576_1131 16 PHIX174 4061 37 1S33M * 0 0 TACTCCGAACACCGTAAAGATGAGAAAGAGTTAG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1168 4 * 0 0 * * 0 0 CACTTAACTCCGCTGTCGGAAGCCTTACAAGACA @CC6@@2?9=>7?;76<;467@;9,0%26'',4$
+1831_576_1207 4 * 0 0 * * 0 0 CAACCTGACTAAACGGGGATACGCTGGCGCTACA $>2,(&?4?(,@:<&,@>?$&:8A%%=0.%,597
+1831_576_1289 16 PHIX174 1899 37 1S33M * 0 0 AAAGTCATGGAATTGCGATTTCCACGAAACTGAA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1329 4 * 0 0 * * 0 0 CCAATAGCAGAGACCTAATAGGAAACGAAAGCAA >&0,870/A@@/5.;=;:'&@3'&$$%,+#($&2
+1831_576_1367 0 PHIX174 1839 37 33M1S * 0 0 AGCATGGCACTATGTTTACTCTTGCGCTTGTTCT ]]]]]]]]]]]]]]]]]]\]]]]]]]\YW]]XX XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1416 0 PHIX174 4522 37 33M1S * 0 0 TTCAGCGTTTGATGAATGCAATGCGACAGGCTCC K]VEHHS]P>TQJML]]\UFU][L6K]\TMPZ] XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1461 4 * 0 0 * * 0 0 GAGGGGCGGCCCGGTTCAAGCAGGTAAGCAACAA 1&74#(#(&##$#,'###%#%%#*#&%%##%%%'
+1831_576_1605 0 PHIX174 5209 37 33M1S * 0 0 AGCTGGGTTACGACGCGACGCCGTTCAACCAGAC ]]]]]XU]]]]]]]]]]]]]]X[]KHLWZUXW, XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1664 4 * 0 0 * * 0 0 CGCGCACAACTCGCCATGATACGACAAGACCCGA @?>?@=A:@@>@='=<>=).>=9*8)(7#/++;)
+1831_576_1671 4 * 0 0 * * 0 0 GTCTTTGTAAGCCTGGCCTGGTCAGGTCTGGTCT @?@5>@@8&8?25#&)&5&&)15&&,&4%&,&6%
+1831_576_1729 16 PHIX174 5194 37 1S33M * 0 0 TATTAGGTTGAATGGTTCGACCCAATGCTGCGCT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1880 4 * 0 0 * * 0 0 TATGCGCTGTTGAGCTTACAACTCACTAGCGAAT 515/792,:,7/%/05,%$):+#8%2(1754))3
+1831_576_1982 16 PHIX174 5024 37 1S33M * 0 0 TATTGTTTTTCAGTCTATACCTGGAACGACGATT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_576_1987 4 * 0 0 * * 0 0 AAGGTCTTCTGTCGGCGCTGGACTGAACACCTGA ##()(#&##)%)/-#%$11#%3>'##&$,#$$'+
+1831_576_2014 4 * 0 0 * * 0 0 CCGTGACACACAATGCCGGCCCCAGCCTAGCAAT @@9,B29-5>'?,+?79+/A';'2@'5&/9,6&1
+1831_576_2028 4 * 0 0 * * 0 0 ACTCGCCGCATCCCCGAGTGACGCTCGAGACCAA <>9>5'2(&707.8#&&39,0%7/#(#,*%&5*+
+1831_577_40 0 PHIX174 2772 25 33M1S * 0 0 TGTGACTATTGACGTCCTTCCCCGTACGCCGGGT JLE8>>9:MOR=!&74KQDKU]M?IG@P]]]PK XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_577_119 4 * 0 0 * * 0 0 TCCCACAAGCCATTGACATGCTCGCTCTAAACAG >?C11)-1/)#;#/,850*+.+$$5550+%-.40
+1831_577_133 16 PHIX174 1714 37 1S33M * 0 0 CGTACGCGAGATTAGAGACCCGTAGACCGATACT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_577_255 4 * 0 0 * * 0 0 ATTGAGGCCAAGATAATTGAGGAGAGAGAAGGTG <=87@>69%**#&#-+$.#&&#+$-+%&%.,%5#
+1831_577_281 16 PHIX174 4577 37 1S33M * 0 0 TAAAACTGTGAGAGTGCAACCGACTGCTGGCTAA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_577_288 0 PHIX174 3201 25 33M1S * 0 0 GGCACTTCTGCCGTTTCTGATAAGTTGCTTGATA SOY]]NL]]]UTZKIMNOCH/-LFLLK]VN5%F XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_577_322 16 PHIX174 458 37 1S33M * 0 0 CAAGACGGCAAAACCTAAATTGGCTTCTACTAAA XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_577_362 0 PHIX174 2153 37 33M1S * 0 0 CTTATTCGCCACCATGATTATGACCAGTGTTTCA ]]]]]]]]]WV]RHVONRSI6=O[NK[ZL?%(J XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_577_382 16 PHIX174 4686 37 1S33M * 0 0 TCCAGCGTTCCGATTACTAAGTGTGCGGCTGACG XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_577_464 0 PHIX174 2126 25 33M1S * 0 0 CCTTCTGGTGATTTGCAAGAACGCGTACTTATTG ]\OT]SQHP<,>6COKMHI3-HMNHNJLS@FUT XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_577_488 16 PHIX174 4948 37 1S33M * 0 0 GCCGGACAACTACGATTTCCACTCGGCGAATTTC XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_577_511 4 * 0 0 * * 0 0 GGTGCTTATCAGTTCTTTCTCGGGACTTGTATTT 7/+&$:<7%6,$$%'%/+)#$7((&*3>16'0/+
+1831_577_545 0 PHIX174 1711 37 33M1S * 0 0 TGTCATGCGCTCTAATCTCTGGGCATCTGGCTAT ]]]]]]]]]YURZ]]]]]]]VNNPV]WLLWZYN XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
+1831_577_559 0 PHIX174 3840 25 33M1S * 0 0 GCTGTTGCCGATACTTGGAACAATTTCTGGAAAG ]]]]]]]]WJWSWZW]XLX\P#-]]V=A(/TD9 XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:33
+1831_577_562 4 * 0 0 * * 0 0 GTTCCACTACGTTCCACGCAAAGGAATCCGAATC ((&*&0%)1%)#($2-,***%/-,,))&,-.1'*
+1831_577_637 0 PHIX174 4247 37 33M1S * 0 0 TGTTACTGAGAAGTTAATGGATGAATTGGCACAA BI2<]PLJN[M;7FYWYVW]]\WG@IQH6O]XI XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:33
+1831_577_641 4 * 0 0 * * 0 0 TATCTACCACCGCGGTGGCGCGAGAATGCTCCCT 47;/':A;;5?:72,(=),#*?+.#&7$8#%7/'
+1831_577_692 0 PHIX174 3905 37 33M1S * 0 0 GTCTAGGAAATAACCGTCAGGATTGACACCCTCA WU]]]SQ]]]TT]]]TS]]]PM]]]]]]]]]UZ XT:A:U CM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:33
diff -r 1a56a541f9f9 -r 14d54c9dbd59 test-data/bwa_wrapper_out2.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bwa_wrapper_out2.sam Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,97 @@
+@SQ SN:PHIX174 LN:5386
+1378_11_329 77 * 0 0 * * 0 0 GTTCGTGGCCGGTGGGTGTTTGGG ###$$#$#$&#####$'$#$###$
+1378_11_329 141 * 0 0 * * 0 0 AGACCGGGCGGGGTGGCGTTCGGT %##+'#######%###$#$##$(#
+1378_17_1788 77 * 0 0 * * 0 0 TGGGTGGATGTGTTGTCGTTCATG #$#$###$#$#######$#$####
+1378_17_1788 141 * 0 0 * * 0 0 TGCCGTGTCTTGCTAACGCCGATT #'#$$#$###%%##$$$$######
+1378_25_2035 77 * 0 0 * * 0 0 GTGCGTCGGGGAGGGTGCTGTCGG ######%#$%#$$###($###&&%
+1378_25_2035 141 * 0 0 * * 0 0 CTGCGTGTTGGTGTCTACTGGGGT #%#'##$#$##&%#%$$$%#%#'#
+1378_28_770 77 * 0 0 * * 0 0 GGTTGGGAGTTGGCGCGCGTGGTT ###$##%%$####%#%$#%%#($$
+1378_28_770 141 * 0 0 * * 0 0 CTGCTATGCCGGCCGCCTGCCCCT %##(#$$#%$#$%###$%%$##$#
+1378_33_1945 77 * 0 0 * * 0 0 GGGGGGGGGGGGGGGGGGGGGGGG .6;?;55697>8779457986264
+1378_33_1945 141 * 0 0 * * 0 0 GGGGGGGGGGGGGGGGGGGGGGGG +9;64287788752796/576352
+1378_34_789 77 * 0 0 * * 0 0 GGGCTTGCGTTAGTGAGAGGTTGT ###%$%$%%###$####$###$#&
+1378_34_789 141 * 0 0 * * 0 0 ATGGTGGCTGACGCGTTTGACTGT #$##%#$##$&$#%##$##$###$
+1378_35_263 77 * 0 0 * * 0 0 GGCGGGGGGGGGGGGGGGGGGGGG $%%&,)3/)%$-6'$&5&#$''5.
+1378_35_263 141 * 0 0 * * 0 0 GGGCGGGGGGCGGCGGGGGGGGGG ),,#%'$)'-(%&%'%',%%&&(&
+1378_43_186 77 * 0 0 * * 0 0 GCTAGGGTTTGGGTTTGCGGTGGG $%#$########%##%#$###'#'
+1378_43_186 141 * 0 0 * * 0 0 ATACTAGTTGGGACGCGTTGTGCT #$(4%$########$#$###$$$#
+1378_51_1671 77 * 0 0 * * 0 0 GGTGTTGTTCGGATGTAGCGTGGG ##%$$####$########$##0##
+1378_51_1671 141 * 0 0 * * 0 0 TGGGTAGGTGCGGCTCTTGCGGTC ##$#$%###%%#%#$##$$#####
+1378_56_324 77 * 0 0 * * 0 0 GCTGGGTGATCTAAGGAATGTAAT #$#####$$$#$$$$##%###*(#
+1378_56_324 141 * 0 0 * * 0 0 AGGCTTGGATTGTTGCGCTGACCT /+#**###%%##%#####$$$#$#
+1378_56_773 77 * 0 0 * * 0 0 TCTGGTCGGTTTCGGGGAGTGGAA ##%%#&$###$#$##%$####%%$
+1378_56_773 141 * 0 0 * * 0 0 TGTCGTGAGGTCACTTATCCCCAT &%#%##%%#####&#$%##$%##$
+1378_62_2027 77 * 0 0 * * 0 0 GTTGGCCTGGCCTGCCGTGCTGCG *##),/%##$)#%##1$#'%.#&#
+1378_62_2027 141 * 0 0 * * 0 0 CTTCCACGATCTGCTCGCTGTGGT (#&&$##$$#$%#%$$$#$###'#
+1378_62_2029 77 * 0 0 * * 0 0 GGCGGTGTGTGGTGCGGCTGTGCG /$$$=(####%####)$$%$-&%#
+1378_62_2029 141 * 0 0 * * 0 0 TCTGGGCTGTCTTCGGGTCGGTGT $%$$####$##$$#)##%%#$###
+1378_67_1795 77 * 0 0 * * 0 0 CGTGCCAACGACCGTCCTACGGAG @@.?7@@@.@:@A<<>>=>70<@7
+1378_67_1795 141 * 0 0 * * 0 0 TAGTGGGCTAGTCCGGGATCTACC B@C2@@?;-@@1>'A@@91@.@1'
+1378_68_466 77 * 0 0 * * 0 0 GTGTCATCTGAGGTAAAGCATTGT /##$09#$#.=$#$76+$%1'###
+1378_68_466 141 * 0 0 * * 0 0 GTGATCGTCGGTGCCAGTCCCTGT #(%)+##$#$#%#+$%##$#####
+1378_68_1692 77 * 0 0 * * 0 0 TGTTCGATTTAGGTCTTGCCTCGC ###%%$%%##%$#$##$###$$#$
+1378_68_1692 141 * 0 0 * * 0 0 GGTCTTCTGGGCACGGGCCAAGTT ###)%'&%##$###%#######$#
+1378_80_664 77 * 0 0 * * 0 0 TGTCTGCGTTGTATCTCTGGTGTA %##%,%$$#&$$###$#$%##'%#
+1378_80_664 141 * 0 0 * * 0 0 CTGCTTTGATCCCCGGTGGAGCAC 7#%###$$6#######$##$$$##
+1378_85_1786 77 * 0 0 * * 0 0 CCCTAGGAGCGTATACCGGACGAG ,'&/%/@,&1,&'/)&,6&&1)((
+1378_85_1786 141 * 0 0 * * 0 0 ATACTATGTCGATCTGTAAAAAAA )&.)#3%(a)$&%-,2#&+.-%0&./
+1378_86_1011 77 * 0 0 * * 0 0 AGGCGATGGGATATTATTTTACTT :$###)%##$9$###1$$#$2###
+1378_86_1011 141 * 0 0 * * 0 0 CTACGTTATTGCTCTGTTTGTCCT ######$%##$$$%###%#$####
+1378_86_1789 77 * 0 0 * * 0 0 GGCCAGGTACTAATAAATTCAACA 1.51/&%#'$,*?%#6&0$$5,20
+1378_86_1789 141 * 0 0 * * 0 0 CGAATATCCCCTGGTTTAGAAGTA 61?1;@/>@<=/(.7'(<5@=%$=
+1378_91_1596 77 * 0 0 * * 0 0 GCTTTTTCATTCGGTGCCTTTGGA '>%/3%=()8'#.%?50$&5>%)%
+1378_91_1596 141 * 0 0 * * 0 0 TTAGCGGTTGACTATCTGCTGACA *&+'#9'(%*'#//,&<),/)'*#
+1378_94_1595 77 * 0 0 * * 0 0 GCGGGTTAACCCAGTACCTCTGGC ,8@6'>8<76>@@(->9)7'*941
+1378_94_1595 141 * 0 0 * * 0 0 CGTGCGACAGCCCATGTTTTCAGA -=..5,3826&*+.+#+#%%6;%#
+1378_95_1039 77 * 0 0 * * 0 0 GTTCTGTGCCAGGTGAGGTACGGA &##,./#$&)6##+,'#$$0(##$
+1378_95_1039 141 * 0 0 * * 0 0 CGGCGTCCATCTTCGCCTTGAGAT $##.#$##$$#%$#$%%$###$)$
+1378_95_1767 77 * 0 0 * * 0 0 CTAATTGACCGGGCAAGCTATTAA ',,2'(10/>:=/7).3&%6&)&6
+1378_95_1767 141 * 0 0 * * 0 0 ACACGCACCATCTGGCGGCTAACC 86<65<4<C27/$5+#;-$+#2%$
+1378_96_1037 77 * 0 0 * * 0 0 CTGCTGGGCCATTTGACTTACTCA '$#+#(##-%5##+*&###-.$$$
+1378_96_1037 141 * 0 0 * * 0 0 ATCCCCCAAGATGCCTGTTGATTG $#$'##$$$#%$$#%###+##$#$
+1378_96_1764 77 * 0 0 * * 0 0 TCTTTGTAACCCACTTAGTATTTC 4='28>.@4881B807:822>%)1
+1378_96_1764 141 * 0 0 * * 0 0 TACACCGGAACCTTGCACTAAAGC 6-9'<#+81?+*<8-=69&6;*3)
+1378_98_1574 77 * 0 0 * * 0 0 AGGCGAGTGTGGGGGTTGTTTGAG +%%$#)##%##$####%###$%$#
+1378_98_1574 141 * 0 0 * * 0 0 GTTCTGCCGGTGTCTGTGGCGGGC $$#+&$$####%$$$###$%#%%#
+1378_107_1647 77 * 0 0 * * 0 0 GGTCTGGTTCTATGTTGGTCGACT ###'$$#$$$(#%###(#$##$%#
+1378_107_1647 141 * 0 0 * * 0 0 AGGCCTACTACGCGTCATTGATAG &#$$#$(.#%#$$####&$%##($
+1378_111_829 77 * 0 0 * * 0 0 GGTCTTCTATGGTGCTAAATTAGT $##'&###$##)#%($#$%#$'%#
+1378_111_829 141 * 0 0 * * 0 0 TGCGGCACTTGCTTCTTCGTATTT %#%##%#$%#$#%###$$##&#$$
+1378_111_1900 77 * 0 0 * * 0 0 GCACGCCTTTGGGCTAAGCCGTAA )$)'#%$########$'#&%$#(#
+1378_111_1900 141 * 0 0 * * 0 0 TCCCCTCGCTCGGCTCTGTGCTGT $&%*$#(#)##$#'##%(##$#$%
+1378_112_1483 77 * 0 0 * * 0 0 TGGAGTGGTGTGTTTGCTGAGCCA #$#)#############$#%#%'%
+1378_112_1483 141 * 0 0 * * 0 0 TGTCCAGCTATGCGGCTTCCTCCT %#$+#%#&#$#####%####%$##
+1378_125_1287 77 * 0 0 * * 0 0 TGACGTGGGTTGTCCCGTGAGATT ##$%%#$###$##$$#&%##$(%%
+1378_125_1287 141 * 0 0 * * 0 0 TGTCTCTGGGGGGCCTGGTTAGGT $##13$'%#$###$$###$$$#&#
+1378_126_468 77 * 0 0 * * 0 0 AAAAAACGGGCCGGGCGGACTAGG @@B@@@>@2>1+?:92)>@8&&<+
+1378_126_468 141 * 0 0 * * 0 0 TCTCCTGATTAGACATATCTCCGT 7>@318?=,=8@49579?..7,7)
+1378_127_664 77 * 0 0 * * 0 0 TCGCTTTGCCTATGTTTGTTCGGA #%$%#&##$%#%%###$$###)-'
+1378_127_664 141 * 0 0 * * 0 0 AGAGGTTGGTGTCTTGTCGCAGCT ##'#$######$$%######$$$#
+1378_129_463 77 * 0 0 * * 0 0 AAAAAAAAAAAAAAAAAAAAAAAA ,)&20%8'(&35-***1-2&+510
+1378_129_463 141 * 0 0 * * 0 0 CCTGTGGGCCAAGCCCAATGAAAG 8),,<6(--)<()5&.&/8+;.2+
+1378_129_875 77 * 0 0 * * 0 0 GACCTTTACGTATTGGGGGTTGGC ###)###+###$##$#&%##$,#$
+1378_129_875 141 * 0 0 * * 0 0 TTTCTATGGCTTACGCTGTCTGCC #$($##%####%$#$#####$###
+1378_140_1251 77 * 0 0 * * 0 0 TTTCCTTCGTGTGCGTGCGGAGTG #%#%$##$$$######.$$$%#%(
+1378_140_1251 141 * 0 0 * * 0 0 ATCCTAGCGCGGTGTCTTGGGGAC #$%1#$$$##$##$#$#$##$%$$
+1378_141_809 77 * 0 0 * * 0 0 TCTCGTGGTTTCTTTTTTATGTGT ##%)##$$#####%$#$#%%#'##
+1378_141_809 141 * 0 0 * * 0 0 TGTCCTCCAGTGTCTGTTGGGTGT %&,-##$$#(%###$#$$'###'#
+1378_144_983 77 * 0 0 * * 0 0 GTTCGTTCGTGGTGTACGAGGGTG #(#%#####($#%##$$#%##%#)
+1378_144_983 141 * 0 0 * * 0 0 AGCGCCCGGTTGGTGCGGCTCGTC -$(&%*$#*#))#$$$#%%$#$##
+1378_153_270 77 * 0 0 * * 0 0 GGCCGTGTGCGGGTGTAGATTGGA %$##($######&##$&$$$$%##
+1378_153_270 141 * 0 0 * * 0 0 AGTCCTTGTCCCCTGGGTTTTCCC +''$#&%$%#$##&$$($#&#$$#
+1378_155_1689 77 * 0 0 * * 0 0 CGCGTTCGGACAAAGCTAGCACCT @4@@=6=@@<4:@@681@@@.24.
+1378_155_1689 141 * 0 0 * * 0 0 TTGGGAGGGAGAGAGACTAGACCG ?4@@4<<??<?-@4@=4<5<?:/>
+1378_157_1580 77 * 0 0 * * 0 0 GGGATTGAAGGGATGTATGCTAGG #%$&%#$$'%$%#$##*#%$$$$#
+1378_157_1580 141 * 0 0 * * 0 0 TGGGCCTCGGTGCCCTTGGTCTGT #%)$##'#$$$&#####%#$#$##
+1378_161_317 77 * 0 0 * * 0 0 AATCCATACCCACAAAAGCAGGCC .&%','(@''?7//+&)+2.+)0)
+1378_161_317 141 * 0 0 * * 0 0 TTGGCCGGCAACCCCGGTACCTAA 7<,<'@)@>.)2@/')'&(?/-<(
+1378_177_735 77 * 0 0 * * 0 0 CGAGCCCTAAACCATGAGATCGGA @@B>@AC9@A<B8@=9>7@5)>,0
+1378_177_735 141 * 0 0 * * 0 0 AGTACGCCATGTATTTGCGACCAG =?@?C<88@=>:7>@55/.,416,
+1378_181_1684 77 * 0 0 * * 0 0 TTTCTGTTGTGGTTTTGTTGGGGT $##'$%'##%##$%$#$$####$*
+1378_181_1684 141 * 0 0 * * 0 0 CGACTCCCGCATTCACGGTCAAGT &*#,##$#&$*$$#$#$$$#%$##
+1378_187_1407 77 * 0 0 * * 0 0 TTGGGTGAAATCTTGTCGAGTGGA ####&##$$###$#####%##%%)
+1378_187_1407 141 * 0 0 * * 0 0 TGGCGTCCACTCGTGGGTCTATCG $#$'%#$%$%&$%#####$#$#%#
+1378_203_721 77 * 0 0 * * 0 0 CCTTTACGATCATAAACCATGACC 9<&./1&@;12')?<1',?/)&/.
+1378_203_721 141 * 0 0 * * 0 0 CCTAAATAACCCAGGGTAAAAGAT 9<1+2+1@=259@+;2.71&@+2&
+1378_206_2039 77 * 0 0 * * 0 0 CCCCCCCCCCCCCCCCCCCCCCCC ?=>>?=:;<>>4::<=56199(05
+1378_206_2039 141 * 0 0 * * 0 0 CCCCCCCCCCCCCCCCCCCCCCCC 9@?8>;>>><<994;=895895-8
diff -r 1a56a541f9f9 -r 14d54c9dbd59 test-data/bwa_wrapper_out3.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bwa_wrapper_out3.sam Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,2 @@
+@SQ SN:PHIX174 LN:5386
+081017-and-081020:1:1:1715:1759 16 PHIX174 322 25 36M * 0 0 GATATTTTAAAGGAGCGTGGATTACTATCTGAGTCC B&&I13A$G$*%$IIIIIII9(.+5$IIIIIII#II XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:2C8A24
diff -r 1a56a541f9f9 -r 14d54c9dbd59 test-data/bwa_wrapper_out4.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bwa_wrapper_out4.sam Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,3 @@
+@SQ SN:PHIX174 LN:5386
+081017-and-081020:1:1:1715:1759 113 PHIX174 322 25 18M = 340 18 GATATTTTAAAGGAGCGT B&&I13A$G$*%$IIIII XT:A:U NM:i:2 SM:i:25 AM:i:25 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:2C8A6
+081017-and-081020:1:1:1715:1759 177 PHIX174 340 37 18M = 322 -18 GGATTACTATCTGAGTCC II9(.+5$IIIIIII#II XT:A:U NM:i:0 SM:i:37 AM:i:25 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18
diff -r 1a56a541f9f9 -r 14d54c9dbd59 test-data/fastq_conv_in1.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastq_conv_in1.fastq Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,4 @@
+@081017-and-081020:1:1:1715:1759
+GGACTCAGATAGTAATCCACGCTCCTTTAAAATATC
++
+II#IIIIIII$5+.(9IIIIIII$%*$G$A31I&&B
diff -r 1a56a541f9f9 -r 14d54c9dbd59 tools/next_gen_conversion/fastq_conversions.xml
--- a/tools/next_gen_conversion/fastq_conversions.xml Tue Sep 15 14:26:20 2009 -0400
+++ b/tools/next_gen_conversion/fastq_conversions.xml Wed Sep 16 11:14:39 2009 -0400
@@ -52,7 +52,7 @@
<tests>
<test>
<param name="type" value="sol2std" />
- <param name="input" value="bwa_phiX_sanger.fastq" ftype="fastqsolexa" />
+ <param name="input" value="fastq_conv_in1.fastq" ftype="fastqsolexa" />
<output name="outputFastqsanger" file="fastq_conv_out1.fastqsanger" />
</test>
<test>
diff -r 1a56a541f9f9 -r 14d54c9dbd59 tools/sr_mapping/bwa_wrapper.py
--- a/tools/sr_mapping/bwa_wrapper.py Tue Sep 15 14:26:20 2009 -0400
+++ b/tools/sr_mapping/bwa_wrapper.py Wed Sep 16 11:14:39 2009 -0400
@@ -3,8 +3,6 @@
"""
Runs BWA on single-end or paired-end data.
Produces a SAM file containing the mappings.
-
-usage: python bwa_wrapper.py reference_sequence indexing_algorithm(is_or_bwtsw) forward_fastq_file reverse_fastq_file(or_None) output alignment_type(single_or_paired) parameters(pre_set_or_full) file_type(solexa_or_solid) file_source(indexed_or_history) maxEditDist fracMissingAligns maxGapOpens maxGapExtens disallowLongDel disallowIndel seed maxEditDistSeed numThreads mismatchPenalty gapOpenPenalty gapExtensPenalty colorSpaceRev suboptAlign noIterSearch outputTopN maxInsertSize maxOccurPairing\nThe last eighteen need to all be specified, or all be None
"""
import optparse, os, sys, tempfile
@@ -16,6 +14,7 @@
def __main__():
#Parse Command Line
parser = optparse.OptionParser()
+ parser.add_option('', '--threads', dest='threads', help='The number of threads to use')
parser.add_option('', '--ref', dest='ref', help='The reference genome to use or index')
parser.add_option('', '--fastq', dest='fastq', help='The (forward) fastq file to use for the mapping')
parser.add_option('', '--rfastq', dest='rfastq', help='The reverse fastq file to use for mapping if paired-end data')
@@ -32,7 +31,6 @@
parser.add_option('-i', '--disallowIndel', dest='disallowIndel', help='Disallow indel within specified bps')
parser.add_option('-l', '--seed', dest='seed', help='Take the first specified subsequences')
parser.add_option('-k', '--maxEditDistSeed', dest='maxEditDistSeed', help='Maximum edit distance to the seed')
- parser.add_option('-t', '--numThreads', dest='numThreads', help='Number of threads')
parser.add_option('-M', '--mismatchPenalty', dest='mismatchPenalty', help='Mismatch penalty')
parser.add_option('-O', '--gapOpenPenalty', dest='gapOpenPenalty', help='Gap open penalty')
parser.add_option('-E', '--gapExtensPenalty', dest='gapExtensPenalty', help='Gap extension penalty')
@@ -42,8 +40,9 @@
parser.add_option('', '--outputTopN', dest='outputTopN', help='Output top specified hits')
parser.add_option('', '--maxInsertSize', dest='maxInsertSize', help='Maximum insert size for a read pair to be considered mapped good')
parser.add_option('', '--maxOccurPairing', dest='maxOccurPairing', help='Maximum occurrences of a read for pairings')
+ parser.add_option('', '--dbkey', dest='dbkey', help='')
(options, args) = parser.parse_args()
-
+
# index if necessary
if options.fileSource == 'history':
# make temp directory for placement of indices and copy reference file there
@@ -65,46 +64,42 @@
else:
indexing_cmds = '-a %s' % indexingAlg
options.ref = os.path.join(tmp_dir,os.path.split(options.ref)[1])
- cmd1 = 'bwa index %s %s 2> /dev/null' % (indexing_cmds, options.ref)
+ cmd1 = 'cd %s; bwa index %s %s 2> /dev/null' % (tmp_dir, indexing_cmds, options.ref)
try:
os.system(cmd1)
except Exception, erf:
stop_err('Error indexing reference sequence\n' + str(erf))
-
# set up aligning and generate aligning command options
if options.params == 'pre_set':
if options.fileType == 'solid':
- aligning_cmds = '-c'
+ aligning_cmds = '-c -t %s' % options.threads
else:
- aligning_cmds = ''
+ aligning_cmds = '-t %s' % options.threads
gen_alignment_cmds = ''
else:
aligning_cmds = '-n %s -o %s -e %s -d %s -i %s %s -k %s -t %s -M %s -O %s -E %s %s %s %s' % \
((options.fracMissingAligns, options.maxEditDist)[options.maxEditDist != '0'],
options.maxGapOpens, options.maxGapExtens, options.disallowLongDel,
- options.disallowIndel, ('',' -l %s'%options.seed)[options.seed!=-1],
- options.maxEditDistSeed, options.numThreads, options.mismatchPenalty,
+ options.disallowIndel, ('',' -l %s'%options.seed)[options.seed!='-1'],
+ options.maxEditDistSeed, options.threads, options.mismatchPenalty,
options.gapOpenPenalty, options.gapExtensPenalty, ('',' -c')[options.colorSpaceRev=='true'],
('',' -R')[options.suboptAlign=='true'], ('',' -N')[options.noIterSearch=='true'])
if options.genAlignType == 'single':
gen_alignment_cmds = '-n %s' % options.outputTopN
elif options.genAlignType == 'paired':
gen_alignment_cmds = '-a %s -o %s' % (options.maxInsertSize, options.maxOccurPairing)
-
- # set up output file
- file(options.output,'w').write('QNAME\tFLAG\tRNAME\tPOS\tMAPQ\tCIGAR\tMRNM\tMPOS\tISIZE\tSEQ\tQUAL\tOPT\n')
+ # set up output files
tmp_align_out = tempfile.NamedTemporaryFile()
+ tmp_align_out2 = tempfile.NamedTemporaryFile()
# prepare actual aligning and generate aligning commands
cmd2 = 'bwa aln %s %s %s > %s 2> /dev/null' % (aligning_cmds, options.ref, options.fastq, tmp_align_out.name)
cmd2b = ''
if options.genAlignType == 'paired':
- tmp_align_out2 = tempfile.NamedTemporaryFile()
cmd2b = 'bwa aln %s %s %s > %s 2> /dev/null' % (aligning_cmds, options.ref, options.rfastq, tmp_align_out2.name)
cmd3 = 'bwa sampe %s %s %s %s %s %s >> %s 2> /dev/null' % (gen_alignment_cmds, options.ref, tmp_align_out.name, tmp_align_out2.name, options.fastq, options.rfastq, options.output)
else:
cmd3 = 'bwa samse %s %s %s %s >> %s 2> /dev/null' % (gen_alignment_cmds, options.ref, tmp_align_out.name, options.fastq, options.output)
-
# align
try:
os.system(cmd2)
@@ -116,11 +111,13 @@
os.system(cmd2b)
except Exception, erf:
stop_err("Error aligning second sequence\n" + str(erf))
-
# generate align
try:
os.system(cmd3)
except Exception, erf:
stop_err("Error sequence aligning sequence\n" + str(erf))
-
+ # clean up temp files
+ tmp_align_out.close()
+ tmp_align_out2.close()
+
if __name__=="__main__": __main__()
diff -r 1a56a541f9f9 -r 14d54c9dbd59 tools/sr_mapping/bwa_wrapper.xml
--- a/tools/sr_mapping/bwa_wrapper.xml Tue Sep 15 14:26:20 2009 -0400
+++ b/tools/sr_mapping/bwa_wrapper.xml Wed Sep 16 11:14:39 2009 -0400
@@ -1,7 +1,8 @@
-<tool id="bwa_wrapper" name="BWA" version="1.0.0">
+<tool id="bwa_wrapper" name="BWA" version="1.0.1">
<description> fast mapping of reads against reference sequence</description>
<command interpreter="python">
bwa_wrapper.py
+ --threads="8"
#if $solidOrSolexa.solidRefGenomeSource.refGenomeSource == "history":
--ref=$solidOrSolexa.solidRefGenomeSource.ownFile
#else:
@@ -27,7 +28,6 @@
--disallowIndel="None"
--seed="None"
--maxEditDistSeed="None"
- --numThreads="None"
--mismatchPenalty="None"
--gapOpenPenalty="None"
--gapExtensPenalty="None"
@@ -46,7 +46,6 @@
--disallowIndel=$params.disallowIndel
--seed=$params.seed
--maxEditDistSeed=$params.maxEditDistSeed
- --numThreads=$params.numThreads
--mismatchPenalty=$params.mismatchPenalty
--gapOpenPenalty=$params.gapOpenPenalty
--gapExtensPenalty=$params.gapExtensPenalty
@@ -57,9 +56,14 @@
--maxInsertSize=$params.maxInsertSize
--maxOccurPairing=$params.maxOccurPairing
#end if
+ #if $solidOrSolexa.solidRefGenomeSource.refGenomeSource == "history":
+ --dbkey=$dbkey
+ #else:
+ --dbkey="None"
+ #end if
</command>
<inputs>
- <conditional name="solidOrSolexa">
+ <conditional name="solidOrSolexa">
<param name="solidSolexa" type="select" label="Select SOLiD or Solexa format for the original dataset">
<option value="solexa">Solexa</option>
<option value="solid">SOLiD</option>
@@ -70,18 +74,18 @@
<option value="indexed">Use a built-in index</option>
<option value="history">Use one from the history</option>
</param>
- <when value="history">
- <param name="ownFile" type="data" format="fasta" label="Select a reference genome" />
- </when>
<when value="indexed">
<param name="indices" type="select" label="Select a reference genome">
<options from_file="sequence_index_color.loc">
<column name="value" index="1" />
<column name="name" index="0" />
- <filter type="sort_by" column="0" />
+ <filter type="sort_by" column="0" />
</options>
</param>
</when>
+ <when value="history">
+ <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome" />
+ </when>
</conditional>
</when>
<when value="solexa">
@@ -90,39 +94,39 @@
<option value="indexed">Use a built-in index</option>
<option value="history">Use one from the history</option>
</param>
- <when value="history">
- <param name="ownFile" type="data" format="fasta" label="Select a reference genome" />
- </when>
<when value="indexed">
<param name="indices" type="select" label="Select a reference genome">
<options from_file="sequence_index_base.loc">
<column name="value" index="1" />
<column name="name" index="0" />
- <filter type="sort_by" column="0" />
+ <filter type="sort_by" column="0" />
</options>
</param>
+ </when>
+ <when value="history">
+ <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome" />
</when>
</conditional>
</when>
</conditional>
- <conditional name="paired">
- <param name="sPaired" type="select" label="Is this library mate-paired?">
- <option value="single">Single-end</option>
- <option value="paired">Paired-end</option>
- </param>
- <when value="single">
- <param name="input1" type="data" format="fastqsanger" label="FASTQ file" />
- </when>
- <when value="paired">
- <param name="input1" type="data" format="fastqsanger" label="Forward FASTQ file" />
- <param name="input2" type="data" format="fastqsanger" label="Reverse FASTQ file" />
- </when>
- </conditional>
- <conditional name="params">
+ <conditional name="paired">
+ <param name="sPaired" type="select" label="Is this library mate-paired?">
+ <option value="single">Single-end</option>
+ <option value="paired">Paired-end</option>
+ </param>
+ <when value="single">
+ <param name="input1" type="data" format="fastqsanger" label="FASTQ file" />
+ </when>
+ <when value="paired">
+ <param name="input1" type="data" format="fastqsanger" label="Forward FASTQ file" />
+ <param name="input2" type="data" format="fastqsanger" label="Reverse FASTQ file" />
+ </when>
+ </conditional>
+ <conditional name="params">
<param name="source_select" type="select" label="BWA settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full List">
- <option value="pre_set">Commonly used</option>
- <option value="full">Full Parameter List</option>
- </param>
+ <option value="pre_set">Commonly used</option>
+ <option value="full">Full Parameter List</option>
+ </param>
<when value="pre_set" />
<when value="full">
<param name="maxEditDist" type="integer" value="0" label="Maximum edit distance" help="Enter this value OR a fraction of missing alignments, not both" />
@@ -133,9 +137,8 @@
<param name="disallowIndel" type="integer" value="5" label="Disallow insertion/deletion within [value] bp towards the end" />
<param name="seed" type="integer" value="-1" label="Number of first subsequences to take as seed" help="Enter -1 for infinity" />
<param name="maxEditDistSeed" type="integer" value="2" label="Maximum edit distance in the seed" />
- <param name="numThreads" type="integer" value="1" label="Number of threads, in multi-threading mode" />
<param name="mismatchPenalty" type="integer" value="3" label="Mismatch penalty" help="BWA will not search for suboptimal hits with a score lower than [value]" />
- <param name="gapOpenPenalty" type="integer" value="1" label="Gap open penalty" />
+ <param name="gapOpenPenalty" type="integer" value="11" label="Gap open penalty" />
<param name="gapExtensPenalty" type="integer" value="4" label="Gap extension penalty" />
<param name="colorSpaceRev" type="select" label="Reverse query but don't compement it" help="Reverse query for all alignment in color space">
<option value="false">Don't reverse query</option>
@@ -150,34 +153,33 @@
</conditional>
</inputs>
<outputs>
- <data format="tabular" name="output" />
+ <data format="sam" name="output" />
</outputs>
-<!-- Tests all fail because of problem with nested conditionals in test framework
<tests>
<test>
<param name="solidSolexa" value="solexa" />
<param name="refGenomeSource" value="indexed" />
<param name="indices" value="phiX" />
<param name="sPaired" value="single" />
- <param name="input1" value="bwa_phiX_sanger.fastq" />
+ <param name="input1" value="bwa_wrapper_in1.fastq" />
<param name="source_select" value="pre_set" />
- <output name="output" file="bwa_wrapper_out0.tabular" />
+ <output name="output" file="bwa_wrapper_out0.sam" ftype="sam" />
</test>
<test>
<param name="solidSolexa" value="solid" />
<param name="refGenomeSource" value="history" />
<param name="ownFile" value="phiX.fa" />
<param name="sPaired" value="single" />
- <param name="input1" value="bwa_phiX_sanger.fastq" />
+ <param name="input1" value="bwa_wrapper_in0.fastq" />
<param name="source_select" value="pre_set" />
- <output name="output" file="bwa_wrapper_out0b.tabular" />
+ <output name="output" file="bwa_wrapper_out0b.sam" ftype="sam" />
</test>
<test>
<param name="solidSolexa" value="solid" />
<param name="refGenomeSource" value="indexed" />
<param name="indices" value="phiX" />
<param name="sPaired" value="single" />
- <param name="input1" value="bwa_solid.fastq" />
+ <param name="input1" value="bwa_wrapper_in2.fastq" />
<param name="source_select" value="full" />
<param name="maxEditDist" value="0" />
<param name="fracMissingAligns" value="0.04" />
@@ -187,9 +189,8 @@
<param name="disallowIndel" value="5" />
<param name="seed" value="-1" />
<param name="maxEditDistSeed" value="2" />
- <param name="numThreads" value="1" />
<param name="mismatchPenalty" value="3" />
- <param name="gapOpenPenalty" value="1" />
+ <param name="gapOpenPenalty" value="11" />
<param name="gapExtensPenalty" value="4" />
<param name="colorSpaceRev" value="true" />
<param name="suboptAlign" value="true" />
@@ -197,15 +198,15 @@
<param name="outputTopN" value="-1" />
<param name="maxInsertSize" value="500" />
<param name="maxOccurPairing" value="100000" />
- <output name="output" file="bwa_wrapper_out1.tabular" />
+ <output name="output" file="bwa_wrapper_out1.sam" ftype="sam" />
</test>
<test>
<param name="solidSolexa" value="solid" />
<param name="refGenomeSource" value="indexed" />
<param name="indices" value="phiX" />
<param name="sPaired" value="paired" />
- <param name="input1" value="bwa_solid_f.fastq" />
- <param name="input2" value="bwa_solid_r.fastq" />
+ <param name="input1" value="bwa_wrapper_in3.fastq" />
+ <param name="input2" value="bwa_wrapper_in4.fastq" />
<param name="source_select" value="full" />
<param name="maxEditDist" value="0" />
<param name="fracMissingAligns" value="0.04" />
@@ -215,9 +216,8 @@
<param name="disallowIndel" value="5" />
<param name="seed" value="-1" />
<param name="maxEditDistSeed" value="2" />
- <param name="numThreads" value="1" />
<param name="mismatchPenalty" value="3" />
- <param name="gapOpenPenalty" value="1" />
+ <param name="gapOpenPenalty" value="11" />
<param name="gapExtensPenalty" value="4" />
<param name="colorSpaceRev" value="true" />
<param name="suboptAlign" value="true" />
@@ -225,14 +225,14 @@
<param name="outputTopN" value="-1" />
<param name="maxInsertSize" value="500" />
<param name="maxOccurPairing" value="100000" />
- <output name="output" file="bwa_wrapper_out2.tabular" />
+ <output name="output" file="bwa_wrapper_out2.sam" ftype="sam" />
</test>
<test>
<param name="solidSolexa" value="solexa" />
<param name="refGenomeSource" value="indexed" />
<param name="indices" value="phiX" />
<param name="sPaired" value="single" />
- <param name="input1" value="bwa_phiX_sanger.fastq" />
+ <param name="input1" value="bwa_wrapper_in1.fastq" />
<param name="source_select" value="full" />
<param name="maxEditDist" value="0" />
<param name="fracMissingAligns" value="0.04" />
@@ -242,9 +242,8 @@
<param name="disallowIndel" value="5" />
<param name="seed" value="-1" />
<param name="maxEditDistSeed" value="2" />
- <param name="numThreads" value="1" />
<param name="mismatchPenalty" value="3" />
- <param name="gapOpenPenalty" value="1" />
+ <param name="gapOpenPenalty" value="11" />
<param name="gapExtensPenalty" value="4" />
<param name="colorSpaceRev" value="false" />
<param name="suboptAlign" value="true" />
@@ -252,15 +251,15 @@
<param name="outputTopN" value="-1" />
<param name="maxInsertSize" value="500" />
<param name="maxOccurPairing" value="100000" />
- <output name="output" file="bwa_wrapper_out3.tabular" />
+ <output name="output" file="bwa_wrapper_out3.sam" ftype="sam" />
</test>
<test>
<param name="solidSolexa" value="solexa" />
<param name="refGenomeSource" value="indexed" />
<param name="indices" value="phiX" />
<param name="sPaired" value="paired" />
- <param name="input1" value="bwa_phiX_sanger_f.fastq" />
- <param name="input2" value="bwa_phiX_sanger_r.fastq" />
+ <param name="input1" value="bwa_wrapper_in5.fastq" />
+ <param name="input2" value="bwa_wrapper_in6.fastq" />
<param name="source_select" value="full" />
<param name="maxEditDist" value="0" />
<param name="fracMissingAligns" value="0.04" />
@@ -270,9 +269,8 @@
<param name="disallowIndel" value="5" />
<param name="seed" value="-1" />
<param name="maxEditDistSeed" value="2" />
- <param name="numThreads" value="1" />
<param name="mismatchPenalty" value="3" />
- <param name="gapOpenPenalty" value="1" />
+ <param name="gapOpenPenalty" value="11" />
<param name="gapExtensPenalty" value="4" />
<param name="colorSpaceRev" value="false" />
<param name="suboptAlign" value="true" />
@@ -280,11 +278,10 @@
<param name="outputTopN" value="-1" />
<param name="maxInsertSize" value="500" />
<param name="maxOccurPairing" value="100000" />
- <output name="output" file="bwa_wrapper_out4.tabular" />
- </test>
+ <output name="output" file="bwa_wrapper_out4.sam" ftype="sam" />
+ </test>
</tests>
--->
- <help>
+ <help>
**What it does**
@@ -294,7 +291,7 @@
**Input formats**
-BWA accepts files in FASTQ format.
+BWA accepts files in Sanger FASTQ format.
------
@@ -336,7 +333,7 @@
-------
-**BWA Settings**
+**BWA settings**
All of the options have a default value. You can change any of them. All of the options in BWA have been implemented here.
@@ -390,9 +387,9 @@
-o INT Maximum occurrences of a read for pairing. A read with more
occurrences will be treated as a single-end read. Reducing this
parameter helps faster pairing. [100000]
-
- </help>
+ </help>
+ <code file="bwa_wrapper_code.py" />
</tool>
diff -r 1a56a541f9f9 -r 14d54c9dbd59 tools/sr_mapping/bwa_wrapper_code.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/sr_mapping/bwa_wrapper_code.py Wed Sep 16 11:14:39 2009 -0400
@@ -0,0 +1,8 @@
+import os
+
+def exec_before_job(app, inp_data, out_data, param_dict, tool):
+ try:
+ refFile = param_dict['solidOrSolexa']['solidRefGenomeSource']['indices'].value
+ out_data['output'].set_dbkey(os.path.split(refFile)[1].split('.')[0])
+ except Exception, eq:
+ out_data['output'].set_dbkey(param_dict['dbkey'])
1
0
16 Sep '09
details: http://www.bx.psu.edu/hg/galaxy/rev/b25489f4fb26
changeset: 2695:b25489f4fb26
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Tue Sep 15 12:04:14 2009 -0400
description:
Add a new tool, Mutate by SNP codon, which will take codon position and sequence information that is joined to SNP data and create a 'mutated codon'.
6 file(s) affected in this change:
test-data/mutate_snp_codon_in.interval
test-data/mutate_snp_codon_out.interval
tool_conf.xml.main
tool_conf.xml.sample
tools/evolution/mutate_snp_codon.py
tools/evolution/mutate_snp_codon.xml
diffs (193 lines):
diff -r 990231e77b88 -r b25489f4fb26 test-data/mutate_snp_codon_in.interval
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mutate_snp_codon_in.interval Tue Sep 15 12:04:14 2009 -0400
@@ -0,0 +1,6 @@
+chr1 58995 58998 NM_001005484 0 + GAA GAA Glu GAA 1177632 28.96 0 2787607 0.422452662804 585 chr1 58996 58997 rs1638318 0 + A A A/G genomic single by-submitter 0 0 unknown exact 3
+chr1 59289 59292 NM_001005484 0 + TTT TTT Phe TTT 714298 17.57 0 1538990 0.464134269878 585 chr1 59290 59291 rs71245814 0 + T T G/T genomic single unknown 0 0 unknown exact 3
+chr1 59313 59316 NM_001005484 0 + AAG AAG Lys AAG 1295568 31.86 0 2289189 0.565950648898 585 chr1 59315 59316 rs2854682 0 - G G C/T genomic single by-submitter 0 0 unknown exact 3
+chr1 59373 59376 NM_001005484 0 + ACA ACA Thr ACA 614523 15.11 0 2162384 0.284187729839 585 chr1 59373 59374 rs2691305 0 - A A C/T genomic single unknown 0 0 unknown exact 3
+chr1 59412 59415 NM_001005484 0 + GCG GCG Ala GCG 299495 7.37 0 2820741 0.106176001271 585 chr1 59414 59415 rs2531266 0 + G G C/G genomic single by-submitter 0 0 unknown exact 3
+chr1 59412 59415 NM_001005484 0 + GCG GCG Ala GCG 299495 7.37 0 2820741 0.106176001271 585 chr1 59414 59415 rs55874132 0 + G G C/G genomic single unknown 0 0 coding-synon exact 1
diff -r 990231e77b88 -r b25489f4fb26 test-data/mutate_snp_codon_out.interval
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mutate_snp_codon_out.interval Tue Sep 15 12:04:14 2009 -0400
@@ -0,0 +1,6 @@
+chr1 58995 58998 NM_001005484 0 + GAA GAA Glu GAA 1177632 28.96 0 2787607 0.422452662804 585 chr1 58996 58997 rs1638318 0 + A A A/G genomic single by-submitter 0 0 unknown exact 3 GGA
+chr1 59289 59292 NM_001005484 0 + TTT TTT Phe TTT 714298 17.57 0 1538990 0.464134269878 585 chr1 59290 59291 rs71245814 0 + T T G/T genomic single unknown 0 0 unknown exact 3 TGT
+chr1 59313 59316 NM_001005484 0 + AAG AAG Lys AAG 1295568 31.86 0 2289189 0.565950648898 585 chr1 59315 59316 rs2854682 0 - G G C/T genomic single by-submitter 0 0 unknown exact 3 AAA
+chr1 59373 59376 NM_001005484 0 + ACA ACA Thr ACA 614523 15.11 0 2162384 0.284187729839 585 chr1 59373 59374 rs2691305 0 - A A C/T genomic single unknown 0 0 unknown exact 3 GCA
+chr1 59412 59415 NM_001005484 0 + GCG GCG Ala GCG 299495 7.37 0 2820741 0.106176001271 585 chr1 59414 59415 rs2531266 0 + G G C/G genomic single by-submitter 0 0 unknown exact 3 GCC
+chr1 59412 59415 NM_001005484 0 + GCG GCG Ala GCG 299495 7.37 0 2820741 0.106176001271 585 chr1 59414 59415 rs55874132 0 + G G C/G genomic single unknown 0 0 coding-synon exact 1 GCC
diff -r 990231e77b88 -r b25489f4fb26 tool_conf.xml.main
--- a/tool_conf.xml.main Tue Sep 15 11:28:50 2009 -0400
+++ b/tool_conf.xml.main Tue Sep 15 12:04:14 2009 -0400
@@ -132,10 +132,11 @@
<tool file="regVariation/best_regression_subsets.xml" />
<tool file="regVariation/rcve.xml" />
</section>
- <section name="Evolution: HyPhy" id="hyphy">
+ <section name="Evolution" id="hyphy">
<tool file="hyphy/hyphy_branch_lengths_wrapper.xml" />
<tool file="hyphy/hyphy_nj_tree_wrapper.xml" />
<tool file="hyphy/hyphy_dnds_wrapper.xml" />
+ <tool file="evolution/mutate_snp_codon.xml" />
</section>
<section name="Metagenomic analyses" id="tax_manipulation">
<tool file="taxonomy/gi2taxonomy.xml" />
diff -r 990231e77b88 -r b25489f4fb26 tool_conf.xml.sample
--- a/tool_conf.xml.sample Tue Sep 15 11:28:50 2009 -0400
+++ b/tool_conf.xml.sample Tue Sep 15 12:04:14 2009 -0400
@@ -152,10 +152,11 @@
<tool file="regVariation/best_regression_subsets.xml" />
<tool file="regVariation/rcve.xml" />
</section>
- <section name="Evolution: HyPhy" id="hyphy">
+ <section name="Evolution" id="hyphy">
<tool file="hyphy/hyphy_branch_lengths_wrapper.xml" />
<tool file="hyphy/hyphy_nj_tree_wrapper.xml" />
<tool file="hyphy/hyphy_dnds_wrapper.xml" />
+ <tool file="evolution/mutate_snp_codon.xml" />
</section>
<section name="Metagenomic analyses" id="tax_manipulation">
<tool file="taxonomy/gi2taxonomy.xml" />
diff -r 990231e77b88 -r b25489f4fb26 tools/evolution/mutate_snp_codon.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/evolution/mutate_snp_codon.py Tue Sep 15 12:04:14 2009 -0400
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+"""
+Script to mutate SNP codons.
+Dan Blankenberg
+"""
+
+import sys, string
+
+def strandify( fields, column ):
+ strand = '+'
+ if column >= 0 and column < len( fields ):
+ strand = fields[ column ]
+ if strand not in [ '+', '-' ]:
+ strand = '+'
+ return strand
+
+def main():
+ # parse command line
+ input_file = sys.argv[1]
+ out = open( sys.argv[2], 'wb+' )
+ codon_chrom_col = int( sys.argv[3] ) - 1
+ codon_start_col = int( sys.argv[4] ) - 1
+ codon_end_col = int( sys.argv[5] ) - 1
+ codon_strand_col = int( sys.argv[6] ) - 1
+ codon_seq_col = int( sys.argv[7] ) - 1
+
+ snp_chrom_col = int( sys.argv[8] ) - 1
+ snp_start_col = int( sys.argv[9] ) - 1
+ snp_end_col = int( sys.argv[10] ) - 1
+ snp_strand_col = int( sys.argv[11] ) - 1
+ snp_observed_col = int( sys.argv[12] ) - 1
+
+ max_field_index = max( codon_chrom_col, codon_start_col, codon_end_col, codon_strand_col, codon_seq_col, snp_chrom_col, snp_start_col, snp_end_col, snp_strand_col, snp_observed_col )
+
+ DNA_COMP = string.maketrans( "ACGTacgt", "TGCAtgca" )
+ skipped_lines = 0
+ for line in open( input_file ):
+ line = line.rstrip( '\n\r' )
+ if line and not line.startswith( '#' ):
+ fields = line.split( '\t' )
+ if max_field_index >= len( fields ):
+ skipped_lines += 1
+ continue
+ codon_chrom = fields[codon_chrom_col]
+ codon_start = int( fields[codon_start_col] )
+ codon_end = int( fields[codon_end_col] )
+ codon_strand = strandify( fields, codon_strand_col )
+ codon_seq = fields[codon_seq_col].upper()
+
+ snp_chrom = fields[snp_chrom_col]
+ snp_start = int( fields[snp_start_col] )
+ snp_end = int( fields[snp_end_col] )
+ snp_strand = strandify( fields, snp_strand_col )
+ snp_observed = fields[snp_observed_col].split( '/' )
+
+ for observed in snp_observed:
+ #Extract DNA on neg strand codons will have positions reversed relative to interval positions; i.e. position 0 == position 2
+ offset = snp_start - codon_start
+ if codon_strand == '-':
+ offset = 2 - offset
+ assert offset >= 0 and offset <= 2, ValueError( 'Impossible offset determined: %s' % offset )
+
+ if codon_strand != snp_strand:
+ #if our SNP is on a different strand than our codon, take complement of provided observed SNP base
+ observed = observed.translate( DNA_COMP )
+ snp_codon = [ char for char in codon_seq ]
+ snp_codon[offset] = observed.upper()
+ snp_codon = ''.join( snp_codon )
+
+ if codon_seq != snp_codon: #only output when we actually have a different codon
+ out.write( "%s\t%s\n" % ( line, snp_codon ) )
+
+if __name__ == "__main__": main()
diff -r 990231e77b88 -r b25489f4fb26 tools/evolution/mutate_snp_codon.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/evolution/mutate_snp_codon.xml Tue Sep 15 12:04:14 2009 -0400
@@ -0,0 +1,60 @@
+<tool id="mutate_snp_codon_1" name="Mutate Codons" version="1.0.0">
+ <description>with SNPs</description>
+ <command interpreter="python">mutate_snp_codon.py $input1 $output1 ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} ${input1.metadata.strandCol} $codon_seq_col $snp_chrom_col $snp_start_col $snp_end_col $snp_strand_col $snp_observed_col</command>
+ <inputs>
+ <param name="input1" type="data" format="interval" label="Interval file with joined SNPs" optional="False" help="The interval metadata for this file should be set for the codon positions."/>
+ <param name="codon_seq_col" label="Codon Sequence column" type="data_column" data_ref="input1" />
+ <param name="snp_chrom_col" label="SNP chromosome column" type="data_column" data_ref="input1" />
+ <param name="snp_start_col" label="SNP start column" type="data_column" data_ref="input1" />
+ <param name="snp_end_col" label="SNP end column" type="data_column" data_ref="input1" />
+ <param name="snp_strand_col" label="SNP strand column" type="data_column" data_ref="input1" />
+ <param name="snp_observed_col" label="SNP observed column" type="data_column" data_ref="input1" />
+ </inputs>
+ <outputs>
+ <data name="output1" format="interval" metadata_source="input1"/>
+ </outputs>
+ <tests>
+ <test>
+ <param name="input1" value="mutate_snp_codon_in.interval"/>
+ <param name="codon_seq_col" value="8"/>
+ <param name="snp_chrom_col" value="17"/>
+ <param name="snp_start_col" value="18"/>
+ <param name="snp_end_col" value="19"/>
+ <param name="snp_strand_col" value="22"/>
+ <param name="snp_observed_col" value="25"/>
+ <output name="output1" file="mutate_snp_codon_out.interval" />
+ </test>
+ </tests>
+ <help>
+This tool takes an interval file as input. This input should contain a set of codon locations and corresponding DNA sequence (such as from the *Extract Genomic DNA* tool) joined to SNP locations with observed values (such as *all fields from selected table* from the snp130 table of hg18 at the UCSC Table browser). This interval file should have the metadata (chromosome, start, end, strand) set for the columns containing the locations of the codons. The user needs to specify the columns containing the sequence for the codon as well as the genomic positions and observed values (values should be split by '/') for the SNP data as tool input; SNPs positions and sequence substitutes must have a length of exactly 1. Only genomic intervals which yield a different sequence string are output. All sequence characters are converted to uppercase during processing.
+
+ For example, using these settings:
+
+ * **metadata** **chromosome**, **start**, **end** and **strand** set to **1**, **2**, **3** and **6**, respectively
+ * **Codon Sequence column** set to **c8**
+ * **SNP chromosome column** set to **c17**
+ * **SNP start column** set to **c18**
+ * **SNP end column** set to **c19**
+ * **SNP strand column** set to **c22**
+ * **SNP observed column** set to **c25**
+
+ with the following input::
+
+ chr1 58995 58998 NM_001005484 0 + GAA GAA Glu GAA 1177632 28.96 0 2787607 0.422452662804 585 chr1 58996 58997 rs1638318 0 + A A A/G genomic single by-submitter 0 0 unknown exact 3
+ chr1 59289 59292 NM_001005484 0 + TTT TTT Phe TTT 714298 17.57 0 1538990 0.464134269878 585 chr1 59290 59291 rs71245814 0 + T T G/T genomic single unknown 0 0 unknown exact 3
+ chr1 59313 59316 NM_001005484 0 + AAG AAG Lys AAG 1295568 31.86 0 2289189 0.565950648898 585 chr1 59315 59316 rs2854682 0 - G G C/T genomic single by-submitter 0 0 unknown exact 3
+ chr1 59373 59376 NM_001005484 0 + ACA ACA Thr ACA 614523 15.11 0 2162384 0.284187729839 585 chr1 59373 59374 rs2691305 0 - A A C/T genomic single unknown 0 0 unknown exact 3
+ chr1 59412 59415 NM_001005484 0 + GCG GCG Ala GCG 299495 7.37 0 2820741 0.106176001271 585 chr1 59414 59415 rs2531266 0 + G G C/G genomic single by-submitter 0 0 unknown exact 3
+ chr1 59412 59415 NM_001005484 0 + GCG GCG Ala GCG 299495 7.37 0 2820741 0.106176001271 585 chr1 59414 59415 rs55874132 0 + G G C/G genomic single unknown 0 0 coding-synon exact 1
+
+
+ will produce::
+
+ chr1 58995 58998 NM_001005484 0 + GAA GAA Glu GAA 1177632 28.96 0 2787607 0.422452662804 585 chr1 58996 58997 rs1638318 0 + A A A/G genomic single by-submitter 0 0 unknown exact 3 GGA
+ chr1 59289 59292 NM_001005484 0 + TTT TTT Phe TTT 714298 17.57 0 1538990 0.464134269878 585 chr1 59290 59291 rs71245814 0 + T T G/T genomic single unknown 0 0 unknown exact 3 TGT
+ chr1 59313 59316 NM_001005484 0 + AAG AAG Lys AAG 1295568 31.86 0 2289189 0.565950648898 585 chr1 59315 59316 rs2854682 0 - G G C/T genomic single by-submitter 0 0 unknown exact 3 AAA
+ chr1 59373 59376 NM_001005484 0 + ACA ACA Thr ACA 614523 15.11 0 2162384 0.284187729839 585 chr1 59373 59374 rs2691305 0 - A A C/T genomic single unknown 0 0 unknown exact 3 GCA
+ chr1 59412 59415 NM_001005484 0 + GCG GCG Ala GCG 299495 7.37 0 2820741 0.106176001271 585 chr1 59414 59415 rs2531266 0 + G G C/G genomic single by-submitter 0 0 unknown exact 3 GCC
+ chr1 59412 59415 NM_001005484 0 + GCG GCG Ala GCG 299495 7.37 0 2820741 0.106176001271 585 chr1 59414 59415 rs55874132 0 + G G C/G genomic single unknown 0 0 coding-synon exact 1 GCC
+ </help>
+</tool>
1
0
16 Sep '09
details: http://www.bx.psu.edu/hg/galaxy/rev/b179523d7d78
changeset: 2693:b179523d7d78
user: anton(a)nekrut-mbp.bx.psu.edu
date: Tue Sep 15 10:30:54 2009 -0400
description:
Modification for input datatype in the splitter
1 file(s) affected in this change:
tools/metag_tools/split_paired_reads.xml
diffs (21 lines):
diff -r ba884f1a6b81 -r b179523d7d78 tools/metag_tools/split_paired_reads.xml
--- a/tools/metag_tools/split_paired_reads.xml Tue Sep 15 10:15:42 2009 -0400
+++ b/tools/metag_tools/split_paired_reads.xml Tue Sep 15 10:30:54 2009 -0400
@@ -4,7 +4,7 @@
split_paired_reads.py $input $output1 $output2
</command>
<inputs>
- <param name="input" type="data" format="fastqsolexa" label="Your paired-end file" />
+ <param name="input" type="data" format="fastqsolexa,fastqsanger" label="Your paired-end file" />
</inputs>
<outputs>
<data name="output1" format="fastqsolexa"/>
@@ -20,7 +20,7 @@
**What it does**
-Splits a single fastq datasret representing paired-end run into two datasets (one for each end). This tool works only for datasets where both ends have **the same** length.
+Splits a single fastq dataset representing paired-end run into two datasets (one for each end). This tool works only for datasets where both ends have **the same** length.
-----
1
0
16 Sep '09
details: http://www.bx.psu.edu/hg/galaxy/rev/990231e77b88
changeset: 2694:990231e77b88
user: guru
date: Tue Sep 15 11:28:50 2009 -0400
description:
More modifications to fastx toolkit: added fastqsanger to input format list and changed the ascii offset param to select.
4 file(s) affected in this change:
tools/fastx_toolkit/fasta_nucleotide_changer.xml
tools/fastx_toolkit/fastq_quality_converter.xml
tools/fastx_toolkit/fastq_quality_filter.xml
tools/fastx_toolkit/fastx_quality_statistics.xml
diffs (82 lines):
diff -r b179523d7d78 -r 990231e77b88 tools/fastx_toolkit/fasta_nucleotide_changer.xml
--- a/tools/fastx_toolkit/fasta_nucleotide_changer.xml Tue Sep 15 10:30:54 2009 -0400
+++ b/tools/fastx_toolkit/fasta_nucleotide_changer.xml Tue Sep 15 11:28:50 2009 -0400
@@ -10,21 +10,21 @@
</param>
</inputs>
+ <!--
+ Functional tests with param value starting with - fail.
<tests>
<test>
- <!-- DNA-to-RNA -->
<param name="input" value="fasta_nuc_changer1.fasta" />
<param name="mode" value="-r" />
<output name="output" file="fasta_nuc_change1.out" />
</test>
<test>
- <!-- RNA-to-DNA -->
<param name="input" value="fasta_nuc_changer2.fasta" />
<param name="mode" value="-d" />
<output name="output" file="fasta_nuc_change2.out" />
</test>
</tests>
-
+ -->
<outputs>
<data format="input" name="output" metadata_source="input" />
diff -r b179523d7d78 -r 990231e77b88 tools/fastx_toolkit/fastq_quality_converter.xml
--- a/tools/fastx_toolkit/fastq_quality_converter.xml Tue Sep 15 10:30:54 2009 -0400
+++ b/tools/fastx_toolkit/fastq_quality_converter.xml Tue Sep 15 11:28:50 2009 -0400
@@ -2,15 +2,18 @@
<description>(ASCII-Numeric)</description>
<command>zcat -f $input | fastq_quality_converter $QUAL_FORMAT -o $output -Q $offset</command>
<inputs>
- <param format="fastqsolexa" name="input" type="data" label="Library to convert" />
+ <param format="fastqsolexa,fastqsanger" name="input" type="data" label="Library to convert" />
<param name="QUAL_FORMAT" type="select" label="Desired output format">
<option value="-a">ASCII (letters) quality scores</option>
<option value="-n">Numeric quality scores</option>
</param>
- <param name="offset" size="4" type="integer" value="33" label="FASTQ ASCII offset" />
- </inputs>
+ <param name="offset" type="select" label="FASTQ ASCII offset">
+ <option value="33">33</option>
+ <option value="64">64</option>
+ </param>
+ </inputs>
<tests>
<test>
diff -r b179523d7d78 -r 990231e77b88 tools/fastx_toolkit/fastq_quality_filter.xml
--- a/tools/fastx_toolkit/fastq_quality_filter.xml Tue Sep 15 10:30:54 2009 -0400
+++ b/tools/fastx_toolkit/fastq_quality_filter.xml Tue Sep 15 11:28:50 2009 -0400
@@ -4,7 +4,7 @@
<command>zcat -f '$input' | fastq_quality_filter -q $quality -p $percent -v -o $output</command>
<inputs>
- <param format="fastqsolexa" name="input" type="data" label="Library to filter" />
+ <param format="fastqsolexa,fastqsanger" name="input" type="data" label="Library to filter" />
<param name="quality" size="4" type="integer" value="20">
<label>Quality cut-off value</label>
diff -r b179523d7d78 -r 990231e77b88 tools/fastx_toolkit/fastx_quality_statistics.xml
--- a/tools/fastx_toolkit/fastx_quality_statistics.xml Tue Sep 15 10:30:54 2009 -0400
+++ b/tools/fastx_toolkit/fastx_quality_statistics.xml Tue Sep 15 11:28:50 2009 -0400
@@ -3,8 +3,11 @@
<command>zcat -f $input | fastx_quality_stats -o $output -Q $offset</command>
<inputs>
- <param format="fasta,fastqsolexa" name="input" type="data" label="Library to analyse" />
- <param name="offset" size="4" type="integer" value="33" label="FASTQ ASCII offset" />
+ <param format="fasta,fastqsolexa,fastqsanger" name="input" type="data" label="Library to analyse" />
+ <param name="offset" type="select" label="FASTQ ASCII offset">
+ <option value="33">33</option>
+ <option value="64">64</option>
+ </param>
</inputs>
<tests>
1
0
16 Sep '09
details: http://www.bx.psu.edu/hg/galaxy/rev/40c5e1853a66
changeset: 2691:40c5e1853a66
user: gua110
date: Mon Sep 14 17:03:17 2009 -0400
description:
Updating FASTX tool-set to the latest version v0.0.10
28 file(s) affected in this change:
static/fastx_icons/fasta_clipping_histogram_3.png
static/fastx_icons/fasta_clipping_histogram_4.png
test-data/fasta_collapser1.out
test-data/fasta_formatter1.fasta
test-data/fasta_formatter1.out
test-data/fasta_formatter2.out
test-data/fasta_nuc_changer1.fasta
test-data/fasta_nuc_changer1.out
test-data/fasta_nuc_changer2.fasta
test-data/fasta_nuc_changer2.out
test-data/fastq_stats1.out
tool-data/fastx_clipper_sequences.txt
tool_conf.xml.sample
tools/fastx_toolkit/fasta_clipping_histogram.xml
tools/fastx_toolkit/fasta_collapser.xml
tools/fastx_toolkit/fasta_formatter.xml
tools/fastx_toolkit/fasta_nucleotide_changer.xml
tools/fastx_toolkit/fastq_nucleotides_distribution.xml
tools/fastx_toolkit/fastq_qual_conv.xml
tools/fastx_toolkit/fastq_qual_stat.xml
tools/fastx_toolkit/fastq_quality_converter.xml
tools/fastx_toolkit/fastx_barcode_splitter.xml
tools/fastx_toolkit/fastx_barcode_splitter_galaxy_wrapper.sh
tools/fastx_toolkit/fastx_clipper.xml
tools/fastx_toolkit/fastx_collapser.xml
tools/fastx_toolkit/fastx_nucleotides_distribution.xml
tools/fastx_toolkit/fastx_quality_statistics.xml
tools/fastx_toolkit/fastx_renamer.xml
diffs (1772 lines):
diff -r 0f97b3048bc3 -r 40c5e1853a66 static/fastx_icons/fasta_clipping_histogram_3.png
Binary file static/fastx_icons/fasta_clipping_histogram_3.png has changed
diff -r 0f97b3048bc3 -r 40c5e1853a66 static/fastx_icons/fasta_clipping_histogram_4.png
Binary file static/fastx_icons/fasta_clipping_histogram_4.png has changed
diff -r 0f97b3048bc3 -r 40c5e1853a66 test-data/fasta_collapser1.out
--- a/test-data/fasta_collapser1.out Mon Sep 14 15:27:55 2009 -0400
+++ b/test-data/fasta_collapser1.out Mon Sep 14 17:03:17 2009 -0400
@@ -1,24 +1,24 @@
->1-3
+>1-15
+AGTACAAGGACATGC
+>2-11
+ATTGCTGCTCGGATGGTCCGGCTGTGCACAC
+>3-5
+TTCAACGCCGCCGTGAAC
+>4-3
CTGCTGCGATCGGTGTGC
->2-1
-TTACCTCACGATATTGTAATA
->3-1
-CCTTGTAGTGGATTCTGATGA
->4-1
-TGATTTCCAGAGCCAAT
->5-11
-ATTGCTGCTCGGATGGTCCGGCTGTGCACAC
+>5-1
+TCAAATTCTAGATTTTTACGG
>6-1
ACCATTCGAGCATAC
>7-1
+TGATTTCCAGAGCCAAT
+>8-1
+TTACCTCACGATATTGTAATA
+>9-1
+TGTATTTACAATGACTAGAAA
+>10-1
+CCTTGTAGTGGATTCTGATGA
+>11-1
CGATTGCCGAAGTCTACCA
->8-5
-TTCAACGCCGCCGTGAAC
->9-1
-ATGACTTCATCGTCCACCCTTTAGAACT
->10-15
-AGTACAAGGACATGC
->11-1
-TCAAATTCTAGATTTTTACGG
>12-1
-TGTATTTACAATGACTAGAAA
+ATGACTTCATCGTCCACCCTTTAGAACT
\ No newline at end of file
diff -r 0f97b3048bc3 -r 40c5e1853a66 test-data/fasta_formatter1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_formatter1.fasta Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,100 @@
+>Scaffold3648
+AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTC
+CCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTG
+TTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACA
+GTATAACTTTTCAAATACTTTTGTTTTACAACTTTTCTCTCTGGACTTAT
+ATTAAAGTCAATTTTAATGAACATGTAGTAAAAACTAATACATGTACATC
+TACAGTTTATTTATTTTTTTCTTCTTCTTTTTGTATTTCTTGTGTTACAT
+TATTTCACTTCACGTTCATGTTACCAACCTTGCCCCCTTGCTTTCCATGC
+AAAAAAAGAAAAAAAAGAAGCAATACTTACACTTACCCTTGAGATATCTT
+GATCTGAATGCTTTAACATTCTATATGTACAATAAATTTTTGTATCTATA
+GCCTATTATTATATATGTTGCTATGTCAGGCACATTGACAACATTCTCAG
+AAGGTTAGAAGATGGTATTGTTCTGAAATGCCTGGAATGCCTTGTGAACT
+AAGATGATTACTCATGTCATTAAAGTCCCCTAACCCAGGTATTTCCTCCT
+TCCCATGACGAAAACAGTCCATTTAAACTTCACCCCACTTTGGACCCGAA
+AGTGGGGTGCATTTTGGTGGTAAGCTCACCACAGAGCAAGAGAGAGTTAG
+AGTCCCTAATCTGCAGTGTAAACAAACTTTGCCAGGACATCACCAGCCCA
+ACCTTGATAAGTACTGCTTGGAACTCCTCCATGATGTTCTAGTCTTATTC
+GCAGTCTCATATAGGTTCGGATTTTGTCCATTCTCATAGCTACCAGTATA
+CATGGGAGATGCCAGTTTCATCTTCCTTGCTTCACTTTATAAGCATAGTT
+ATATCANGAACTTCCTGGTTATAATTATGTTCCTTTCAAGTTTCATCATA
+ATTGTCTAGTTCGATATAGTACATGGACACAATTAAATATGATATTGTCT
+>Scaffold9299
+CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAG
+TCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAG
+GAAAAGCATCCTTGTTTGTTTCACTATGCTTTTTAATGGTTGACGTTAAa
+ggtaaagaccagtattggaaacgccccaatttcaaaaaatgaaatggaag
+ctctcattaccaatcatgtgaaagaatatgttttgactaatacatgatga
+taaaaaaattgccgggaaaccgcctactaattcatatatttagtaaattt
+gtttctctcatggtctgtgagagatatagggtagtcccatatacatcttt
+ctgtgtatagtgcttgtaactttacgaagaatgggccaaatttcttatca
+ttttgatgattccagaaccttgcagatgcgagatggtagatgatcaacct
+tttctgatcgattccataacgtttctttcacaatgcaatcgcatgaccat
+aactggtctttacctTTAAGTTGTAGGTCTTAATTGATAACACTATATAG
+TTTTTTTCTTTTTACTGTTTTTATTAATGACCTCTGTAATTTGCCCTATT
+GTGAAAATACTAAAATATGTTTATACGCCGATGATGCGGCAATATTTTGC
+CAAGGCAAAGAAATTGCCCTTGTTGAGAAAACTCTTAAATGTGAGTTTAA
+AAAAATAGTTGATCACATTGAAAAAGATGACTTAATGTTGAATATCAAGA
+AGTGTAAGATCATGTTATTTGGGACAAGAAAACGAATCAAAAATCAAAGT
+GTACGCTTGATTTACAGAGATAATGTTATCGAAGTTGTAAATGAATTTAA
+ATATCTTGGTGTATTATTTGATAATTATTTAAAGTGGGATATACATATAT
+CGAAAACTGCCTCCAAAATATCTAGAACCATATCATGTATAAAACGAATT
+AAATATTATTTGCCGAAAAGAATTTTAAAATTGTTATATGATAGTTTGAT
+ATTGTCACATATTGACTACGGTATTGTTTTGTGGGGATGTTCAGCAAAGT
+GTCATTTGGAAAAGTTACAAAAGTTACAAAATCGTTATGCCCGTTTAATA
+CTAAACGTAGATATTTTGACACCTCGTATTATATTATTATCCTCTCTAAG
+ATGGCAATCAGTTGTTCAGAGAGTGCAATACCAA
+>Scaffold9309
+GAAGGAAGAAGAGGAAAATAATGATGAATTTGTAGAATTTCTATAACGTA
+TGAAAACATAAACAACATGAAAAAGTATGAACCGACAGAAGAATGAAAAT
+TTCAATCATATAACATGTCATTCACTTCTCTTCTCTGACTGTCAAGTATT
+AGGTATTCCTTTTTATTTCCTCTTAAAATGATCATAGTTTCCTATTTCTT
+TTACACCATTGGGAAGGGAATTCCAATGTTTTATGGCATTGTAATAAAAC
+GAATTTCCAATACTACCTACTCTTTCTGGTAAGTTAAAGTTGAATCGGCT
+ATTTCTTGTATTATAATCATGTACGTCAGTAACAAGATCGAAGTTGGATC
+GAATATAATGATTCGACCTAGTATGATATATTTTATGCACGTGATGCAAT
+ACGAGTTGTTTTGATCTTTGGTCGACTTCAAGAAAACCAGCTTTAGAAAG
+TTCGCTGTAGCCAACATGAGTTCTTGCCTTGGACTAGAACAGTTGATAAA
+TCTCACCATTTTGTTCTTTAAGATGGGTAGAAGAATCCCTGCAATCTAAA
+TGGTCAATTACTGTGAAGTTATTTTTACTGGATGCACCCAATAttttttt
+gataatttttttttctttgataatttttttctttttctttaataaatttt
+ttggataatttttttttggataaatagttcttttttgataattctaataa
+tttttttatttattttttttttttctataattttttttaaaaaatttatt
+aatttttaattaaaaaaaaaataaGAGTTAACAGATTAAGGGAAACTGAC
+AATTCAAAAAAAAAAAAAA
+>Scaffold9310
+GCGGGGGCTGGGGAGGAAGGGGTGGCGTTATTTCACTTCCGATCTAATAC
+GCTTTCTTAAGACACTGAAATATCAGTAGGTATTGGTATAGAGAATTACT
+TTTTATTTTTAATTAAAACATTATCGAAATGAAGATACAGAGAAAAACGA
+TGAGATGTAAGAAGTGCGCGTATTTAtgtgtgtgggtgcgtgtgtgtgtg
+tgtgtgtgtgttgtgtgcgtgcgtgtgtgtggtggtgtgtACTAATTTTG
+ATGTGTGTTGTGGCACAATTGCAATCATCAGTATCTTCATGAAAATGATA
+ACCAGAAGCACAAAAAGGAGGgtgcgtgtgtgtgtgtgtgtgtttagtgt
+gcgtgcgtgtgagggtgtttaagtgtgtatgtCGGAAATGTGGCACAATT
+GCAATCATCTGTATCTTCATGAAAATGATAACCAGAAGAACAAAAAAAAA
+AAACATTGAGAGAACATGTTTTTTTGATGGAAGACAAGAAGTTCTCGTAA
+CGTAGGATCTCCGAGACATGATGGGGTCAACTTAAAAAGAGAGCAGTGAG
+AGGCATTTATATCGAAGGTCAGGGAAAGGCAAACAAAGAAAGAAAAAAAA
+AAGGCTCACAGGAGAACGAAAACACGGGCCAAAATAATAAACAGGAGCAA
+GTGAACGGGCAGTTTGGTAGCTACTTCATTTACCGGCTTTTAAaggtact
+atgtcccatttgcaggtcaaaaaaaatgaaaaagttaaattccaactgca
+tttgaaagataatactaatttacaacttccctaaaaaaggtggggcttga
+aaatgtcttcaagtgcggaaaataacgactattagttgtcaaatcgactt
+tagggCTATAGAGCCCAAAAGTAATAGTCTTGA
+>Scaffold11911
+TTCTTGGCACCCCCCCCCCCCCCACACTCCTGCACTGAAGAACTACTCAA
+GTTTAAACTTTGCATTGCTTTTCTTTCTTTTTCAGTATTTTTTGCTTGGT
+ACATGTTTCTCTTAATATCTGTCGTATAGatttttaatatttttatttat
+atCTACGTCAATCTGGCTGttctttttcttgtcttctttttttttctctc
+tcttttttttcctcgtattttGTATTGATCCTTACCCTAGTTTTTGAACT
+TGAACAGCAATTTGCAGCACTCAAATTTCTTTAAAATTACCTTCTCTTAT
+TTGtctctgttcccctctccccccctctctctctctctctctctctctct
+ctctctctctttcATCTCCCATATCATAATTTGAAGTACCATCTATGGTG
+TTTTCAGATTGATCTTTCTTGCTTTCCCCACCCTCCCCCTTTATGCAGTT
+AATTTTCAGTCTATTTGTGTTTTCTGTGGTTGATTCTAATCATATTCTAA
+CTCTTATTTTACATTTTACTTCACTAACAACTGGTTTATTATATTTGTTA
+CTAATTTTGAATTAAACTATTTACCATTCTGAACGAACTGAAAGATTAAA
+GATCAAACTATCTATGAATAGAATGGTATTTCTTCAATTTATTCAAATTT
+CTCTCTCTTTAACCCCCTTTTTCTGCTTGCATTTTTATCCCTTTGCCGTG
+GACTTCACTGGATATTTTGCTTTGATGCCAATCCAACAATTTTGCATATA
+TTA
diff -r 0f97b3048bc3 -r 40c5e1853a66 test-data/fasta_formatter1.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_formatter1.out Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,10 @@
+>Scaffold3648
+AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACAGTATAACTTTTCAAATACTTTTGTTTTACAACTTTTCTCTCTGGACTTATATTAAAGTCAATTTTAATGAACATGTAGTAAAAACTAATACATGTACATCTACAGTTTATTTATTTTTTTCTTCTTCTTTTTGTATTTCTTGTGTTACATTATTTCACTTCACGTTCATGTTACCAACCTTGCCCCCTTGCTTTCCATGCAAAAAAAGAAAAAAAAGAAGCAATACTTACACTTACCCTTGAGATATCTTGATCTGAATGCTTTAACATTCTATATGTACAATAAATTTTTGTATCTATAGCCTATTATTATATATGTTGCTATGTCAGGCACATTGACAACATTCTCAGAAGGTTAGAAGATGGTATTGTTCTGAAATGCCTGGAATGCCTTGTGAACTAAGATGATTACTCATGTCATTAAAGTCCCCTAACCCAGGTATTTCCTCCTTCCCATGACGAAAACAGTCCATTTAAACTTCACCCCACTTTGGACCCGAAAGTGGGGTGCATTTTGGTGGTAAGCTCACCACAGAGCAAGAGAGAGTTAGAGTCCCTAATCTGCAGTGTAAACAAACTTTGCCAGGACATCACCAGCCCAACCTTGATAAGTACTGCTTGGAACTCCTCCATGATGTTCTAGTCTTATTCGCAGTCTCATATAGGTTCGGATTTTGTCCATTCTCATAGCTACCAGTATACATGGGAGATGCCAGTTTCATCTTCCTTGCTTCACTTTATAAGCATAGTTATATCANGAACTTCCTGGTTATAATTATGTTCCTTTCAAGTTTCATCATAATTGTCTAGTTCGATATAGTACATGGACACAATTAAATA
TGATATTGTCT
+>Scaffold9299
+CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGGAAAAGCATCCTTGTTTGTTTCACTATGCTTTTTAATGGTTGACGTTAAaggtaaagaccagtattggaaacgccccaatttcaaaaaatgaaatggaagctctcattaccaatcatgtgaaagaatatgttttgactaatacatgatgataaaaaaattgccgggaaaccgcctactaattcatatatttagtaaatttgtttctctcatggtctgtgagagatatagggtagtcccatatacatctttctgtgtatagtgcttgtaactttacgaagaatgggccaaatttcttatcattttgatgattccagaaccttgcagatgcgagatggtagatgatcaaccttttctgatcgattccataacgtttctttcacaatgcaatcgcatgaccataactggtctttacctTTAAGTTGTAGGTCTTAATTGATAACACTATATAGTTTTTTTCTTTTTACTGTTTTTATTAATGACCTCTGTAATTTGCCCTATTGTGAAAATACTAAAATATGTTTATACGCCGATGATGCGGCAATATTTTGCCAAGGCAAAGAAATTGCCCTTGTTGAGAAAACTCTTAAATGTGAGTTTAAAAAAATAGTTGATCACATTGAAAAAGATGACTTAATGTTGAATATCAAGAAGTGTAAGATCATGTTATTTGGGACAAGAAAACGAATCAAAAATCAAAGTGTACGCTTGATTTACAGAGATAATGTTATCGAAGTTGTAAATGAATTTAAATATCTTGGTGTATTATTTGATAATTATTTAAAGTGGGATATACATATATCGAAAACTGCCTCCAAAATATCTAGAACCATATCATGTATAAAACGAATTAAATATTATTTGCCGAAAAGAATTTTAAAATTGTTATAT
GATAGTTTGATATTGTCACATATTGACTACGGTATTGTTTTGTGGGGATGTTCAGCAAAGTGTCATTTGGAAAAGTTACAAAAGTTACAAAATCGTTATGCCCGTTTAATACTAAACGTAGATATTTTGACACCTCGTATTATATTATTATCCTCTCTAAGATGGCAATCAGTTGTTCAGAGAGTGCAATACCAA
+>Scaffold9309
+GAAGGAAGAAGAGGAAAATAATGATGAATTTGTAGAATTTCTATAACGTATGAAAACATAAACAACATGAAAAAGTATGAACCGACAGAAGAATGAAAATTTCAATCATATAACATGTCATTCACTTCTCTTCTCTGACTGTCAAGTATTAGGTATTCCTTTTTATTTCCTCTTAAAATGATCATAGTTTCCTATTTCTTTTACACCATTGGGAAGGGAATTCCAATGTTTTATGGCATTGTAATAAAACGAATTTCCAATACTACCTACTCTTTCTGGTAAGTTAAAGTTGAATCGGCTATTTCTTGTATTATAATCATGTACGTCAGTAACAAGATCGAAGTTGGATCGAATATAATGATTCGACCTAGTATGATATATTTTATGCACGTGATGCAATACGAGTTGTTTTGATCTTTGGTCGACTTCAAGAAAACCAGCTTTAGAAAGTTCGCTGTAGCCAACATGAGTTCTTGCCTTGGACTAGAACAGTTGATAAATCTCACCATTTTGTTCTTTAAGATGGGTAGAAGAATCCCTGCAATCTAAATGGTCAATTACTGTGAAGTTATTTTTACTGGATGCACCCAATAtttttttgataatttttttttctttgataatttttttctttttctttaataaattttttggataatttttttttggataaatagttcttttttgataattctaataatttttttatttattttttttttttctataattttttttaaaaaatttattaatttttaattaaaaaaaaaataaGAGTTAACAGATTAAGGGAAACTGACAATTCAAAAAAAAAAAAAA
+>Scaffold9310
+GCGGGGGCTGGGGAGGAAGGGGTGGCGTTATTTCACTTCCGATCTAATACGCTTTCTTAAGACACTGAAATATCAGTAGGTATTGGTATAGAGAATTACTTTTTATTTTTAATTAAAACATTATCGAAATGAAGATACAGAGAAAAACGATGAGATGTAAGAAGTGCGCGTATTTAtgtgtgtgggtgcgtgtgtgtgtgtgtgtgtgtgttgtgtgcgtgcgtgtgtgtggtggtgtgtACTAATTTTGATGTGTGTTGTGGCACAATTGCAATCATCAGTATCTTCATGAAAATGATAACCAGAAGCACAAAAAGGAGGgtgcgtgtgtgtgtgtgtgtgtttagtgtgcgtgcgtgtgagggtgtttaagtgtgtatgtCGGAAATGTGGCACAATTGCAATCATCTGTATCTTCATGAAAATGATAACCAGAAGAACAAAAAAAAAAAACATTGAGAGAACATGTTTTTTTGATGGAAGACAAGAAGTTCTCGTAACGTAGGATCTCCGAGACATGATGGGGTCAACTTAAAAAGAGAGCAGTGAGAGGCATTTATATCGAAGGTCAGGGAAAGGCAAACAAAGAAAGAAAAAAAAAAGGCTCACAGGAGAACGAAAACACGGGCCAAAATAATAAACAGGAGCAAGTGAACGGGCAGTTTGGTAGCTACTTCATTTACCGGCTTTTAAaggtactatgtcccatttgcaggtcaaaaaaaatgaaaaagttaaattccaactgcatttgaaagataatactaatttacaacttccctaaaaaaggtggggcttgaaaatgtcttcaagtgcggaaaataacgactattagttgtcaaatcgactttagggCTATAGAGCCCAAAAGTAATAGTCTTGA
+>Scaffold11911
+TTCTTGGCACCCCCCCCCCCCCCACACTCCTGCACTGAAGAACTACTCAAGTTTAAACTTTGCATTGCTTTTCTTTCTTTTTCAGTATTTTTTGCTTGGTACATGTTTCTCTTAATATCTGTCGTATAGatttttaatatttttatttatatCTACGTCAATCTGGCTGttctttttcttgtcttctttttttttctctctcttttttttcctcgtattttGTATTGATCCTTACCCTAGTTTTTGAACTTGAACAGCAATTTGCAGCACTCAAATTTCTTTAAAATTACCTTCTCTTATTTGtctctgttcccctctccccccctctctctctctctctctctctctctctctctctctttcATCTCCCATATCATAATTTGAAGTACCATCTATGGTGTTTTCAGATTGATCTTTCTTGCTTTCCCCACCCTCCCCCTTTATGCAGTTAATTTTCAGTCTATTTGTGTTTTCTGTGGTTGATTCTAATCATATTCTAACTCTTATTTTACATTTTACTTCACTAACAACTGGTTTATTATATTTGTTACTAATTTTGAATTAAACTATTTACCATTCTGAACGAACTGAAAGATTAAAGATCAAACTATCTATGAATAGAATGGTATTTCTTCAATTTATTCAAATTTCTCTCTCTTTAACCCCCTTTTTCTGCTTGCATTTTTATCCCTTTGCCGTGGACTTCACTGGATATTTTGCTTTGATGCCAATCCAACAATTTTGCATATATTA
diff -r 0f97b3048bc3 -r 40c5e1853a66 test-data/fasta_formatter2.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_formatter2.out Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,84 @@
+>Scaffold3648
+AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCA
+GGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGG
+TATTGTGCATTATATGACCTGTAATACACAGTATAACTTTTCAAATACTTTTGTTTTACA
+ACTTTTCTCTCTGGACTTATATTAAAGTCAATTTTAATGAACATGTAGTAAAAACTAATA
+CATGTACATCTACAGTTTATTTATTTTTTTCTTCTTCTTTTTGTATTTCTTGTGTTACAT
+TATTTCACTTCACGTTCATGTTACCAACCTTGCCCCCTTGCTTTCCATGCAAAAAAAGAA
+AAAAAAGAAGCAATACTTACACTTACCCTTGAGATATCTTGATCTGAATGCTTTAACATT
+CTATATGTACAATAAATTTTTGTATCTATAGCCTATTATTATATATGTTGCTATGTCAGG
+CACATTGACAACATTCTCAGAAGGTTAGAAGATGGTATTGTTCTGAAATGCCTGGAATGC
+CTTGTGAACTAAGATGATTACTCATGTCATTAAAGTCCCCTAACCCAGGTATTTCCTCCT
+TCCCATGACGAAAACAGTCCATTTAAACTTCACCCCACTTTGGACCCGAAAGTGGGGTGC
+ATTTTGGTGGTAAGCTCACCACAGAGCAAGAGAGAGTTAGAGTCCCTAATCTGCAGTGTA
+AACAAACTTTGCCAGGACATCACCAGCCCAACCTTGATAAGTACTGCTTGGAACTCCTCC
+ATGATGTTCTAGTCTTATTCGCAGTCTCATATAGGTTCGGATTTTGTCCATTCTCATAGC
+TACCAGTATACATGGGAGATGCCAGTTTCATCTTCCTTGCTTCACTTTATAAGCATAGTT
+ATATCANGAACTTCCTGGTTATAATTATGTTCCTTTCAAGTTTCATCATAATTGTCTAGT
+TCGATATAGTACATGGACACAATTAAATATGATATTGTCT
+>Scaffold9299
+CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCA
+TAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGGAAAAGCATCCTTGTTTGTT
+TCACTATGCTTTTTAATGGTTGACGTTAAaggtaaagaccagtattggaaacgccccaat
+ttcaaaaaatgaaatggaagctctcattaccaatcatgtgaaagaatatgttttgactaa
+tacatgatgataaaaaaattgccgggaaaccgcctactaattcatatatttagtaaattt
+gtttctctcatggtctgtgagagatatagggtagtcccatatacatctttctgtgtatag
+tgcttgtaactttacgaagaatgggccaaatttcttatcattttgatgattccagaacct
+tgcagatgcgagatggtagatgatcaaccttttctgatcgattccataacgtttctttca
+caatgcaatcgcatgaccataactggtctttacctTTAAGTTGTAGGTCTTAATTGATAA
+CACTATATAGTTTTTTTCTTTTTACTGTTTTTATTAATGACCTCTGTAATTTGCCCTATT
+GTGAAAATACTAAAATATGTTTATACGCCGATGATGCGGCAATATTTTGCCAAGGCAAAG
+AAATTGCCCTTGTTGAGAAAACTCTTAAATGTGAGTTTAAAAAAATAGTTGATCACATTG
+AAAAAGATGACTTAATGTTGAATATCAAGAAGTGTAAGATCATGTTATTTGGGACAAGAA
+AACGAATCAAAAATCAAAGTGTACGCTTGATTTACAGAGATAATGTTATCGAAGTTGTAA
+ATGAATTTAAATATCTTGGTGTATTATTTGATAATTATTTAAAGTGGGATATACATATAT
+CGAAAACTGCCTCCAAAATATCTAGAACCATATCATGTATAAAACGAATTAAATATTATT
+TGCCGAAAAGAATTTTAAAATTGTTATATGATAGTTTGATATTGTCACATATTGACTACG
+GTATTGTTTTGTGGGGATGTTCAGCAAAGTGTCATTTGGAAAAGTTACAAAAGTTACAAA
+ATCGTTATGCCCGTTTAATACTAAACGTAGATATTTTGACACCTCGTATTATATTATTAT
+CCTCTCTAAGATGGCAATCAGTTGTTCAGAGAGTGCAATACCAA
+>Scaffold9309
+GAAGGAAGAAGAGGAAAATAATGATGAATTTGTAGAATTTCTATAACGTATGAAAACATA
+AACAACATGAAAAAGTATGAACCGACAGAAGAATGAAAATTTCAATCATATAACATGTCA
+TTCACTTCTCTTCTCTGACTGTCAAGTATTAGGTATTCCTTTTTATTTCCTCTTAAAATG
+ATCATAGTTTCCTATTTCTTTTACACCATTGGGAAGGGAATTCCAATGTTTTATGGCATT
+GTAATAAAACGAATTTCCAATACTACCTACTCTTTCTGGTAAGTTAAAGTTGAATCGGCT
+ATTTCTTGTATTATAATCATGTACGTCAGTAACAAGATCGAAGTTGGATCGAATATAATG
+ATTCGACCTAGTATGATATATTTTATGCACGTGATGCAATACGAGTTGTTTTGATCTTTG
+GTCGACTTCAAGAAAACCAGCTTTAGAAAGTTCGCTGTAGCCAACATGAGTTCTTGCCTT
+GGACTAGAACAGTTGATAAATCTCACCATTTTGTTCTTTAAGATGGGTAGAAGAATCCCT
+GCAATCTAAATGGTCAATTACTGTGAAGTTATTTTTACTGGATGCACCCAATAttttttt
+gataatttttttttctttgataatttttttctttttctttaataaattttttggataatt
+tttttttggataaatagttcttttttgataattctaataatttttttatttatttttttt
+ttttctataattttttttaaaaaatttattaatttttaattaaaaaaaaaataaGAGTTA
+ACAGATTAAGGGAAACTGACAATTCAAAAAAAAAAAAAA
+>Scaffold9310
+GCGGGGGCTGGGGAGGAAGGGGTGGCGTTATTTCACTTCCGATCTAATACGCTTTCTTAA
+GACACTGAAATATCAGTAGGTATTGGTATAGAGAATTACTTTTTATTTTTAATTAAAACA
+TTATCGAAATGAAGATACAGAGAAAAACGATGAGATGTAAGAAGTGCGCGTATTTAtgtg
+tgtgggtgcgtgtgtgtgtgtgtgtgtgtgttgtgtgcgtgcgtgtgtgtggtggtgtgt
+ACTAATTTTGATGTGTGTTGTGGCACAATTGCAATCATCAGTATCTTCATGAAAATGATA
+ACCAGAAGCACAAAAAGGAGGgtgcgtgtgtgtgtgtgtgtgtttagtgtgcgtgcgtgt
+gagggtgtttaagtgtgtatgtCGGAAATGTGGCACAATTGCAATCATCTGTATCTTCAT
+GAAAATGATAACCAGAAGAACAAAAAAAAAAAACATTGAGAGAACATGTTTTTTTGATGG
+AAGACAAGAAGTTCTCGTAACGTAGGATCTCCGAGACATGATGGGGTCAACTTAAAAAGA
+GAGCAGTGAGAGGCATTTATATCGAAGGTCAGGGAAAGGCAAACAAAGAAAGAAAAAAAA
+AAGGCTCACAGGAGAACGAAAACACGGGCCAAAATAATAAACAGGAGCAAGTGAACGGGC
+AGTTTGGTAGCTACTTCATTTACCGGCTTTTAAaggtactatgtcccatttgcaggtcaa
+aaaaaatgaaaaagttaaattccaactgcatttgaaagataatactaatttacaacttcc
+ctaaaaaaggtggggcttgaaaatgtcttcaagtgcggaaaataacgactattagttgtc
+aaatcgactttagggCTATAGAGCCCAAAAGTAATAGTCTTGA
+>Scaffold11911
+TTCTTGGCACCCCCCCCCCCCCCACACTCCTGCACTGAAGAACTACTCAAGTTTAAACTT
+TGCATTGCTTTTCTTTCTTTTTCAGTATTTTTTGCTTGGTACATGTTTCTCTTAATATCT
+GTCGTATAGatttttaatatttttatttatatCTACGTCAATCTGGCTGttctttttctt
+gtcttctttttttttctctctcttttttttcctcgtattttGTATTGATCCTTACCCTAG
+TTTTTGAACTTGAACAGCAATTTGCAGCACTCAAATTTCTTTAAAATTACCTTCTCTTAT
+TTGtctctgttcccctctccccccctctctctctctctctctctctctctctctctctct
+ttcATCTCCCATATCATAATTTGAAGTACCATCTATGGTGTTTTCAGATTGATCTTTCTT
+GCTTTCCCCACCCTCCCCCTTTATGCAGTTAATTTTCAGTCTATTTGTGTTTTCTGTGGT
+TGATTCTAATCATATTCTAACTCTTATTTTACATTTTACTTCACTAACAACTGGTTTATT
+ATATTTGTTACTAATTTTGAATTAAACTATTTACCATTCTGAACGAACTGAAAGATTAAA
+GATCAAACTATCTATGAATAGAATGGTATTTCTTCAATTTATTCAAATTTCTCTCTCTTT
+AACCCCCTTTTTCTGCTTGCATTTTTATCCCTTTGCCGTGGACTTCACTGGATATTTTGC
+TTTGATGCCAATCCAACAATTTTGCATATATTA
diff -r 0f97b3048bc3 -r 40c5e1853a66 test-data/fasta_nuc_changer1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_nuc_changer1.fasta Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,50 @@
+>cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7
+TGAGGTAGTAGGTTGTATAGTT
+>cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4
+TCCCTGAGACCTCAAGTGTGA
+>cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1
+TGGAATGTAAAGAAGTATGTA
+>cel-miR-2 MIMAT0000004 Caenorhabditis elegans miR-2
+TATCACAGCCAGCTTTGATGTGC
+>cel-miR-34 MIMAT0000005 Caenorhabditis elegans miR-34
+AGGCAGTGTGGTTAGCTGGTTG
+>cel-miR-35 MIMAT0000006 Caenorhabditis elegans miR-35
+TCACCGGGTGGAAACTAGCAGT
+>cel-miR-36 MIMAT0000007 Caenorhabditis elegans miR-36
+TCACCGGGTGAAAATTCGCATG
+>cel-miR-37 MIMAT0000008 Caenorhabditis elegans miR-37
+TCACCGGGTGAACACTTGCAGT
+>cel-miR-38 MIMAT0000009 Caenorhabditis elegans miR-38
+TCACCGGGAGAAAAACTGGAGT
+>cel-miR-39 MIMAT0000010 Caenorhabditis elegans miR-39
+TCACCGGGTGTAAATCAGCTTG
+>cel-miR-40 MIMAT0000011 Caenorhabditis elegans miR-40
+TCACCGGGTGTACATCAGCTAA
+>cel-miR-41 MIMAT0000012 Caenorhabditis elegans miR-41
+TCACCGGGTGAAAAATCACCTA
+>cel-miR-42 MIMAT0000013 Caenorhabditis elegans miR-42
+TCACCGGGTTAACATCTACAGA
+>cel-miR-43 MIMAT0000014 Caenorhabditis elegans miR-43
+TATCACAGTTTACTTGCTGTCGC
+>cel-miR-44 MIMAT0000015 Caenorhabditis elegans miR-44
+TGACTAGAGACACATTCAGCT
+>cel-miR-45 MIMAT0000016 Caenorhabditis elegans miR-45
+TGACTAGAGACACATTCAGCT
+>cel-miR-46 MIMAT0000017 Caenorhabditis elegans miR-46
+TGTCATGGAGTCGCTCTCTTCA
+>cel-miR-47 MIMAT0000018 Caenorhabditis elegans miR-47
+TGTCATGGAGGCGCTCTCTTCA
+>cel-miR-48 MIMAT0000019 Caenorhabditis elegans miR-48
+TGAGGTAGGCTCAGTAGATGCGA
+>cel-miR-49 MIMAT0000020 Caenorhabditis elegans miR-49
+AAGCACCACGAGAAGCTGCAGA
+>cel-miR-50 MIMAT0000021 Caenorhabditis elegans miR-50
+TGATATGTCTGGTATTCTTGGG
+>cel-miR-51 MIMAT0000022 Caenorhabditis elegans miR-51
+TACCCGTAGCTCCTATCCATGTT
+>cel-miR-52 MIMAT0000023 Caenorhabditis elegans miR-52
+CACCCGTACATATGTTTCCGTGCT
+>cel-miR-53 MIMAT0000024 Caenorhabditis elegans miR-53
+CACCCGTACATTTGTTTCCGTGCT
+>cel-miR-54 MIMAT0000025 Caenorhabditis elegans miR-54
+TACCCGTAATCTTCATAATCCGAG
diff -r 0f97b3048bc3 -r 40c5e1853a66 test-data/fasta_nuc_changer1.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_nuc_changer1.out Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,50 @@
+>cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7
+UGAGGUAGUAGGUUGUAUAGUU
+>cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4
+UCCCUGAGACCUCAAGUGUGA
+>cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1
+UGGAAUGUAAAGAAGUAUGUA
+>cel-miR-2 MIMAT0000004 Caenorhabditis elegans miR-2
+UAUCACAGCCAGCUUUGAUGUGC
+>cel-miR-34 MIMAT0000005 Caenorhabditis elegans miR-34
+AGGCAGUGUGGUUAGCUGGUUG
+>cel-miR-35 MIMAT0000006 Caenorhabditis elegans miR-35
+UCACCGGGUGGAAACUAGCAGU
+>cel-miR-36 MIMAT0000007 Caenorhabditis elegans miR-36
+UCACCGGGUGAAAAUUCGCAUG
+>cel-miR-37 MIMAT0000008 Caenorhabditis elegans miR-37
+UCACCGGGUGAACACUUGCAGU
+>cel-miR-38 MIMAT0000009 Caenorhabditis elegans miR-38
+UCACCGGGAGAAAAACUGGAGU
+>cel-miR-39 MIMAT0000010 Caenorhabditis elegans miR-39
+UCACCGGGUGUAAAUCAGCUUG
+>cel-miR-40 MIMAT0000011 Caenorhabditis elegans miR-40
+UCACCGGGUGUACAUCAGCUAA
+>cel-miR-41 MIMAT0000012 Caenorhabditis elegans miR-41
+UCACCGGGUGAAAAAUCACCUA
+>cel-miR-42 MIMAT0000013 Caenorhabditis elegans miR-42
+UCACCGGGUUAACAUCUACAGA
+>cel-miR-43 MIMAT0000014 Caenorhabditis elegans miR-43
+UAUCACAGUUUACUUGCUGUCGC
+>cel-miR-44 MIMAT0000015 Caenorhabditis elegans miR-44
+UGACUAGAGACACAUUCAGCU
+>cel-miR-45 MIMAT0000016 Caenorhabditis elegans miR-45
+UGACUAGAGACACAUUCAGCU
+>cel-miR-46 MIMAT0000017 Caenorhabditis elegans miR-46
+UGUCAUGGAGUCGCUCUCUUCA
+>cel-miR-47 MIMAT0000018 Caenorhabditis elegans miR-47
+UGUCAUGGAGGCGCUCUCUUCA
+>cel-miR-48 MIMAT0000019 Caenorhabditis elegans miR-48
+UGAGGUAGGCUCAGUAGAUGCGA
+>cel-miR-49 MIMAT0000020 Caenorhabditis elegans miR-49
+AAGCACCACGAGAAGCUGCAGA
+>cel-miR-50 MIMAT0000021 Caenorhabditis elegans miR-50
+UGAUAUGUCUGGUAUUCUUGGG
+>cel-miR-51 MIMAT0000022 Caenorhabditis elegans miR-51
+UACCCGUAGCUCCUAUCCAUGUU
+>cel-miR-52 MIMAT0000023 Caenorhabditis elegans miR-52
+CACCCGUACAUAUGUUUCCGUGCU
+>cel-miR-53 MIMAT0000024 Caenorhabditis elegans miR-53
+CACCCGUACAUUUGUUUCCGUGCU
+>cel-miR-54 MIMAT0000025 Caenorhabditis elegans miR-54
+UACCCGUAAUCUUCAUAAUCCGAG
diff -r 0f97b3048bc3 -r 40c5e1853a66 test-data/fasta_nuc_changer2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_nuc_changer2.fasta Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,50 @@
+>cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7
+UGAGGUAGUAGGUUGUAUAGUU
+>cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4
+UCCCUGAGACCUCAAGUGUGA
+>cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1
+UGGAAUGUAAAGAAGUAUGUA
+>cel-miR-2 MIMAT0000004 Caenorhabditis elegans miR-2
+UAUCACAGCCAGCUUUGAUGUGC
+>cel-miR-34 MIMAT0000005 Caenorhabditis elegans miR-34
+AGGCAGUGUGGUUAGCUGGUUG
+>cel-miR-35 MIMAT0000006 Caenorhabditis elegans miR-35
+UCACCGGGUGGAAACUAGCAGU
+>cel-miR-36 MIMAT0000007 Caenorhabditis elegans miR-36
+UCACCGGGUGAAAAUUCGCAUG
+>cel-miR-37 MIMAT0000008 Caenorhabditis elegans miR-37
+UCACCGGGUGAACACUUGCAGU
+>cel-miR-38 MIMAT0000009 Caenorhabditis elegans miR-38
+UCACCGGGAGAAAAACUGGAGU
+>cel-miR-39 MIMAT0000010 Caenorhabditis elegans miR-39
+UCACCGGGUGUAAAUCAGCUUG
+>cel-miR-40 MIMAT0000011 Caenorhabditis elegans miR-40
+UCACCGGGUGUACAUCAGCUAA
+>cel-miR-41 MIMAT0000012 Caenorhabditis elegans miR-41
+UCACCGGGUGAAAAAUCACCUA
+>cel-miR-42 MIMAT0000013 Caenorhabditis elegans miR-42
+UCACCGGGUUAACAUCUACAGA
+>cel-miR-43 MIMAT0000014 Caenorhabditis elegans miR-43
+UAUCACAGUUUACUUGCUGUCGC
+>cel-miR-44 MIMAT0000015 Caenorhabditis elegans miR-44
+UGACUAGAGACACAUUCAGCU
+>cel-miR-45 MIMAT0000016 Caenorhabditis elegans miR-45
+UGACUAGAGACACAUUCAGCU
+>cel-miR-46 MIMAT0000017 Caenorhabditis elegans miR-46
+UGUCAUGGAGUCGCUCUCUUCA
+>cel-miR-47 MIMAT0000018 Caenorhabditis elegans miR-47
+UGUCAUGGAGGCGCUCUCUUCA
+>cel-miR-48 MIMAT0000019 Caenorhabditis elegans miR-48
+UGAGGUAGGCUCAGUAGAUGCGA
+>cel-miR-49 MIMAT0000020 Caenorhabditis elegans miR-49
+AAGCACCACGAGAAGCUGCAGA
+>cel-miR-50 MIMAT0000021 Caenorhabditis elegans miR-50
+UGAUAUGUCUGGUAUUCUUGGG
+>cel-miR-51 MIMAT0000022 Caenorhabditis elegans miR-51
+UACCCGUAGCUCCUAUCCAUGUU
+>cel-miR-52 MIMAT0000023 Caenorhabditis elegans miR-52
+CACCCGUACAUAUGUUUCCGUGCU
+>cel-miR-53 MIMAT0000024 Caenorhabditis elegans miR-53
+CACCCGUACAUUUGUUUCCGUGCU
+>cel-miR-54 MIMAT0000025 Caenorhabditis elegans miR-54
+UACCCGUAAUCUUCAUAAUCCGAG
diff -r 0f97b3048bc3 -r 40c5e1853a66 test-data/fasta_nuc_changer2.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_nuc_changer2.out Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,50 @@
+>cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7
+TGAGGTAGTAGGTTGTATAGTT
+>cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4
+TCCCTGAGACCTCAAGTGTGA
+>cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1
+TGGAATGTAAAGAAGTATGTA
+>cel-miR-2 MIMAT0000004 Caenorhabditis elegans miR-2
+TATCACAGCCAGCTTTGATGTGC
+>cel-miR-34 MIMAT0000005 Caenorhabditis elegans miR-34
+AGGCAGTGTGGTTAGCTGGTTG
+>cel-miR-35 MIMAT0000006 Caenorhabditis elegans miR-35
+TCACCGGGTGGAAACTAGCAGT
+>cel-miR-36 MIMAT0000007 Caenorhabditis elegans miR-36
+TCACCGGGTGAAAATTCGCATG
+>cel-miR-37 MIMAT0000008 Caenorhabditis elegans miR-37
+TCACCGGGTGAACACTTGCAGT
+>cel-miR-38 MIMAT0000009 Caenorhabditis elegans miR-38
+TCACCGGGAGAAAAACTGGAGT
+>cel-miR-39 MIMAT0000010 Caenorhabditis elegans miR-39
+TCACCGGGTGTAAATCAGCTTG
+>cel-miR-40 MIMAT0000011 Caenorhabditis elegans miR-40
+TCACCGGGTGTACATCAGCTAA
+>cel-miR-41 MIMAT0000012 Caenorhabditis elegans miR-41
+TCACCGGGTGAAAAATCACCTA
+>cel-miR-42 MIMAT0000013 Caenorhabditis elegans miR-42
+TCACCGGGTTAACATCTACAGA
+>cel-miR-43 MIMAT0000014 Caenorhabditis elegans miR-43
+TATCACAGTTTACTTGCTGTCGC
+>cel-miR-44 MIMAT0000015 Caenorhabditis elegans miR-44
+TGACTAGAGACACATTCAGCT
+>cel-miR-45 MIMAT0000016 Caenorhabditis elegans miR-45
+TGACTAGAGACACATTCAGCT
+>cel-miR-46 MIMAT0000017 Caenorhabditis elegans miR-46
+TGTCATGGAGTCGCTCTCTTCA
+>cel-miR-47 MIMAT0000018 Caenorhabditis elegans miR-47
+TGTCATGGAGGCGCTCTCTTCA
+>cel-miR-48 MIMAT0000019 Caenorhabditis elegans miR-48
+TGAGGTAGGCTCAGTAGATGCGA
+>cel-miR-49 MIMAT0000020 Caenorhabditis elegans miR-49
+AAGCACCACGAGAAGCTGCAGA
+>cel-miR-50 MIMAT0000021 Caenorhabditis elegans miR-50
+TGATATGTCTGGTATTCTTGGG
+>cel-miR-51 MIMAT0000022 Caenorhabditis elegans miR-51
+TACCCGTAGCTCCTATCCATGTT
+>cel-miR-52 MIMAT0000023 Caenorhabditis elegans miR-52
+CACCCGTACATATGTTTCCGTGCT
+>cel-miR-53 MIMAT0000024 Caenorhabditis elegans miR-53
+CACCCGTACATTTGTTTCCGTGCT
+>cel-miR-54 MIMAT0000025 Caenorhabditis elegans miR-54
+TACCCGTAATCTTCATAATCCGAG
diff -r 0f97b3048bc3 -r 40c5e1853a66 test-data/fastq_stats1.out
--- a/test-data/fastq_stats1.out Mon Sep 14 15:27:55 2009 -0400
+++ b/test-data/fastq_stats1.out Mon Sep 14 17:03:17 2009 -0400
@@ -1,37 +1,37 @@
-column count min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count
-1 9 23 34 288 32.00 33 33 33 0 33 33 3 1 4 1 0
-2 9 28 33 287 31.89 31 33 33 2 28 33 3 3 2 1 0
-3 9 13 34 268 29.78 28 33 33 5 21 34 5 1 0 3 0
-4 9 17 33 261 29.00 30 33 33 3 26 33 1 2 3 3 0
-5 9 22 33 269 29.89 30 33 33 3 26 33 3 3 3 0 0
-6 9 22 33 277 30.78 30 33 33 3 26 33 5 3 0 1 0
-7 9 21 33 258 28.67 24 33 33 9 21 33 4 1 3 1 0
-8 9 12 33 263 29.22 32 33 33 1 31 33 2 1 1 5 0
-9 9 29 33 290 32.22 33 33 33 0 33 33 3 3 2 1 0
-10 9 23 33 277 30.78 32 33 33 1 31 33 1 4 2 2 0
-11 9 12 33 245 27.22 21 31 33 12 12 33 5 2 1 1 0
-12 9 13 33 214 23.78 15 24 33 18 13 33 2 4 2 1 0
-13 9 5 33 249 27.67 29 31 33 4 23 33 2 1 1 5 0
-14 9 5 33 233 25.89 24 33 33 9 11 33 3 3 2 1 0
-15 9 15 33 251 27.89 24 33 33 9 15 33 5 1 1 2 0
-16 9 23 34 269 29.89 24 33 33 9 23 34 3 1 2 3 0
-17 9 13 34 266 29.56 33 33 33 0 33 33 2 3 1 3 0
-18 9 21 34 272 30.22 31 33 33 2 28 34 0 5 1 3 0
-19 9 5 34 244 27.11 27 30 33 6 18 34 4 4 1 0 0
-20 9 11 34 241 26.78 23 32 33 10 11 34 3 4 2 0 0
-21 9 13 33 240 26.67 24 27 33 9 13 33 1 4 0 4 0
-22 9 5 33 190 21.11 13 21 33 20 5 33 1 4 0 3 1
-23 9 5 33 205 22.78 16 26 33 17 5 33 4 4 1 0 0
-24 9 5 33 247 27.44 28 31 33 5 21 33 1 5 1 2 0
-25 9 11 34 241 26.78 24 33 33 9 11 34 3 4 0 2 0
-26 9 5 33 212 23.56 18 31 33 15 5 33 0 6 0 3 0
-27 9 5 33 227 25.22 21 26 33 12 5 33 3 4 1 1 0
-28 9 21 33 255 28.33 24 31 33 9 21 33 2 4 3 0 0
-29 9 5 33 228 25.33 21 30 33 12 5 33 2 4 1 2 0
-30 9 10 33 213 23.67 16 28 33 17 10 33 3 4 2 0 0
-31 9 5 33 236 26.22 21 31 33 12 5 33 1 4 1 3 0
-32 9 5 33 210 23.33 12 29 33 21 5 33 3 3 0 3 0
-33 9 5 33 183 20.33 9 21 33 24 5 33 1 4 2 2 0
-34 9 5 33 150 16.67 7 17 22 15 5 33 3 4 1 1 0
-35 9 13 33 217 24.11 21 24 29 8 13 33 1 4 1 3 0
-36 9 5 33 195 21.67 18 21 32 14 5 33 3 2 1 3 0
+column count min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count Max_count
+1 9 23 34 288 32.00 33 33 33 0 33 33 3 1 4 1 0 9
+2 9 28 33 287 31.89 31 33 33 2 28 33 3 3 2 1 0 9
+3 9 13 34 268 29.78 28 33 33 5 21 34 5 1 0 3 0 9
+4 9 17 33 261 29.00 30 33 33 3 26 33 1 2 3 3 0 9
+5 9 22 33 269 29.89 30 33 33 3 26 33 3 3 3 0 0 9
+6 9 22 33 277 30.78 30 33 33 3 26 33 5 3 0 1 0 9
+7 9 21 33 258 28.67 24 33 33 9 21 33 4 1 3 1 0 9
+8 9 12 33 263 29.22 32 33 33 1 31 33 2 1 1 5 0 9
+9 9 29 33 290 32.22 33 33 33 0 33 33 3 3 2 1 0 9
+10 9 23 33 277 30.78 32 33 33 1 31 33 1 4 2 2 0 9
+11 9 12 33 245 27.22 21 31 33 12 12 33 5 2 1 1 0 9
+12 9 13 33 214 23.78 15 24 33 18 13 33 2 4 2 1 0 9
+13 9 5 33 249 27.67 29 31 33 4 23 33 2 1 1 5 0 9
+14 9 5 33 233 25.89 24 33 33 9 11 33 3 3 2 1 0 9
+15 9 15 33 251 27.89 24 33 33 9 15 33 5 1 1 2 0 9
+16 9 23 34 269 29.89 24 33 33 9 23 34 3 1 2 3 0 9
+17 9 13 34 266 29.56 33 33 33 0 33 33 2 3 1 3 0 9
+18 9 21 34 272 30.22 31 33 33 2 28 34 0 5 1 3 0 9
+19 9 5 34 244 27.11 27 30 33 6 18 34 4 4 1 0 0 9
+20 9 11 34 241 26.78 23 32 33 10 11 34 3 4 2 0 0 9
+21 9 13 33 240 26.67 24 27 33 9 13 33 1 4 0 4 0 9
+22 9 5 33 190 21.11 13 21 33 20 5 33 1 4 0 3 1 9
+23 9 5 33 205 22.78 16 26 33 17 5 33 4 4 1 0 0 9
+24 9 5 33 247 27.44 28 31 33 5 21 33 1 5 1 2 0 9
+25 9 11 34 241 26.78 24 33 33 9 11 34 3 4 0 2 0 9
+26 9 5 33 212 23.56 18 31 33 15 5 33 0 6 0 3 0 9
+27 9 5 33 227 25.22 21 26 33 12 5 33 3 4 1 1 0 9
+28 9 21 33 255 28.33 24 31 33 9 21 33 2 4 3 0 0 9
+29 9 5 33 228 25.33 21 30 33 12 5 33 2 4 1 2 0 9
+30 9 10 33 213 23.67 16 28 33 17 10 33 3 4 2 0 0 9
+31 9 5 33 236 26.22 21 31 33 12 5 33 1 4 1 3 0 9
+32 9 5 33 210 23.33 12 29 33 21 5 33 3 3 0 3 0 9
+33 9 5 33 183 20.33 9 21 33 24 5 33 1 4 2 2 0 9
+34 9 5 33 150 16.67 7 17 22 15 5 33 3 4 1 1 0 9
+35 9 13 33 217 24.11 21 24 29 8 13 33 1 4 1 3 0 9
+36 9 5 33 195 21.67 18 21 32 14 5 33 3 2 1 3 0 9
diff -r 0f97b3048bc3 -r 40c5e1853a66 tool-data/fastx_clipper_sequences.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fastx_clipper_sequences.txt Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,13 @@
+#
+# Adapter/Linker sequences for FASTX-Clipper tool.
+#
+# Format:
+# Adapter Sequence <TAB> Descriptive name
+#
+# Example:
+# AAATTTGATAAGATA Our-Adapter
+#
+# Some adapters can be found here:
+# http://seqanswers.com/forums/showthread.php?t=198
+
+TGTAGGCC Dummy-Adapter (don't use me)
diff -r 0f97b3048bc3 -r 40c5e1853a66 tool_conf.xml.sample
--- a/tool_conf.xml.sample Mon Sep 14 15:27:55 2009 -0400
+++ b/tool_conf.xml.sample Mon Sep 14 17:03:17 2009 -0400
@@ -299,24 +299,26 @@
<tool file="fasta_tools/tabular_to_fasta.xml" />
</section>
<section name="FASTA/Q Information" id="cshl_library_information">
- <tool file="fastx_toolkit/fastq_qual_stat.xml" />
+ <tool file="fastx_toolkit/fastx_quality_statistics.xml" />
<tool file="fastx_toolkit/fastq_quality_boxplot.xml" />
- <tool file="fastx_toolkit/fastq_nucleotides_distribution.xml" />
- <!-- <tool file="fastx_toolkit/fasta_clipping_histogram.xml" /> -->
+ <tool file="fastx_toolkit/fastx_nucleotides_distribution.xml" />
+ <tool file="fastx_toolkit/fasta_clipping_histogram.xml" />
</section>
<section name="FASTA/Q Preprocessing" id="cshl_fastx_manipulation">
<tool file="fastx_toolkit/fastq_to_fasta.xml" />
- <tool file="fastx_toolkit/fastq_qual_conv.xml" />
- <!-- <tool file="fastx_toolkit/fastx_clipper.xml" /> -->
+ <tool file="fastx_toolkit/fastq_quality_converter.xml" />
+ <tool file="fastx_toolkit/fastx_clipper.xml" />
<tool file="fastx_toolkit/fastx_trimmer.xml" />
+ <tool file="fastx_toolkit/fastx_renamer.xml" />
<tool file="fastx_toolkit/fastx_reverse_complement.xml" />
+ <tool file="fastx_toolkit/fasta_formatter.xml" />
+ <tool file="fastx_toolkit/fasta_nucleotide_changer.xml" />
<tool file="fastx_toolkit/fastx_artifacts_filter.xml" />
<tool file="fastx_toolkit/fastq_quality_filter.xml" />
- <!-- <tool file="fastx_toolkit/fasta_collapser.xml" /> -->
- <!-- <tool file="fastx_toolkit/fastx_barcode_splitter.xml" /> -->
+ <tool file="fastx_toolkit/fastx_collapser.xml" />
+ <!--<tool file="fastx_toolkit/fastx_barcode_splitter.xml" />-->
</section>
-
<section name="Short Read QC and Manipulation" id="short_read_analysis">
<tool file="metag_tools/short_reads_figure_score.xml" />
<tool file="metag_tools/short_reads_figure_high_quality_length.xml" />
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fasta_clipping_histogram.xml
--- a/tools/fastx_toolkit/fasta_clipping_histogram.xml Mon Sep 14 15:27:55 2009 -0400
+++ b/tools/fastx_toolkit/fasta_clipping_histogram.xml Mon Sep 14 17:03:17 2009 -0400
@@ -13,7 +13,7 @@
**What it does**
-This tool creates a histogram image of sequence lengths distribution in a given fasta data set file.
+This tool creates a histogram image of sequence lengths distribution in a given fasta dataset file.
**TIP:** Use this tool after clipping your library (with **FASTX Clipper tool**), to visualize the clipping results.
@@ -21,17 +21,82 @@
**Output Examples**
-
In the following library, most sequences are 24-mers to 27-mers.
This could indicate an abundance of endo-siRNAs (depending of course of what you've tried to sequence in the first place).
-.. image:: ../static/fastx_icons/fasta_clipping_histogram_1.png
+.. image:: ./static/fastx_icons/fasta_clipping_histogram_1.png
In the following library, most sequences are 19,22 or 23-mers.
This could indicate an abundance of miRNAs (depending of course of what you've tried to sequence in the first place).
-.. image:: ../static/fastx_icons/fasta_clipping_histogram_2.png
+.. image:: ./static/fastx_icons/fasta_clipping_histogram_2.png
+
+
+-----
+
+
+**Input Formats**
+
+This tool accepts short-reads FASTA files. The reads don't have to be short, but they do have to be on a single line, like so::
+
+ >sequence1
+ AGTAGTAGGTGATGTAGAGAGAGAGAGAGTAG
+ >sequence2
+ GTGTGTGTGGGAAGTTGACACAGTA
+ >sequence3
+ CCTTGAGATTAACGCTAATCAAGTAAAC
+
+
+If the sequences span over multiple lines::
+
+ >sequence1
+ CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAG
+ TCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAG
+ aactggtctttacctTTAAGTTG
+
+Use the **FASTA Width Formatter** tool to re-format the FASTA into single-line sequences::
+
+ >sequence1
+ CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG
+
+
+-----
+
+
+
+**Multiplicity counts (a.k.a reads-count)**
+
+If the sequence identifier (the text after the '>') contains a dash and a number, it is treated as a multiplicity count value (i.e. how many times that individual sequence repeated in the original FASTA file, before collapsing).
+
+Example 1 - The following FASTA file *does not* have multiplicity counts::
+
+ >seq1
+ GGATCC
+ >seq2
+ GGTCATGGGTTTAAA
+ >seq3
+ GGGATATATCCCCACACACACACAC
+
+Each sequence counts as one, to produce the following chart:
+
+.. image:: ./static/fastx_icons/fasta_clipping_histogram_3.png
+
+
+Example 2 - The following FASTA file has multiplicity counts::
+
+ >seq1-2
+ GGATCC
+ >seq2-10
+ GGTCATGGGTTTAAA
+ >seq3-3
+ GGGATATATCCCCACACACACACAC
+
+The first sequence counts as 2, the second as 10, the third as 3, to produce the following chart:
+
+.. image:: ./static/fastx_icons/fasta_clipping_histogram_4.png
+
+Use the **FASTA Collapser** tool to create FASTA files with multiplicity counts.
</help>
</tool>
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fasta_collapser.xml
--- a/tools/fastx_toolkit/fasta_collapser.xml Mon Sep 14 15:27:55 2009 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-<tool id="cshl_fasta_collapser" name="Collapse">
- <description>sequences</description>
- <command>fasta_collapser.pl $input $output</command>
-
- <inputs>
- <param format="fasta" name="input" type="data" label="Library to collapse" />
- </inputs>
-
- <tests>
- <test>
- <param name="input" value="fasta_collapser1.fasta" />
- <output name="output" file="fasta_collapser1.out" />
- </test>
- </tests>
-
- <outputs>
- <data format="fasta" name="output" metadata_source="input" />
- </outputs>
- <help>
-
-**What it does**
-
-This tool collapses identical sequences in a FASTA file into a single sequence.
-
---------
-
-**Example**
-
-Example Input File (Sequence "ATAT" appears multiple times)::
-
- >CSHL_2_FC0042AGLLOO_1_1_605_414
- TGCG
- >CSHL_2_FC0042AGLLOO_1_1_537_759
- ATAT
- >CSHL_2_FC0042AGLLOO_1_1_774_520
- TGGC
- >CSHL_2_FC0042AGLLOO_1_1_742_502
- ATAT
- >CSHL_2_FC0042AGLLOO_1_1_781_514
- TGAG
- >CSHL_2_FC0042AGLLOO_1_1_757_487
- TTCA
- >CSHL_2_FC0042AGLLOO_1_1_903_769
- ATAT
- >CSHL_2_FC0042AGLLOO_1_1_724_499
- ATAT
-
-Example Output file::
-
- >1-1
- TGCG
- >2-4
- ATAT
- >3-1
- TGGC
- >4-1
- TGAG
- >5-1
- TTCA
-
-.. class:: infomark
-
-Original Sequence Names / Lane descriptions (e.g. "CSHL_2_FC0042AGLLOO_1_1_742_502") are discarded.
-
-The output seqeunce name is composed of two numbers: the first is the sequence's number, the second is the multiplicity value.
-
-The following output::
-
- >2-4
- ATAT
-
-means that the sequence "ATAT" is the second sequence in the file, and it appeared 4 times in the input FASTA file.
-
-</help>
-</tool>
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fasta_formatter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/fastx_toolkit/fasta_formatter.xml Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,79 @@
+<tool id="cshl_fasta_formatter" name="FASTA Width">
+ <description>formatter</description>
+ <!--
+ Note:
+ fasta_formatter also has a tabular output mode (-t),
+ but Galaxy already contains such a tool, so no need
+ to offer the user a duplicated tool.
+
+ So this XML tool only changes the width (line-wrapping) of a
+ FASTA file.
+ -->
+ <command>zcat -f '$input' | fasta_formatter -w $width -o $output</command>
+ <inputs>
+ <param format="fasta" name="input" type="data" label="Library to re-format" />
+
+ <param name="width" type="integer" value="0" label="New width for nucleotides strings" help="Use 0 for single line output." />
+ </inputs>
+
+ <tests>
+ <test>
+ <!-- Re-format a FASTA file into a single line -->
+ <param name="input" value="fasta_formatter1.fasta" />
+ <param name="width" value="0" />
+ <output name="output" file="fastx_formatter1.out" />
+ </test>
+ <test>
+ <!-- Re-format a FASTA file into multiple lines wrapping at 60 characters -->
+ <param name="input" value="fasta_formatter1.fasta" />
+ <param name="width" value="60" />
+ <output name="output" file="fasta_formatter2.out" />
+ </test>
+ </tests>
+
+ <outputs>
+ <data format="input" name="output" metadata_source="input" />
+ </outputs>
+
+<help>
+**What it does**
+
+This tool re-formats a FASTA file, changing the width of the nucleotides lines.
+
+**TIP:** Outputting a single line (with **width = 0**) can be useful for scripting (with **grep**, **awk**, and **perl**). Every odd line is a sequence identifier, and every even line is a nucleotides line.
+
+--------
+
+**Example**
+
+Input FASTA file (each nucleotides line is 50 characters long)::
+
+ >Scaffold3648
+ AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTC
+ CCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTG
+ TTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACA
+ ATTAAAGTCAATTTTAATGAACATGTAGTAAAAACT
+ >Scaffold9299
+ CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAG
+ TCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAG
+ aactggtctttacctTTAAGTTG
+
+
+Output FASTA file (with width=80)::
+
+ >Scaffold3648
+ AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCAGGGACCTACCTGTTTTTGTT
+ ATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACAATTAAAGTCA
+ ATTTTAATGAACATGTAGTAAAAACT
+ >Scaffold9299
+ CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTAC
+ GTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG
+
+Output FASTA file (with width=0 => single line)::
+
+ >Scaffold3648
+ AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACAATTAAAGTCAATTTTAATGAACATGTAGTAAAAACT
+ >Scaffold9299
+ CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG
+</help>
+</tool>
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fasta_nucleotide_changer.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/fastx_toolkit/fasta_nucleotide_changer.xml Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,65 @@
+<tool id="cshl_fasta_nucleotides_changer" name="RNA/DNA" >
+ <description>converter</description>
+ <command>zcat -f '$input' | fasta_nucleotide_changer $mode -v -o $output</command>
+ <inputs>
+ <param format="fasta" name="input" type="data" label="Library to convert" />
+
+ <param name="mode" type="select" label="Convert">
+ <option value="-d">RNA to DNA (U to T)</option>
+ <option value="-r">DNA to RNA (T to U)</option>
+ </param>
+ </inputs>
+
+ <tests>
+ <test>
+ <!-- DNA-to-RNA -->
+ <param name="input" value="fasta_nuc_changer1.fasta" />
+ <param name="mode" value="-r" />
+ <output name="output" file="fasta_nuc_changer1.out" />
+ </test>
+ <test>
+ <!-- RNA-to-DNA -->
+ <param name="input" value="fasta_nuc_changer2.fasta" />
+ <param name="mode" value="-d" />
+ <output name="output" file="fasta_nuc_changer2.out" />
+ </test>
+ </tests>
+
+
+ <outputs>
+ <data format="input" name="output" metadata_source="input" />
+ </outputs>
+
+<help>
+**What it does**
+
+This tool converts RNA FASTA files to DNA (and vice-versa).
+
+In **RNA-to-DNA** mode, U's are changed into T's.
+
+In **DNA-to-RNA** mode, T's are changed into U's.
+
+--------
+
+**Example**
+
+Input RNA FASTA file ( from Sanger's mirBase )::
+
+ >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7
+ UGAGGUAGUAGGUUGUAUAGUU
+ >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4
+ UCCCUGAGACCUCAAGUGUGA
+ >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1
+ UGGAAUGUAAAGAAGUAUGUA
+
+Output DNA FASTA file (with RNA-to-DNA mode)::
+
+ >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7
+ TGAGGTAGTAGGTTGTATAGTT
+ >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4
+ TCCCTGAGACCTCAAGTGTGA
+ >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1
+ TGGAATGTAAAGAAGTATGTA
+
+</help>
+</tool>
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fastq_nucleotides_distribution.xml
--- a/tools/fastx_toolkit/fastq_nucleotides_distribution.xml Mon Sep 14 15:27:55 2009 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-<tool id="cshl_fastq_nucleotides_distribution" name="Nucleotides Distribution">
- <description>chart</description>
- <command>fastq_nucleotide_distribution_graph.sh -t '$input.name' -i $input -o $output</command>
-
- <inputs>
- <param format="txt" name="input" type="data" label="Statistics Text File (output of 'FASTQ Statistics' tool)" />
- </inputs>
-
- <outputs>
- <data format="png" name="output" metadata_source="input" />
- </outputs>
-<help>
-
-**What it does**
-
-Creates a stacked-histogram graph for the nucleotide distribution in the Solexa library.
-
-.. class:: infomark
-
-**TIP:** Use the **FASTQ Statistics** tool to generate the report file needed for this tool.
-
------
-
-**Output Examples**
-
-
-
-The following chart clearly shows the barcode used at the 5'-end of the library: **GATCT**
-
-.. image:: ../static/fastx_icons/fastq_nucleotides_distribution_1.png
-
-
-
-
-
-
-
-In the following chart, one can almost 'read' the most abundant sequence by looking at the dominant values: **TGATA TCGTA TTGAT GACTG AA...**
-
-.. image:: ../static/fastx_icons/fastq_nucleotides_distribution_2.png
-
-
-
-
-
-
-
-
-The following chart shows a growing number of unknown (N) nucleotides towards later cycles (which might indicate a sequencing problem):
-
-.. image:: ../static/fastx_icons/fastq_nucleotides_distribution_3.png
-
-
-
-
-
-
-
-
-But most of the time, the chart will look rather random:
-
-.. image:: ../static/fastx_icons/fastq_nucleotides_distribution_4.png
-
-</help>
-</tool>
-<!-- FASTQ-Nucleotides-Distribution is part of the FASTX-toolkit, by A.Gordon (gordon(a)cshl.edu) -->
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fastq_qual_conv.xml
--- a/tools/fastx_toolkit/fastq_qual_conv.xml Mon Sep 14 15:27:55 2009 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-<tool id="cshl_fastq_qual_conv" name="Quality format converter">
- <description>(ASCII-Numeric)</description>
- <command>zcat -f $input | fastq_quality_converter $QUAL_FORMAT -o $output</command>
- <inputs>
- <param format="fastqsolexa" name="input" type="data" label="Library to convert" />
-
- <param name="QUAL_FORMAT" type="select" label="Desired output format">
- <option value="-a">ASCII (letters) quality scores</option>
- <option value="-n">Numeric quality scores</option>
- </param>
- </inputs>
-
- <tests>
- <test>
- <!-- ASCII to NUMERIC -->
- <param name="input" value="fastq_qual_conv1.fastq" />
- <param name="QUAL_FORMAT" value="Numeric quality scores" />
- <output name="output" file="fastq_qual_conv1.out" />
- </test>
- <test>
- <!-- ASCII to ASCII (basically, a no-op, but it should still produce a valid output -->
- <param name="input" value="fastq_qual_conv1.fastq" />
- <param name="QUAL_FORMAT" value="ASCII (letters) quality scores" />
- <output name="output" file="fastq_qual_conv1a.out" />
- </test>
- <test>
- <!-- NUMERIC to ASCII -->
- <param name="input" value="fastq_qual_conv2.fastq" />
- <param name="QUAL_FORMAT" value="ASCII (letters) quality scores" />
- <output name="output" file="fastq_qual_conv2.out" />
- </test>
- <test>
- <!-- NUMERIC to NUMERIC (basically, a no-op, but it should still produce a valid output -->
- <param name="input" value="fastq_qual_conv2.fastq" />
- <param name="QUAL_FORMAT" value="Numeric quality scores" />
- <output name="output" file="fastq_qual_conv2n.out" />
- </test>
- </tests>
-
- <outputs>
- <data format="fastqsolexa" name="output" metadata_source="input" />
- </outputs>
-<help>
-
-**What it does**
-
-Converts a solexa FASTQ file to/from numeric or ASCII quality format.
-
-.. class:: warningmark
-
-Re-scaling is **not** performed. (e.g. conversion from Phred scale to Solexa scale).
-
-
------
-
-FASTQ with Numeric quality scores::
-
- @CSHL__2_FC042AGWWWXX:8:1:120:202
- ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC
- +CSHL__2_FC042AGWWWXX:8:1:120:202
- 40 40 40 40 20 40 40 40 40 6 40 40 28 40 40 25 40 20 40 -1 30 40 14 27 40 8 1 3 7 -1 11 10 -1 21 10 8
- @CSHL__2_FC042AGWWWXX:8:1:103:1185
- ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC
- +CSHL__2_FC042AGWWWXX:8:1:103:1185
- 40 40 40 40 40 35 33 31 40 40 40 32 30 22 40 -0 9 22 17 14 8 36 15 34 22 12 23 3 10 -0 8 2 4 25 30 2
-
-
-FASTQ with ASCII quality scores::
-
- @CSHL__2_FC042AGWWWXX:8:1:120:202
- ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC
- +CSHL__2_FC042AGWWWXX:8:1:120:202
- hhhhThhhhFhh\hhYhTh?^hN[hHACG?KJ?UJH
- @CSHL__2_FC042AGWWWXX:8:1:103:1185
- ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC
- +CSHL__2_FC042AGWWWXX:8:1:103:1185
- hhhhhca_hhh`^Vh@IVQNHdObVLWCJ@HBDY^B
-
-
-</help>
-</tool>
-<!-- FASTQ-Quality-Converter is part of the FASTX-toolkit, by A.Gordon (gordon(a)cshl.edu) -->
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fastq_qual_stat.xml
--- a/tools/fastx_toolkit/fastq_qual_stat.xml Mon Sep 14 15:27:55 2009 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,100 +0,0 @@
-<tool id="cshl_fastq_qual_stat" name="Quality Statistics">
- <description></description>
- <command>zcat -f $input | fastq_quality_stats -o $output</command>
-
- <inputs>
- <param format="fastqsolexa" name="input" type="data" label="Library to analyse" />
- </inputs>
-
- <tests>
- <test>
- <param name="input" value="fastq_stats1.fastq" />
- <output name="output" file="fastq_stats1.out" />
- </test>
- </tests>
-
- <outputs>
- <data format="txt" name="output" metadata_source="input" />
- </outputs>
-
-<help>
-
-**What it does**
-
-Creates quality statistics report for the given Solexa/FASTQ library.
-
-.. class:: infomark
-
-**TIP:** This statistics report can be used as input for **Quality Score** and **Nucleotides Distribution** tools.
-
------
-
-**The output file will contain the following fields:**
-
-* column = column number (1 to 36 for a 36-cycles read solexa file)
-* count = number of bases found in this column.
-* min = Lowest quality score value found in this column.
-* max = Highest quality score value found in this column.
-* sum = Sum of quality score values for this column.
-* mean = Mean quality score value for this column.
-* Q1 = 1st quartile quality score.
-* med = Median quality score.
-* Q3 = 3rd quartile quality score.
-* IQR = Inter-Quartile range (Q3-Q1).
-* lW = 'Left-Whisker' value (for boxplotting).
-* rW = 'Right-Whisker' value (for boxplotting).
-* A_Count = Count of 'A' nucleotides found in this column.
-* C_Count = Count of 'C' nucleotides found in this column.
-* G_Count = Count of 'G' nucleotides found in this column.
-* T_Count = Count of 'T' nucleotides found in this column.
-* N_Count = Count of 'N' nucleotides found in this column.
-
-
-
-
-
-
-**Output Example**::
-
- column count min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count
- 1 6362991 -4 40 250734117 39.41 40 40 40 0 40 40 1396976 1329101 678730 2958184 0
- 2 6362991 -5 40 250531036 39.37 40 40 40 0 40 40 1786786 1055766 1738025 1782414 0
- 3 6362991 -5 40 248722469 39.09 40 40 40 0 40 40 2296384 984875 1443989 1637743 0
- 4 6362991 -5 40 247654797 38.92 40 40 40 0 40 40 1683197 1410855 1722633 1546306 0
- 5 6362991 -4 40 248214827 39.01 40 40 40 0 40 40 2536861 1167423 1248968 1409739 0
- 6 6362991 -5 40 248499903 39.05 40 40 40 0 40 40 1598956 1236081 1568608 1959346 0
- 7 6362991 -4 40 247719760 38.93 40 40 40 0 40 40 1692667 1822140 1496741 1351443 0
- 8 6362991 -5 40 245745205 38.62 40 40 40 0 40 40 2230936 1343260 1529928 1258867 0
- 9 6362991 -5 40 245766735 38.62 40 40 40 0 40 40 1702064 1306257 1336511 2018159 0
- 10 6362991 -5 40 245089706 38.52 40 40 40 0 40 40 1519917 1446370 1450995 1945709 0
- 11 6362991 -5 40 242641359 38.13 40 40 40 0 40 40 1717434 1282975 1387804 1974778 0
- 12 6362991 -5 40 242026113 38.04 40 40 40 0 40 40 1662872 1202041 1519721 1978357 0
- 13 6362991 -5 40 238704245 37.51 40 40 40 0 40 40 1549965 1271411 1973291 1566681 1643
- 14 6362991 -5 40 235622401 37.03 40 40 40 0 40 40 2101301 1141451 1603990 1515774 475
- 15 6362991 -5 40 230766669 36.27 40 40 40 0 40 40 2344003 1058571 1440466 1519865 86
- 16 6362991 -5 40 224466237 35.28 38 40 40 2 35 40 2203515 1026017 1474060 1651582 7817
- 17 6362991 -5 40 219990002 34.57 34 40 40 6 25 40 1522515 1125455 2159183 1555765 73
- 18 6362991 -5 40 214104778 33.65 30 40 40 10 15 40 1479795 2068113 1558400 1249337 7346
- 19 6362991 -5 40 212934712 33.46 30 40 40 10 15 40 1432749 1231352 1769799 1920093 8998
- 20 6362991 -5 40 212787944 33.44 29 40 40 11 13 40 1311657 1411663 2126316 1513282 73
- 21 6362991 -5 40 211369187 33.22 28 40 40 12 10 40 1887985 1846300 1300326 1318380 10000
- 22 6362991 -5 40 213371720 33.53 30 40 40 10 15 40 542299 3446249 516615 1848190 9638
- 23 6362991 -5 40 221975899 34.89 36 40 40 4 30 40 347679 1233267 926621 3855355 69
- 24 6362991 -5 40 194378421 30.55 21 40 40 19 -5 40 433560 674358 3262764 1992242 67
- 25 6362991 -5 40 199773985 31.40 23 40 40 17 -2 40 944760 325595 1322800 3769641 195
- 26 6362991 -5 40 179404759 28.20 17 34 40 23 -5 40 3457922 156013 1494664 1254293 99
- 27 6362991 -5 40 163386668 25.68 13 28 40 27 -5 40 1392177 281250 3867895 821491 178
- 28 6362991 -5 40 156230534 24.55 12 25 40 28 -5 40 907189 981249 4174945 299437 171
- 29 6362991 -5 40 163236046 25.65 13 28 40 27 -5 40 1097171 3418678 1567013 280008 121
- 30 6362991 -5 40 151309826 23.78 12 23 40 28 -5 40 3514775 2036194 566277 245613 132
- 31 6362991 -5 40 141392520 22.22 10 21 40 30 -5 40 1569000 4571357 124732 97721 181
- 32 6362991 -5 40 143436943 22.54 10 21 40 30 -5 40 1453607 4519441 38176 351107 660
- 33 6362991 -5 40 114269843 17.96 6 14 30 24 -5 40 3311001 2161254 155505 734297 934
- 34 6362991 -5 40 140638447 22.10 10 20 40 30 -5 40 1501615 1637357 18113 3205237 669
- 35 6362991 -5 40 138910532 21.83 10 20 40 30 -5 40 1532519 3495057 23229 1311834 352
- 36 6362991 -5 40 117158566 18.41 7 15 30 23 -5 40 4074444 1402980 63287 822035 245
-
-
-</help>
-</tool>
-<!-- FASTQ-Statistics is part of the FASTX-toolkit, by A.Gordon (gordon(a)cshl.edu) -->
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fastq_quality_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/fastx_toolkit/fastq_quality_converter.xml Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,88 @@
+<tool id="cshl_fastq_quality_converter" name="Quality format converter">
+ <description>(ASCII-Numeric)</description>
+ <command>zcat -f $input | fastq_quality_converter $QUAL_FORMAT -o $output -Q $offset</command>
+ <inputs>
+ <param format="fastqsolexa" name="input" type="data" label="Library to convert" />
+
+ <param name="QUAL_FORMAT" type="select" label="Desired output format">
+ <option value="-a">ASCII (letters) quality scores</option>
+ <option value="-n">Numeric quality scores</option>
+ </param>
+
+ <param name="offset" size="4" type="integer" value="33" label="FASTQ ASCII offset" />
+ </inputs>
+
+ <tests>
+ <test>
+ <!-- ASCII to NUMERIC -->
+ <param name="input" value="fastq_qual_conv1.fastq" />
+ <param name="QUAL_FORMAT" value="Numeric quality scores" />
+ <param name="offset" value="64" />
+ <output name="output" file="fastq_qual_conv1.out" />
+ </test>
+ <test>
+ <!-- ASCII to ASCII (basically, a no-op, but it should still produce a valid output -->
+ <param name="input" value="fastq_qual_conv1.fastq" />
+ <param name="QUAL_FORMAT" value="ASCII (letters) quality scores" />
+ <param name="offset" value="64" />
+ <output name="output" file="fastq_qual_conv1a.out" />
+ </test>
+ <test>
+ <!-- NUMERIC to ASCII -->
+ <param name="input" value="fastq_qual_conv2.fastq" />
+ <param name="QUAL_FORMAT" value="ASCII (letters) quality scores" />
+ <param name="offset" value="64" />
+ <output name="output" file="fastq_qual_conv2.out" />
+ </test>
+ <test>
+ <!-- NUMERIC to NUMERIC (basically, a no-op, but it should still produce a valid output -->
+ <param name="input" value="fastq_qual_conv2.fastq" />
+ <param name="QUAL_FORMAT" value="Numeric quality scores" />
+ <param name="offset" value="64" />
+ <output name="output" file="fastq_qual_conv2n.out" />
+ </test>
+ </tests>
+
+ <outputs>
+ <data format="fastqsolexa" name="output" metadata_source="input" />
+ </outputs>
+<help>
+
+**What it does**
+
+Converts a solexa FASTQ file to/from numeric or ASCII quality format.
+
+.. class:: warningmark
+
+Re-scaling is **not** performed. (e.g. conversion from Phred scale to Solexa scale).
+
+
+-----
+
+FASTQ with Numeric quality scores::
+
+ @CSHL__2_FC042AGWWWXX:8:1:120:202
+ ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC
+ +CSHL__2_FC042AGWWWXX:8:1:120:202
+ 40 40 40 40 20 40 40 40 40 6 40 40 28 40 40 25 40 20 40 -1 30 40 14 27 40 8 1 3 7 -1 11 10 -1 21 10 8
+ @CSHL__2_FC042AGWWWXX:8:1:103:1185
+ ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC
+ +CSHL__2_FC042AGWWWXX:8:1:103:1185
+ 40 40 40 40 40 35 33 31 40 40 40 32 30 22 40 -0 9 22 17 14 8 36 15 34 22 12 23 3 10 -0 8 2 4 25 30 2
+
+
+FASTQ with ASCII quality scores::
+
+ @CSHL__2_FC042AGWWWXX:8:1:120:202
+ ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC
+ +CSHL__2_FC042AGWWWXX:8:1:120:202
+ hhhhThhhhFhh\hhYhTh?^hN[hHACG?KJ?UJH
+ @CSHL__2_FC042AGWWWXX:8:1:103:1185
+ ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC
+ +CSHL__2_FC042AGWWWXX:8:1:103:1185
+ hhhhhca_hhh`^Vh@IVQNHdObVLWCJ@HBDY^B
+
+
+</help>
+</tool>
+<!-- FASTQ-Quality-Converter is part of the FASTX-toolkit, by A.Gordon (gordon(a)cshl.edu) -->
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fastx_barcode_splitter.xml
--- a/tools/fastx_toolkit/fastx_barcode_splitter.xml Mon Sep 14 15:27:55 2009 -0400
+++ b/tools/fastx_toolkit/fastx_barcode_splitter.xml Mon Sep 14 17:03:17 2009 -0400
@@ -1,6 +1,6 @@
<tool id="cshl_fastx_barcode_splitter" name="Barcode Splitter">
<description></description>
- <command>fastx_barcode_splitter_galaxy_wrapper.sh $BARCODE $input "$input.name" --mismatches $mismatches --partial $partial $EOL > $output </command>
+ <command interpreter="sh">fastx_barcode_splitter_galaxy_wrapper.sh $BARCODE $input "$input.name" "$output.files_path" --mismatches $mismatches --partial $partial $EOL > $output </command>
<inputs>
<param format="txt" name="BARCODE" type="data" label="Barcodes to use" />
@@ -61,7 +61,7 @@
**Output Example**
-.. image:: ../static/fastx_icons/barcode_splitter_output_example.png
+.. image:: ./static/fastx_icons/barcode_splitter_output_example.png
</help>
</tool>
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fastx_barcode_splitter_galaxy_wrapper.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/fastx_toolkit/fastx_barcode_splitter_galaxy_wrapper.sh Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,80 @@
+#!/bin/sh
+
+# FASTX-toolkit - FASTA/FASTQ preprocessing tools.
+# Copyright (C) 2009 A. Gordon (gordon(a)cshl.edu)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#
+#This is a shell script wrapper for 'fastx_barcode_splitter.pl'
+#
+# 1. Output files are saved at the dataset's files_path directory.
+#
+# 2. 'fastx_barcode_splitter.pl' outputs a textual table.
+# This script turns it into pretty HTML with working URL
+# (so lazy users can just click on the URLs and get their files)
+
+BARCODE_FILE="$1"
+FASTQ_FILE="$2"
+LIBNAME="$3"
+OUTPUT_PATH="$4"
+shift 4
+# The rest of the parameters are passed to the split program
+
+if [ "$OUTPUT_PATH" == "" ]; then
+ echo "Usage: $0 [BARCODE FILE] [FASTQ FILE] [LIBRARY_NAME] [OUTPUT_PATH]" >&2
+ exit 1
+fi
+
+#Sanitize library name, make sure we can create a file with this name
+LIBNAME=${LIBNAME//\.gz/}
+LIBNAME=${LIBNAME//\.txt/}
+LIBNAME=${LIBNAME//[^[:alnum:]]/_}
+
+if [ ! -r "$FASTQ_FILE" ]; then
+ echo "Error: Input file ($FASTQ_FILE) not found!" >&2
+ exit 1
+fi
+if [ ! -r "$BARCODE_FILE" ]; then
+ echo "Error: barcode file ($BARCODE_FILE) not found!" >&2
+ exit 1
+fi
+mkdir -p "$OUTPUT_PATH"
+if [ ! -d "$OUTPUT_PATH" ]; then
+ echo "Error: failed to create output path '$OUTPUT_PATH'" >&2
+ exit 1
+fi
+
+PUBLICURL=""
+BASEPATH="$OUTPUT_PATH/"
+#PREFIX="$BASEPATH"`date "+%Y-%m-%d_%H%M__"`"${LIBNAME}__"
+PREFIX="$BASEPATH""${LIBNAME}__"
+SUFFIX=".txt"
+
+RESULTS=`zcat -f "$FASTQ_FILE" | fastx_barcode_splitter.pl --bcfile "$BARCODE_FILE" --prefix "$PREFIX" --suffix "$SUFFIX" "$@"`
+if [ $? != 0 ]; then
+ echo "error"
+fi
+
+#
+# Convert the textual tab-separated table into simple HTML table,
+# with the local path replaced by a valid URL
+echo "<html><body><table border=1>"
+echo "$RESULTS" | sed -r "s|$BASEPATH(.*)|<a href=\"\\1\">\\1</a>|" | sed '
+i<tr><td>
+s|\t|</td><td>|g
+a<\/td><\/tr>
+'
+echo "<p>"
+echo "</table></body></html>"
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fastx_clipper.xml
--- a/tools/fastx_toolkit/fastx_clipper.xml Mon Sep 14 15:27:55 2009 -0400
+++ b/tools/fastx_toolkit/fastx_clipper.xml Mon Sep 14 17:03:17 2009 -0400
@@ -1,15 +1,11 @@
<tool id="cshl_fastx_clipper" name="Clip" version="1.0.1" >
<description>adapter sequences</description>
<command>
- zcat -f $input | fastx_clipper -s $maxmismatches -l $minlength -a $clip_source.clip_sequence -d $keepdelta -o $output -v $KEEP_N $DISCARD_OPTIONS
+ zcat -f $input | fastx_clipper -l $minlength -a $clip_source.clip_sequence -d $keepdelta -o $output -v $KEEP_N $DISCARD_OPTIONS
</command>
<inputs>
<param format="fasta,fastqsolexa" name="input" type="data" label="Library to clip" />
-
- <param name="maxmismatches" size="4" type="integer" value="2">
- <label>Maximum number of mismatches allowed (when matching the adapter sequence)</label>
- </param>
<param name="minlength" size="4" type="integer" value="15">
<label>Minimum sequence length (after clipping, sequences shorter than this length will be discarded)</label>
@@ -52,22 +48,23 @@
</param>
</inputs>
-
+ <!--
+ #functional test with param value starting with - fails.
<tests>
<test>
<!-- Clip a FASTQ file -->
<param name="input" value="fastx_clipper1.fastq" />
<param name="maxmismatches" value="2" />
<param name="minlength" value="15" />
- <param name="clip_source.clip_source_list" value="user" />
- <param name="clip_source.clip_sequence" value="CAATTGGTTAATCCCCCTATATA" />
+ <param name="clip_source_list" value="user" />
+ <param name="clip_sequence" value="CAATTGGTTAATCCCCCTATATA" />
<param name="keepdelta" value="0" />
<param name="KEEP_N" value="-n" />
<param name="DISCARD_OPTIONS" value="-c" />
<output name="output" file="fastx_clipper1a.out" />
</test>
</tests>
-
+ -->
<outputs>
<data format="input" name="output" metadata_source="input" />
</outputs>
@@ -82,7 +79,7 @@
**Clipping Illustration:**
-.. image:: ../static/fastx_icons/fastx_clipper_illustration.png
+.. image:: ./static/fastx_icons/fastx_clipper_illustration.png
@@ -93,7 +90,7 @@
**Clipping Example:**
-.. image:: ../static/fastx_icons/fastx_clipper_example.png
+.. image:: ./static/fastx_icons/fastx_clipper_example.png
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fastx_collapser.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/fastx_toolkit/fastx_collapser.xml Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,75 @@
+<tool id="cshl_fastx_collapser" name="Collapse">
+ <description>sequences</description>
+ <command>zcat -f '$input' | fastx_collapser -v -o '$output' </command>
+
+ <inputs>
+ <param format="fastqsolexa,fasta" name="input" type="data" label="Library to collapse" />
+ </inputs>
+
+ <tests>
+ <test>
+ <param name="input" value="fasta_collapser1.fasta" />
+ <output name="output" file="fasta_collapser1.out" />
+ </test>
+ </tests>
+
+ <outputs>
+ <data format="fasta" name="output" metadata_source="input" />
+ </outputs>
+ <help>
+
+**What it does**
+
+This tool collapses identical sequences in a FASTA file into a single sequence.
+
+--------
+
+**Example**
+
+Example Input File (Sequence "ATAT" appears multiple times)::
+
+ >CSHL_2_FC0042AGLLOO_1_1_605_414
+ TGCG
+ >CSHL_2_FC0042AGLLOO_1_1_537_759
+ ATAT
+ >CSHL_2_FC0042AGLLOO_1_1_774_520
+ TGGC
+ >CSHL_2_FC0042AGLLOO_1_1_742_502
+ ATAT
+ >CSHL_2_FC0042AGLLOO_1_1_781_514
+ TGAG
+ >CSHL_2_FC0042AGLLOO_1_1_757_487
+ TTCA
+ >CSHL_2_FC0042AGLLOO_1_1_903_769
+ ATAT
+ >CSHL_2_FC0042AGLLOO_1_1_724_499
+ ATAT
+
+Example Output file::
+
+ >1-1
+ TGCG
+ >2-4
+ ATAT
+ >3-1
+ TGGC
+ >4-1
+ TGAG
+ >5-1
+ TTCA
+
+.. class:: infomark
+
+Original Sequence Names / Lane descriptions (e.g. "CSHL_2_FC0042AGLLOO_1_1_742_502") are discarded.
+
+The output sequence name is composed of two numbers: the first is the sequence's number, the second is the multiplicity value.
+
+The following output::
+
+ >2-4
+ ATAT
+
+means that the sequence "ATAT" is the second sequence in the file, and it appeared 4 times in the input FASTA file.
+
+</help>
+</tool>
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fastx_nucleotides_distribution.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/fastx_toolkit/fastx_nucleotides_distribution.xml Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,66 @@
+<tool id="cshl_fastx_nucleotides_distribution" name="Nucleotides Distribution">
+ <description>chart</description>
+ <command>fastx_nucleotide_distribution_graph.sh -t '$input.name' -i $input -o $output</command>
+
+ <inputs>
+ <param format="txt" name="input" type="data" label="Statistics Text File (output of 'FASTX Statistics' tool)" />
+ </inputs>
+
+ <outputs>
+ <data format="png" name="output" metadata_source="input" />
+ </outputs>
+<help>
+
+**What it does**
+
+Creates a stacked-histogram graph for the nucleotide distribution in the Solexa library.
+
+.. class:: infomark
+
+**TIP:** Use the **FASTQ Statistics** tool to generate the report file needed for this tool.
+
+-----
+
+**Output Examples**
+
+
+
+The following chart clearly shows the barcode used at the 5'-end of the library: **GATCT**
+
+.. image:: ./static/fastx_icons/fastq_nucleotides_distribution_1.png
+
+
+
+
+
+
+
+In the following chart, one can almost 'read' the most abundant sequence by looking at the dominant values: **TGATA TCGTA TTGAT GACTG AA...**
+
+.. image:: ./static/fastx_icons/fastq_nucleotides_distribution_2.png
+
+
+
+
+
+
+
+
+The following chart shows a growing number of unknown (N) nucleotides towards later cycles (which might indicate a sequencing problem):
+
+.. image:: ./static/fastx_icons/fastq_nucleotides_distribution_3.png
+
+
+
+
+
+
+
+
+But most of the time, the chart will look rather random:
+
+.. image:: ./static/fastx_icons/fastq_nucleotides_distribution_4.png
+
+</help>
+</tool>
+<!-- FASTQ-Nucleotides-Distribution is part of the FASTX-toolkit, by A.Gordon (gordon(a)cshl.edu) -->
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fastx_quality_statistics.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/fastx_toolkit/fastx_quality_statistics.xml Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,102 @@
+<tool id="cshl_fastx_quality_statistics" name="Quality Statistics">
+ <description></description>
+ <command>zcat -f $input | fastx_quality_stats -o $output -Q $offset</command>
+
+ <inputs>
+ <param format="fasta,fastqsolexa" name="input" type="data" label="Library to analyse" />
+ <param name="offset" size="4" type="integer" value="33" label="FASTQ ASCII offset" />
+ </inputs>
+
+ <tests>
+ <test>
+ <param name="input" value="fastq_stats1.fastq" />
+ <param name="offset" value="64" />
+ <output name="output" file="fastq_stats1.out" />
+ </test>
+ </tests>
+
+ <outputs>
+ <data format="txt" name="output" metadata_source="input" />
+ </outputs>
+
+<help>
+
+**What it does**
+
+Creates quality statistics report for the given Solexa/FASTQ library.
+
+.. class:: infomark
+
+**TIP:** This statistics report can be used as input for **Quality Score** and **Nucleotides Distribution** tools.
+
+-----
+
+**The output file will contain the following fields:**
+
+* column = column number (1 to 36 for a 36-cycles read solexa file)
+* count = number of bases found in this column.
+* min = Lowest quality score value found in this column.
+* max = Highest quality score value found in this column.
+* sum = Sum of quality score values for this column.
+* mean = Mean quality score value for this column.
+* Q1 = 1st quartile quality score.
+* med = Median quality score.
+* Q3 = 3rd quartile quality score.
+* IQR = Inter-Quartile range (Q3-Q1).
+* lW = 'Left-Whisker' value (for boxplotting).
+* rW = 'Right-Whisker' value (for boxplotting).
+* A_Count = Count of 'A' nucleotides found in this column.
+* C_Count = Count of 'C' nucleotides found in this column.
+* G_Count = Count of 'G' nucleotides found in this column.
+* T_Count = Count of 'T' nucleotides found in this column.
+* N_Count = Count of 'N' nucleotides found in this column.
+
+
+
+
+
+
+**Output Example**::
+
+ column count min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count
+ 1 6362991 -4 40 250734117 39.41 40 40 40 0 40 40 1396976 1329101 678730 2958184 0
+ 2 6362991 -5 40 250531036 39.37 40 40 40 0 40 40 1786786 1055766 1738025 1782414 0
+ 3 6362991 -5 40 248722469 39.09 40 40 40 0 40 40 2296384 984875 1443989 1637743 0
+ 4 6362991 -5 40 247654797 38.92 40 40 40 0 40 40 1683197 1410855 1722633 1546306 0
+ 5 6362991 -4 40 248214827 39.01 40 40 40 0 40 40 2536861 1167423 1248968 1409739 0
+ 6 6362991 -5 40 248499903 39.05 40 40 40 0 40 40 1598956 1236081 1568608 1959346 0
+ 7 6362991 -4 40 247719760 38.93 40 40 40 0 40 40 1692667 1822140 1496741 1351443 0
+ 8 6362991 -5 40 245745205 38.62 40 40 40 0 40 40 2230936 1343260 1529928 1258867 0
+ 9 6362991 -5 40 245766735 38.62 40 40 40 0 40 40 1702064 1306257 1336511 2018159 0
+ 10 6362991 -5 40 245089706 38.52 40 40 40 0 40 40 1519917 1446370 1450995 1945709 0
+ 11 6362991 -5 40 242641359 38.13 40 40 40 0 40 40 1717434 1282975 1387804 1974778 0
+ 12 6362991 -5 40 242026113 38.04 40 40 40 0 40 40 1662872 1202041 1519721 1978357 0
+ 13 6362991 -5 40 238704245 37.51 40 40 40 0 40 40 1549965 1271411 1973291 1566681 1643
+ 14 6362991 -5 40 235622401 37.03 40 40 40 0 40 40 2101301 1141451 1603990 1515774 475
+ 15 6362991 -5 40 230766669 36.27 40 40 40 0 40 40 2344003 1058571 1440466 1519865 86
+ 16 6362991 -5 40 224466237 35.28 38 40 40 2 35 40 2203515 1026017 1474060 1651582 7817
+ 17 6362991 -5 40 219990002 34.57 34 40 40 6 25 40 1522515 1125455 2159183 1555765 73
+ 18 6362991 -5 40 214104778 33.65 30 40 40 10 15 40 1479795 2068113 1558400 1249337 7346
+ 19 6362991 -5 40 212934712 33.46 30 40 40 10 15 40 1432749 1231352 1769799 1920093 8998
+ 20 6362991 -5 40 212787944 33.44 29 40 40 11 13 40 1311657 1411663 2126316 1513282 73
+ 21 6362991 -5 40 211369187 33.22 28 40 40 12 10 40 1887985 1846300 1300326 1318380 10000
+ 22 6362991 -5 40 213371720 33.53 30 40 40 10 15 40 542299 3446249 516615 1848190 9638
+ 23 6362991 -5 40 221975899 34.89 36 40 40 4 30 40 347679 1233267 926621 3855355 69
+ 24 6362991 -5 40 194378421 30.55 21 40 40 19 -5 40 433560 674358 3262764 1992242 67
+ 25 6362991 -5 40 199773985 31.40 23 40 40 17 -2 40 944760 325595 1322800 3769641 195
+ 26 6362991 -5 40 179404759 28.20 17 34 40 23 -5 40 3457922 156013 1494664 1254293 99
+ 27 6362991 -5 40 163386668 25.68 13 28 40 27 -5 40 1392177 281250 3867895 821491 178
+ 28 6362991 -5 40 156230534 24.55 12 25 40 28 -5 40 907189 981249 4174945 299437 171
+ 29 6362991 -5 40 163236046 25.65 13 28 40 27 -5 40 1097171 3418678 1567013 280008 121
+ 30 6362991 -5 40 151309826 23.78 12 23 40 28 -5 40 3514775 2036194 566277 245613 132
+ 31 6362991 -5 40 141392520 22.22 10 21 40 30 -5 40 1569000 4571357 124732 97721 181
+ 32 6362991 -5 40 143436943 22.54 10 21 40 30 -5 40 1453607 4519441 38176 351107 660
+ 33 6362991 -5 40 114269843 17.96 6 14 30 24 -5 40 3311001 2161254 155505 734297 934
+ 34 6362991 -5 40 140638447 22.10 10 20 40 30 -5 40 1501615 1637357 18113 3205237 669
+ 35 6362991 -5 40 138910532 21.83 10 20 40 30 -5 40 1532519 3495057 23229 1311834 352
+ 36 6362991 -5 40 117158566 18.41 7 15 30 23 -5 40 4074444 1402980 63287 822035 245
+
+
+</help>
+</tool>
+<!-- FASTQ-Statistics is part of the FASTX-toolkit, by A.Gordon (gordon(a)cshl.edu) -->
diff -r 0f97b3048bc3 -r 40c5e1853a66 tools/fastx_toolkit/fastx_renamer.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/fastx_toolkit/fastx_renamer.xml Mon Sep 14 17:03:17 2009 -0400
@@ -0,0 +1,56 @@
+<tool id="cshl_fastx_renamer" name="Rename" version="0.0.11" >
+ <description>sequence identifiers</description>
+ <command>zcat -f $input | fastx_renamer -n $TYPE -o $output -v </command>
+
+ <inputs>
+ <param format="fastqsolexa,fasta,fastqsanger" name="input" type="data" label="FASTQ/A Library to rename" />
+
+ <param name="TYPE" type="select" label="Rename sequence identifiers to">
+ <option value="SEQ">Nucleotides sequence</option>
+ <option value="COUNT">Numeric Counter</option>
+ </param>
+ </inputs>
+
+ <outputs>
+ <data format="input" name="output" metadata_source="input" />
+ </outputs>
+
+<help>
+
+**What it does**
+
+This tool renames the sequence identifiers in a FASTQ/A file.
+
+.. class:: infomark
+
+Use this tool at the beginning of your workflow, as a way to keep the original sequence (before trimming,clipping,barcode-removal, etc).
+
+--------
+
+**Example**
+
+The following Solexa-FASTQ file::
+
+ @CSHL_4_FC042GAMMII_2_1_517_596
+ GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT
+ +CSHL_4_FC042GAMMII_2_1_517_596
+ 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40
+
+Renamed to **nucleotides sequence**::
+
+ @GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT
+ GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT
+ +GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT
+ 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40
+
+Renamed to **numeric counter**::
+
+ @1
+ GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT
+ +1
+ 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40
+
+
+</help>
+</tool>
+<!-- FASTX-Renamer is part of the FASTX-toolkit, by A.Gordon (gordon(a)cshl.edu) -->
1
0
details: http://www.bx.psu.edu/hg/galaxy/rev/0f97b3048bc3
changeset: 2690:0f97b3048bc3
user: anton(a)nekrut-mbp.bx.psu.edu
date: Mon Sep 14 15:27:55 2009 -0400
description:
Modifications for fastq splitted help
2 file(s) affected in this change:
tool_conf.xml.sample
tools/metag_tools/split_paired_reads.xml
diffs (23 lines):
diff -r e58e140b89f7 -r 0f97b3048bc3 tool_conf.xml.sample
--- a/tool_conf.xml.sample Mon Sep 14 14:54:11 2009 -0400
+++ b/tool_conf.xml.sample Mon Sep 14 15:27:55 2009 -0400
@@ -338,6 +338,7 @@
<tool file="visualization/genetrack.xml" />
</section>
<section name="SAM Tools" id="samtools">
+ <tool file="samtools/sam_bitwise_flag_filter.xml" />
<tool file="samtools/sam_to_bam.xml" />
<tool file="samtools/sam_merge.xml" />
<tool file="samtools/sam_pileup.xml" />
diff -r e58e140b89f7 -r 0f97b3048bc3 tools/metag_tools/split_paired_reads.xml
--- a/tools/metag_tools/split_paired_reads.xml Mon Sep 14 14:54:11 2009 -0400
+++ b/tools/metag_tools/split_paired_reads.xml Mon Sep 14 15:27:55 2009 -0400
@@ -20,7 +20,7 @@
**What it does**
-This tool splits a single paired-end file in half and returns two files with each ends.
+Splits a single fastq dataset representing a paired-end run into two datasets (one for each end). This tool works only for datasets where both ends have **the same** length.
-----
1
0
14 Sep '09
details: http://www.bx.psu.edu/hg/galaxy/rev/e58e140b89f7
changeset: 2689:e58e140b89f7
user: Kelly Vincent <kpvincent(a)bx.psu.edu>
date: Mon Sep 14 14:54:11 2009 -0400
description:
Updated Bowtie wrapper tool to add a number of threads parameter and remove two unnecessary options
2 file(s) affected in this change:
tools/sr_mapping/bowtie_wrapper.py
tools/sr_mapping/bowtie_wrapper.xml
diffs (198 lines):
diff -r 7f4c8fee3b39 -r e58e140b89f7 tools/sr_mapping/bowtie_wrapper.py
--- a/tools/sr_mapping/bowtie_wrapper.py Mon Sep 14 12:23:16 2009 -0400
+++ b/tools/sr_mapping/bowtie_wrapper.py Mon Sep 14 14:54:11 2009 -0400
@@ -13,6 +13,7 @@
def __main__():
#Parse Command Line
parser = optparse.OptionParser()
+ parser.add_option('', '--threads', dest='threads', help='The number of threads to run')
parser.add_option('', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format')
parser.add_option('', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format')
parser.add_option('', '--output', dest='output', help='The output file')
@@ -35,7 +36,6 @@
parser.add_option('', '--offbase', dest='offbase', help='Number the first base of a reference sequence as n when outputting alignments')
parser.add_option('', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions")
parser.add_option('', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read')
- parser.add_option('', '--threadMem', dest='threadMem', help='Number of megabytes of memory a given thread is given to store path descriptors in best mode')
parser.add_option('', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable')
parser.add_option('', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments')
parser.add_option('', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments')
@@ -45,7 +45,6 @@
parser.add_option('', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand')
parser.add_option('', '--phased', dest='phased', help='Whether or not it should alternate between using the forward and mirror indexes in a series of phases so that only half of the index is resident in memory at one time')
parser.add_option('', '--offrate', dest='offrate', help='Override the offrate of the index to n')
- parser.add_option('', '--mm', dest='mm', help='Whether or not to use memory-mapped I/O to load the index')
parser.add_option('', '--seed', dest='seed', help='Seed for pseudo-random number generator')
parser.add_option('', '--dbkey', dest='dbkey', help='')
parser.add_option('', '--params', dest='params', help='Whether to use default or specified parameters')
@@ -70,10 +69,10 @@
if options.genomeSource == 'history':
# set up commands
if options.index_settings =='index_pre_set':
- indexing_cmds = ''
+ indexing_cmds = '--quiet'
else:
try:
- indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s %s' % \
+ indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s %s --quiet' % \
(('','--noauto')[options.iauto_b=='set'],
('','--packed')[options.ipacked=='packed'],
('','--bmax %s'%options.ibmax)[options.ibmax!='None' and options.ibmax>=1],
@@ -88,7 +87,7 @@
('','--cutoff %s'%options.icutoff)[int(options.icutoff)>0],
('','--oldpmap')[options.ioldpmap=='yes'])
except ValueError:
- indexing_cmds = ''
+ indexing_cmds = '--quiet'
# make temp directory for placement of indices and copy reference file there
tmp_dir = tempfile.gettempdir()
@@ -97,7 +96,7 @@
except Exception, erf:
stop_err('Error creating temp directory for indexing purposes\n' + str(erf))
options.ref = os.path.join(tmp_dir,os.path.split(options.ref)[1])
- cmd1 = 'cd %s; bowtie-build %s -f %s %s > /dev/null' % (tmp_dir, indexing_cmds, options.ref, options.ref)
+ cmd1 = 'cd %s; bowtie-build %s -f %s %s' % (tmp_dir, indexing_cmds, options.ref, options.ref)
try:
os.system(cmd1)
except Exception, erf:
@@ -106,11 +105,11 @@
# set up aligning and generate aligning command options
# automatically set threads to 8 in both cases
if options.params == 'pre_set':
- aligning_cmds = '-p 8'
+ aligning_cmds = '-p %s --quiet' % options.threads
else:
try:
aligning_cmds = '%s %s %s %s %s %s %s %s %s %s %s %s %s %s ' \
- '%s %s %s %s %s %s %s %s %s %s %s %s %s %s -p 8' % \
+ '%s %s %s %s %s %s %s %s %s %s %s %s -p %s --quiet' % \
(('','-s %s'%options.skip)[options.skip!='None'],
('','-u %s'%options.alignLimit)[int(options.alignLimit)>0],
('','-5 %s'%options.trimH)[int(options.trimH)>=0],
@@ -128,7 +127,6 @@
('','--norc')[options.reverseAlign=='noReverse'],
('','--maxbts %s'%options.maxBacktracks)[options.maxBacktracks!='None' and (options.mismatchSeed=='2' or options.mismatchSeed=='3')],
('','-y')[options.tryHard=='doTryHard'],
- ('','--chunkmbs %s'%options.threadMem)[options.threadMem!='None' and int(options.threadMem)>=0],
('','-k %s'%options.valAlign)[options.valAlign!='None' and int(options.valAlign)>=0],
('','-a')[options.allValAligns=='doAllValAligns' and int(options.allValAligns)>=0],
('','-m %s'%options.suppressAlign)[int(options.suppressAlign)>=0],
@@ -137,18 +135,18 @@
('','-B %s'%options.offbase)[int(options.offbase)>=0],
('','-z %s'%options.phased)[options.phased!='None'],
('','-o %s'%options.offrate)[int(options.offrate)>=0],
- ('','--mm')[options.mm=='doMm'],
- ('','--seed %s'%options.seed)[int(options.seed)>=0])
+ ('','--seed %s'%options.seed)[int(options.seed)>=0],
+ options.threads)
except ValueError:
- aligning_cmds = '-p 8'
+ aligning_cmds = '-p %s --quiet' % options.threads
tmp_out = tempfile.NamedTemporaryFile()
# prepare actual aligning commands
if options.paired == 'paired':
- cmd2 = 'bowtie %s %s -1 %s -2 %s > %s 2> /dev/null' % (aligning_cmds, options.ref, options.input1, options.input2, tmp_out.name)
+ cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % (aligning_cmds, options.ref, options.input1, options.input2, tmp_out.name)
else:
- cmd2 = 'bowtie %s %s %s > %s 2> /dev/null' % (aligning_cmds, options.ref, options.input1, tmp_out.name)
+ cmd2 = 'bowtie %s %s %s > %s' % (aligning_cmds, options.ref, options.input1, tmp_out.name)
# prepare command to convert bowtie output to sam and alternative
cmd3 = 'bowtie2sam.pl %s > %s' % (tmp_out.name, options.output)
cmd4 = 'cp %s %s' % (tmp_out.name, options.output)
diff -r 7f4c8fee3b39 -r e58e140b89f7 tools/sr_mapping/bowtie_wrapper.xml
--- a/tools/sr_mapping/bowtie_wrapper.xml Mon Sep 14 12:23:16 2009 -0400
+++ b/tools/sr_mapping/bowtie_wrapper.xml Mon Sep 14 14:54:11 2009 -0400
@@ -2,6 +2,7 @@
<description> fast alignment of reads against reference sequence </description>
<command interpreter="python">
bowtie_wrapper.py
+ --threads="8"
--input1=$singlePaired.input1
#if $singlePaired.sPaired == "paired":
--input2=$singlePaired.input2
@@ -33,17 +34,14 @@
--suppressAlign=$singlePaired.params.suppressAlign
--offbase=$singlePaired.params.offbase
--offrate=$singlePaired.params.offrate
- --mm=$singlePaired.params.mm
--seed=$singlePaired.params.seed
--best=$singlePaired.params.bestOption.best
#if $singlePaired.params.bestOption.best == "doBest":
--maxBacktracks=$singlePaired.params.bestOption.maxBacktracks
- --threadMem=$singlePaired.params.bestOption.threadMem
--strata=$singlePaired.params.bestOption.strata
--phased="None"
#else:
--maxBacktracks="None"
- --threadMem="None"
--strata="None"
#if $singlePaired.sPaired =="single":
--phased=$singlePaired.params.bestOption.phased
@@ -83,7 +81,6 @@
--offbase="None"
--best="None"
--maxBacktracks="None"
- --threadMem="None"
--strata="None"
--minInsert="None"
--maxInsert="None"
@@ -93,7 +90,6 @@
--reverseAlign="None"
--phased="None"
--offrate="None"
- --mm="None"
--seed="None"
#end if
#if $refGenomeSource.genomeSource == "history":
@@ -264,7 +260,6 @@
</when>
<when value="doBest">
<param name="maxBacktracks" type="integer" value="800" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" />
- <param name="threadMem" type="integer" value="32" label="Number of megabytes of memory a given thread is given to store path descriptors in best mode (--chunkmbs)" help="If running in best mode, and you run out of memory, try adjusting this" />
<param name="strata" type="select" label="Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable (--strata)">
<option value="noStrata">Do not use strata option</option>
<option value="doStrata">Use strata option</option>
@@ -272,10 +267,6 @@
</when>
</conditional> <!-- bestOption -->
<param name="offrate" type="integer" value="-1" label="Override the offrate of the index to n (-o)" help="-1 for default" />
- <param name="mm" type="select" label="Whether or not to use memory-mapped I/O to load the index (--m)">
- <option value="noMm">Use POSIX/C file I/O</option>
- <option value="doMm">Use memory-mapped I/O</option>
- </param>
<param name="seed" type="integer" value="-1" label="Seed for pseudo-random number generator (--seed)" help="-1 for default" />
</when> <!-- full -->
</conditional> <!-- params -->
@@ -339,7 +330,6 @@
</when>
<when value="doBest">
<param name="maxBacktracks" type="integer" value="800" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" />
- <param name="threadMem" type="integer" value="32" label="Number of megabytes of memory a given thread is given to store path descriptors in best mode (--chunkmbs)" help="If running in best mode, and you run out of memory, try adjusting this" />
<param name="strata" type="select" label="Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable (--strata)">
<option value="noStrata">Do not use strata option</option>
<option value="doStrata">Use strata option</option>
@@ -347,10 +337,6 @@
</when>
</conditional>
<param name="offrate" type="integer" value="-1" label="Override the offrate of the index to n -o)" help="-1 for default" />
- <param name="mm" type="select" label="Whether or not to use memory-mapped I/O to load the index (--mm)">
- <option value="noMm">Use POSIX/C file I/O</option>
- <option value="doMm">Use memory-mapped I/O</option>
- </param>
<param name="seed" type="integer" value="-1" label="Seed for pseudo-random number generator (--seed)" help="-1 for default" />
</when> <!-- full -->
</conditional> <!-- params -->
@@ -431,10 +417,8 @@
<param name="offbase" value="0" />
<param name="best" value="doBest" />
<param name="maxBacktracks" value="800" />
- <param name="threadMem" value="32" />
<param name="strata" value="noStrata" />
<param name="offrate" value="-1" />
- <param name="mm" value="noMm" />
<param name="seed" value="403" />
<output name="output" ftype="sam" file="bowtie_out2.sam" />
</test>
1
0