details: http://www.bx.psu.edu/hg/galaxy/rev/674aaee19991 changeset: 3440:674aaee19991 user: Dan Blankenberg <dan@bx.psu.edu> date: Wed Feb 24 16:50:06 2010 -0500 description: Update Combine FASTA and QUAL tool to allow the quality score file to be optional. When not provided, the output will be fastqsanger or fastqsolid (when a csfasta is provided) with each quality score being the maximal allowed value (93). diffstat: lib/galaxy_utils/sequence/fasta.py | 2 + lib/galaxy_utils/sequence/fastq.py | 25 + test-data/fastq_combiner_no_qual_ascii_out_1.fastqsolid | 576 +++++++++++++ test-data/fastq_combiner_no_qual_decimal_out_1.fastqsanger | 12 + tools/fastq/fastq_combiner.py | 11 +- tools/fastq/fastq_combiner.xml | 16 +- 6 files changed, 637 insertions(+), 5 deletions(-) diffs (725 lines): diff -r 46849d69d7e6 -r 674aaee19991 lib/galaxy_utils/sequence/fasta.py --- a/lib/galaxy_utils/sequence/fasta.py Wed Feb 24 16:48:22 2010 -0500 +++ b/lib/galaxy_utils/sequence/fasta.py Wed Feb 24 16:50:06 2010 -0500 @@ -53,6 +53,8 @@ def close( self ): return self.file.close() def get( self, sequence_id ): + if not isinstance( sequence_id, basestring ): + sequence_id = sequence_id.identifier rval = None if sequence_id in self.offset_dict: initial_offset = self.file.tell() diff -r 46849d69d7e6 -r 674aaee19991 lib/galaxy_utils/sequence/fastq.py --- a/lib/galaxy_utils/sequence/fastq.py Wed Feb 24 16:48:22 2010 -0500 +++ b/lib/galaxy_utils/sequence/fastq.py Wed Feb 24 16:50:06 2010 -0500 @@ -3,6 +3,7 @@ import string import transform from sequence import SequencingRead +from fasta import fastaSequence class fastqSequencingRead( SequencingRead ): format = 'sanger' #sanger is default @@ -456,6 +457,8 @@ def close( self ): return self.file.close() def get( self, sequence_id ): + if not isinstance( sequence_id, basestring ): + sequence_id = sequence_id.identifier rval = None if sequence_id in self.offset_dict: initial_offset = self.file.tell() @@ -593,3 +596,25 @@ fastq_read.sequence = fasta_seq.sequence fastq_read.quality = quality_seq.sequence return fastq_read + +class fastqFakeFastaScoreReader( object ): + def __init__( self, format = 'sanger', quality_encoding = None ): + self.fastq_read = fastqSequencingRead.get_class_by_format( format )() + if quality_encoding != 'decimal': + quality_encoding = 'ascii' + self.quality_encoding = quality_encoding + def close( self ): + return #nothing to close + def get( self, sequence ): + assert isinstance( sequence, fastaSequence ), 'fastqFakeFastaScoreReader requires a fastaSequence object as the parameter' + #add sequence to fastq_read, then get_sequence(), color space adapters do not have quality score values + self.fastq_read.sequence = sequence.sequence + new_sequence = fastaSequence() + new_sequence.identifier = sequence.identifier + if self.quality_encoding == 'ascii': + new_sequence.sequence = chr( self.fastq_read.ascii_max ) * len( self.fastq_read.get_sequence() ) + else: + new_sequence.sequence = ( "%i " % self.fastq_read.quality_max ) * len( self.fastq_read.get_sequence() ) + return new_sequence + def has_data( self ): + return '' #No actual data exist, none can be remaining diff -r 46849d69d7e6 -r 674aaee19991 test-data/fastq_combiner_no_qual_ascii_out_1.fastqsolid --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastq_combiner_no_qual_ascii_out_1.fastqsolid Wed Feb 24 16:50:06 2010 -0500 @@ -0,0 +1,576 @@ +@1831_573_1004_F3 +T00030133312212111300011021310132222 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1050_F3 +T03330322230322112131010221102122113 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1067_F3 +T00023032023103330112220321200200002 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1219_F3 +T11211130300300301021212330201121310 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1242_F3 +T02132003121011302100130302112221121 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1333_F3 +T00200312330110101013212313222303112 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1362_F3 +T21203131001102231121211101111321131 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1448_F3 +T23101211223113320132212331313312022 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1490_F3 +T31312310323301210002210123101021011 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1523_F3 +T10322001220012223202202222001230222 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1578_F3 +T21202302100010020121100311022120111 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1647_F3 +T10222233301013033120132223202022123 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1684_F3 +T13310013212312012302121010221231123 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1769_F3 +T33220123030232212032021032302233131 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1853_F3 +T11000012111222211310103212122102331 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1943_F3 +T20300123032210232001222122001132111 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_573_1977_F3 +T22212302221310332321002303112011311 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_109_F3 +T13122332123301331032220222133301033 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_148_F3 +T01200113123030012202302312200010231 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_185_F3 +T21123333211302300321312212102123121 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_243_F3 +T30221011230013102201033131203302330 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_257_F3 +T00301133110002100302003000000102301 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_293_F3 +T23213210003000103010211331300320130 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_389_F3 +T21032213032101122333230212301312020 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_575_F3 +T33313322100212102033032123311211302 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_592_F3 +T33103330110123102223122023103310330 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_617_F3 +T20021031221222021210021322200223211 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_725_F3 +T32010020322130330333010031120313210 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_734_F3 +T31132301200020012302210322213222222 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_824_F3 +T30212100033032123311211302122020013 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_959_F3 +T11212130220131221111002020123311211 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1062_F3 +T30112230030300221001032033012211012 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1092_F3 +T02013221200031031212200000111130310 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1103_F3 +T20313113203302010303131123021310121 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1116_F3 +T21011310123202303021021112021231011 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1194_F3 +T23303101033322220312200222013013312 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1204_F3 +T21330132231321322010303023221203200 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1306_F3 +T10332133020311023221213100301001220 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1387_F3 +T12301331310032132101301303230121111 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1431_F3 +T12011023331022213001123111301312011 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1560_F3 +T32212313302203320020222113111011111 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1591_F3 +T23202101330322130221230222201123202 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1624_F3 +T20122200222132200313011102302210332 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1826_F3 +T13012312120112021233030302313201111 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1903_F3 +T30232100103132133321330310210101221 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_574_1961_F3 +T02333101331223303300200011100032200 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_54_F3 +T13331330322230200102132110132013200 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_80_F3 +T33133322233322221003332230323312313 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_192_F3 +T30013012111133003301010212123302011 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_197_F3 +T33312113010133020301131330001310032 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_223_F3 +T10121010002202131221210302100121020 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_420_F3 +T31110103220000101310112112001020212 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_434_F3 +T30312132120223101113223301211113311 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_444_F3 +T33022120112320220100202132332113320 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_459_F3 +T31330310210101223330110231120131100 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_506_F3 +T31210200111210121332321310110132301 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_569_F3 +T10210201321323001012232322323002203 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_622_F3 +T01100031122111023002323113231210111 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_644_F3 +T11332003221203131231202200030110130 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_663_F3 +T32210013303112103322311101322021210 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_681_F3 +T23131132033020103031013233200101021 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_711_F3 +T03032331231101231020121210002332121 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_730_F3 +T31010102200110302123032330331011111 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_904_F3 +T20111213300020123200333321131121211 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_938_F3 +T13103102220022130222233301013033120 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_970_F3 +T23201311301023133303023011202220221 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_991_F3 +T33312212031111111012212120321121210 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1138_F3 +T23320002011320012120333103233301321 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1157_F3 +T13121323330203331222022230133102321 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1180_F3 +T32003310122102323303101123331133110 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1283_F3 +T02232200301300220130032321323131333 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1302_F3 +T01201303312333123130200123201013021 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1310_F3 +T31332131312021303211310220101211133 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1321_F3 +T11001010233200122122022023000203212 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1373_F3 +T21213011223311001221321132013121220 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1419_F3 +T33222200303001021230212332001013020 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1436_F3 +T13210313021212303321202113301220331 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1442_F3 +T33132010022331132101132123132020222 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1454_F3 +T11131130011012021120222231313211113 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1500_F3 +T11010000223111301132313011130103021 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1535_F3 +T21312012030320112110211013300131121 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1724_F3 +T33123002323300220213232301000010010 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1829_F3 +T21033321320111321230233302313101021 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1898_F3 +T31330110303103131001110300102101330 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_575_1964_F3 +T22010201103202213200201301300232123 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_32_F3 +T13012100120333032211330300332022110 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_74_F3 +T30103313210232220102021223012112100 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_86_F3 +T10320000121033022010011030032211310 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_89_F3 +T02132333203332020020220033002121120 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_266_F3 +T30322223101312011300311121221333223 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_327_F3 +T22112331301313021321001332120332130 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_331_F3 +T32012133301311223023011232112333030 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_387_F3 +T00101211032031120300200222001230022 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_406_F3 +T00223133010210122221320212103132011 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_449_F3 +T31312001121222231100020132132100220 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_519_F3 +T03011321130130133213131202130321131 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_603_F3 +T21003032313302312320131221001330311 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_655_F3 +T02001023130302322122200313123123102 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_677_F3 +T13330131023320301031013230210103022 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_718_F3 +T31232113331022231333313223132231213 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_722_F3 +T31230320322120231333030031100313200 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_754_F3 +T30221231132103120112331303112133020 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_815_F3 +T23022113203032010120310102321001031 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_882_F3 +T13230020122320223230022031020110122 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_898_F3 +T10230132312121033222231132231233213 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_923_F3 +T21322010320202013210121223010123122 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_930_F3 +T21322103230123110323102012021020013 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1019_F3 +T22032121213231032210312001103122312 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1068_F3 +T00020232013101330112220321203220211 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1131_F3 +T10233122200222132200313011102302210 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1168_F3 +T31013300131121323122002113301002010 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1207_F3 +T21001132013000122220301213221213010 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1289_F3 +T03021210023110200323310302013121203 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1329_F3 +T01100302102020113003022000120002100 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1367_F3 +T12231310311233110031222013332011023 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1416_F3 +T33021233100123120313103133211203221 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1461_F3 +T32022221221112233100210223002100100 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1605_F3 +T30232100103132133321330310210101221 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1664_F3 +T31212101001312110320301201002011120 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1671_F3 +T22313332300211322113223102231322313 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1729_F3 +T11233312313010012320101302101023030 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1880_F3 +T13032121323320213301001310130212003 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1982_F3 +T00032312310201201333221212000011030 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_1987_F3 +T30022313313231221213220132001011320 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_2014_F3 +T31123201010100321122111102113021003 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_576_2028_F3 +T20131211210311112023201213120201100 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_40_F3 +T11111212330120012020200031313303003 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_119_F3 +T33111010021103320103213121313000102 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_133_F3 +T33213323012231300122223032223331322 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_255_F3 +T00332022110020300332022020202002232 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_281_F3 +T03032301231212301013112222111210000 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_288_F3 +T01031120221303100221230021013201130 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_322_F3 +T12003213220230103303201000130312202 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_362_F3 +T31203302330110131230331210121110220 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_382_F3 +T32312123033111120321303230201332100 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_464_F3 +T13020221011130013102221333131203302 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_488_F3 +T13200302330322110200323132101120301 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_511_F3 +T32232133031023313331312220133230333 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_545_F3 +T00112131333222303222210031322103233 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_559_F3 +T32321101303233120102011130022122002 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_562_F3 +T32331101301233110121000220031120031 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_637_F3 +T22113312122202103031023120301031110 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_641_F3 +T13031301101121223221212020032131113 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@1831_577_692_F3 +T01122320200330103121202301211100220 ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff -r 46849d69d7e6 -r 674aaee19991 test-data/fastq_combiner_no_qual_decimal_out_1.fastqsanger --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastq_combiner_no_qual_decimal_out_1.fastqsanger Wed Feb 24 16:50:06 2010 -0500 @@ -0,0 +1,12 @@ +@SRR014849.50939 EIXKN4201BA2EC length=135 +GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG ++ +93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 +@SRR014849.110027 EIXKN4201APUB0 length=131 +CTTCAAATGATTCCGGGACTGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTTCGGTTCCAACTCGCCGTCCGAATAATCCGTTCAAAATCTTGGCCTGTCAAAACGACTTTACGACCAGAACGATCCG ++ +93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 +@SRR014849.203935 EIXKN4201B4HU6 length=144 +AACCCGTCCCATCAAAGATTTTGGTTGGAACCCGAAAGGGTTTTGAATTCAAACCCCTTTCGGTTCCAACTATTCAATTGTTTAACTTTTTTTAAATTGATGGTCTGTTGGACCATTTGTAATAATCCCCATCGGAATTTCTTT ++ +93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 93 diff -r 46849d69d7e6 -r 674aaee19991 tools/fastq/fastq_combiner.py --- a/tools/fastq/fastq_combiner.py Wed Feb 24 16:48:22 2010 -0500 +++ b/tools/fastq/fastq_combiner.py Wed Feb 24 16:50:06 2010 -0500 @@ -1,6 +1,6 @@ #Dan Blankenberg import sys, os, shutil -from galaxy_utils.sequence.fastq import fastqWriter, fastqSequencingRead, fastqCombiner +from galaxy_utils.sequence.fastq import fastqWriter, fastqSequencingRead, fastqCombiner, fastqFakeFastaScoreReader from galaxy_utils.sequence.fasta import fastaReader, fastaNamedReader def main(): @@ -23,15 +23,18 @@ format = 'illumina' out = fastqWriter( open( output_filename, 'wb' ), format = format, force_quality_encoding = force_quality_encoding ) - qual_input = fastaNamedReader( open( qual_filename, 'rb' ) ) + if qual_filename == 'None': + qual_input = fastqFakeFastaScoreReader( format, quality_encoding = force_quality_encoding ) + else: + qual_input = fastaNamedReader( open( qual_filename, 'rb' ) ) + fastq_combiner = fastqCombiner( format ) i = None skip_count = 0 for i, sequence in enumerate( fastaReader( open( fasta_filename, 'rb' ) ) ): - quality = qual_input.get( sequence.identifier ) + quality = qual_input.get( sequence ) if quality: fastq_read = fastq_combiner.combine( sequence, quality ) - #Should we check that fastq read is valid? for now, assume groomer will be used to verify out.write( fastq_read ) else: skip_count += 1 diff -r 46849d69d7e6 -r 674aaee19991 tools/fastq/fastq_combiner.xml --- a/tools/fastq/fastq_combiner.xml Wed Feb 24 16:48:22 2010 -0500 +++ b/tools/fastq/fastq_combiner.xml Wed Feb 24 16:50:06 2010 -0500 @@ -3,7 +3,7 @@ <command interpreter="python">fastq_combiner.py '$fasta_file' '${fasta_file.extension}' '$qual_file' '${qual_file.extension}' '$output_file' '$force_quality_encoding'</command> <inputs> <param name="fasta_file" type="data" format="fasta,csfasta" label="FASTA File" /> - <param name="qual_file" type="data" format="qual" label="Quality Score File" /> + <param name="qual_file" type="data" format="qual" label="Quality Score File" optional="True" /> <param name="force_quality_encoding" type="select" label="Force Quality Score encoding"> <option value="None">Use Source Encoding</option> <option value="ascii" selected="True">ASCII</option> @@ -45,11 +45,25 @@ <param name="force_quality_encoding" value="decimal" /> <output name="output_file" file="wrapping_as_sanger_decimal.fastqsanger" /> </test> + <test> + <param name="fasta_file" value="fastq_combiner_in_1.fasta" ftype="fasta" /> + <param name="qual_file" /> + <param name="force_quality_encoding" value="decimal" /> + <output name="output_file" file="fastq_combiner_no_qual_decimal_out_1.fastqsanger" /> + </test> + <test> + <param name="fasta_file" value="s2fq_phiX.csfasta" ftype="csfasta" /> + <param name="qual_file" /> + <param name="force_quality_encoding" value="ascii" /> + <output name="output_file" file="fastq_combiner_no_qual_ascii_out_1.fastqsolid" /> + </test> </tests> <help> **What it does** This tool joins a FASTA file to a Quality Score file, creating a single FASTQ block for each read. +Specifying a set of quality scores is optional; when not provided, the output will be fastqsanger or fastqsolid (when a csfasta is provided) with each quality score being the maximal allowed value (93). + </help> </tool>