details: http://www.bx.psu.edu/hg/galaxy/rev/4a3e48e1e8e1
changeset: 2873:4a3e48e1e8e1
user: Kelly Vincent <kpvincent@bx.psu.edu>
date: Mon Oct 12 14:04:28 2009 -0400
description:
Changed FASTQ Groomer's handling of Sanger data to output all lines even if only first n are validated
3 file(s) affected in this change:
test-data/fastq_gen_conv_in2.fastq
test-data/fastq_gen_conv_out2.fastqsanger
tools/next_gen_conversion/fastq_gen_conv.py
diffs (74 lines):
diff -r 9d67ae5ecda7 -r 4a3e48e1e8e1 test-data/fastq_gen_conv_in2.fastq
--- a/test-data/fastq_gen_conv_in2.fastq Mon Oct 12 13:19:31 2009 -0400
+++ b/test-data/fastq_gen_conv_in2.fastq Mon Oct 12 14:04:28 2009 -0400
@@ -5,7 +5,7 @@
 @seq2
 GGGTCTCCCAGAATGATTAGAGCCGTATAGGA
 +
-?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]
+?@ABCDEFGHIJK MNOPQRSTUVWXYZ[\\]
 @seq3
 GCGGTTCAATACGATTACCACCATGATAAATA
 +
diff -r 9d67ae5ecda7 -r 4a3e48e1e8e1 test-data/fastq_gen_conv_out2.fastqsanger
--- a/test-data/fastq_gen_conv_out2.fastqsanger Mon Oct 12 13:19:31 2009 -0400
+++ b/test-data/fastq_gen_conv_out2.fastqsanger Mon Oct 12 14:04:28 2009 -0400
@@ -2,11 +2,19 @@
 AAAGGTTTCTCTTTTGGAAATATCTAAATCCC
 +
 !"#$%&\'()*+,-./0123456789:;<=>.
-@seq2
-GGGTCTCCCAGAATGATTAGAGCCGTATAGGA
-+
-?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]
 @seq3
 GCGGTTCAATACGATTACCACCATGATAAATA
 +
 ?Aa.1ghB2K!#lk(02GY[[II])Kwl+,5M
+@seq4
+AGTCTTTTCCTCTAAAATAACATAGGATACTA
++
+ghY)N375Nh.,Ol>==/<:2#i&d%#KdNII
+@seq5
+GAGGACTCATGGTAGGTATTTTACATGACATT
++
+IIgy%hf6#394bd&hNMWL$OPB63II*,+-
+@seq6
+GGCCTACATTCATTTACGAGACTAATTAGGGA
++
+IIIIIgd6#5%jKO&.,D+s3aW=cdGB#a1$
\ No newline at end of file
diff -r 9d67ae5ecda7 -r 4a3e48e1e8e1 tools/next_gen_conversion/fastq_gen_conv.py
--- a/tools/next_gen_conversion/fastq_gen_conv.py Mon Oct 12 13:19:31 2009 -0400
+++ b/tools/next_gen_conversion/fastq_gen_conv.py Mon Oct 12 14:04:28 2009 -0400
@@ -47,14 +47,15 @@
  lines = []
  line = fin.readline()
  while line:
- if max_blocks >= 0 and block_num > 0 and orig_type == 'sanger' and max_blocks < block_num:
- print 'break'
- break
- if line.strip():
- # the line that starts of a block, with a name
+ if line.strip() and max_blocks >= 0 and block_num > 0 and orig_type == 'sanger' and block_num >= max_blocks:
+ fout.write(line)
+ if line_count % 4 == 0:
+ block_num += 1
+ line_count += 1
+ elif line.strip():
+ # the line that starts a block, with a name
  if line_count % 4 == 0 and line.startswith('@'):
  lines.append(line)
- block_num += 1
  else:
  # if we expect a sequence of bases
  if line_count % 4 == 1 and all_bases_valid(line.strip()):
@@ -154,6 +155,8 @@
  bad_blocks += 1
  base_len = -1
  lines = []
+ # mark the successful end of a block
+ block_num += 1
  line_count += 1
  line = fin.readline()
  fout.close()