details: http://www.bx.psu.edu/hg/galaxy/rev/f7525fb463e0 changeset: 3818:f7525fb463e0 user: Dan Blankenberg <dan@bx.psu.edu> date: Mon May 24 15:33:55 2010 -0400 description: Allow FASTQ Groomer/parser to work on tab-delimited decimal scores. diffstat: lib/galaxy_utils/sequence/fastq.py | 11 ++++++++--- test-data/sanger_full_range_as_tab_decimal_sanger.fastqsanger | 8 ++++++++ tools/fastq/fastq_groomer.xml | 11 ++++++++++- 3 files changed, 26 insertions(+), 4 deletions(-) diffs (72 lines): diff -r 86fe916dbdb5 -r f7525fb463e0 lib/galaxy_utils/sequence/fastq.py --- a/lib/galaxy_utils/sequence/fastq.py Mon May 24 15:10:31 2010 -0400 +++ b/lib/galaxy_utils/sequence/fastq.py Mon May 24 15:33:55 2010 -0400 @@ -41,7 +41,12 @@ def convert_color_to_base_space( cls, sequence ): return cls.color_space_converter.to_base_space( sequence ) def is_ascii_encoded( self ): - return ' ' not in self.quality #as per fastq definition only decimal quality strings can have spaces in them (and must have a trailing space) + #as per fastq definition only decimal quality strings can have spaces (and TABs for our purposes) in them (and must have a trailing space) + if ' ' in self.quality: + return False + if '\t' in self.quality: + return False + return True def get_ascii_quality_scores( self ): if self.is_ascii_encoded(): return list( self.quality ) @@ -49,7 +54,7 @@ quality = self.quality.rstrip() #decimal scores should have a trailing space if quality: try: - return [ chr( int( val ) + self.ascii_min - self.quality_min ) for val in quality.split( ' ' ) ] + return [ chr( int( val ) + self.ascii_min - self.quality_min ) for val in quality.split() ] except ValueError, e: raise ValueError( 'Error Parsing quality String. ASCII quality strings cannot contain spaces (%s): %s' % ( self.quality, e ) ) else: @@ -60,7 +65,7 @@ else: quality = self.quality.rstrip() #decimal scores should have a trailing space if quality: - return [ int( val ) for val in quality.split( ' ' ) if val.strip() ] + return [ int( val ) for val in quality.split() if val.strip() ] else: return [] def convert_read_to_format( self, format, force_quality_encoding = None ): diff -r 86fe916dbdb5 -r f7525fb463e0 test-data/sanger_full_range_as_tab_decimal_sanger.fastqsanger --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sanger_full_range_as_tab_decimal_sanger.fastqsanger Mon May 24 15:33:55 2010 -0400 @@ -0,0 +1,8 @@ +@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC ++ +0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 +@FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order) +CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA ++ +93 92 91 90 89 88 87 86 85 84 83 82 81 80 79 78 77 76 75 74 73 72 71 70 69 68 67 66 65 64 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff -r 86fe916dbdb5 -r f7525fb463e0 tools/fastq/fastq_groomer.xml --- a/tools/fastq/fastq_groomer.xml Mon May 24 15:10:31 2010 -0400 +++ b/tools/fastq/fastq_groomer.xml Mon May 24 15:33:55 2010 -0400 @@ -1,4 +1,4 @@ -<tool id="fastq_groomer" name="FASTQ Groomer" version="1.0.2"> +<tool id="fastq_groomer" name="FASTQ Groomer" version="1.0.3"> <description>convert between various FASTQ quality formats</description> <command interpreter="python">fastq_groomer.py '$input_file' '$input_type' '$output_file' #if str( $options_type['options_type_selector'] ) == 'basic': @@ -288,6 +288,15 @@ <param name="summarize_input" value="summarize_input" /> <output name="output_file" file="sanger_full_range_as_decimal_sanger.fastqsanger" /> </test> + <test> + <param name="input_file" value="sanger_full_range_as_tab_decimal_sanger.fastqsanger" ftype="fastq" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="sanger" /> + <param name="force_quality_encoding" value="ascii" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> + </test> <!-- Solexa, range -5 - 62 --> <test> <param name="input_file" value="solexa_full_range_as_decimal_solexa.fastqsolexa" ftype="fastq" />