details: http://www.bx.psu.edu/hg/galaxy/rev/58ba6fbe22b3 changeset: 3624:58ba6fbe22b3 user: Dan Blankenberg <dan@bx.psu.edu> date: Fri Apr 09 11:35:02 2010 -0400 description: Add additional tests to FASTQ Quality Trimmer tool diffstat: test-data/sanger_full_range_empty_reads.fastqsanger | 8 ++++++ tools/fastq/fastq_trimmer_by_quality.py | 11 ++++++-- tools/fastq/fastq_trimmer_by_quality.xml | 26 +++++++++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diffs (90 lines): diff -r ac044bb17d24 -r 58ba6fbe22b3 test-data/sanger_full_range_empty_reads.fastqsanger --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sanger_full_range_empty_reads.fastqsanger Fri Apr 09 11:35:02 2010 -0400 @@ -0,0 +1,8 @@ +@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order) + ++ + +@FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order) + ++ + diff -r ac044bb17d24 -r 58ba6fbe22b3 tools/fastq/fastq_trimmer_by_quality.py --- a/tools/fastq/fastq_trimmer_by_quality.py Fri Apr 09 11:29:26 2010 -0400 +++ b/tools/fastq/fastq_trimmer_by_quality.py Fri Apr 09 11:35:02 2010 -0400 @@ -55,6 +55,12 @@ if len ( args ) != 2: parser.error( "Need to specify an input file and an output file" ) + if options.window_size < 1: + parser.error( 'You must specify a strictly positive window size' ) + + if options.window_step < 1: + parser.error( 'You must specify a strictly positive step size' ) + #determine an exhaustive list of window indexes that can be excluded from aggregation exclude_window_indexes = [] last_exclude_indexes = [] @@ -76,7 +82,6 @@ out = fastqWriter( open( args[1], 'wb' ), format = options.format ) action = ACTION_METHODS[ options.aggregation_action ] - window_step = abs( options.window_step ) num_reads = None num_reads_excluded = 0 @@ -93,7 +98,7 @@ if exclude_and_compare( action, quality_list[ lwindow_position:lwindow_position + options.window_size ], options.score_comparison, options.quality_score, exclude_window_indexes ): fastq_read = fastq_read.slice( lwindow_position, None ) break - lwindow_position += window_step + lwindow_position += options.window_step else: rwindow_position = len( quality_list ) #right position of window while True: @@ -105,7 +110,7 @@ if exclude_and_compare( action, quality_list[ lwindow_position:rwindow_position ], options.score_comparison, options.quality_score, exclude_window_indexes ): fastq_read = fastq_read.slice( None, rwindow_position ) break - rwindow_position -= window_step + rwindow_position -= options.window_step if options.keep_zero_length or len( fastq_read ): out.write( fastq_read ) else: diff -r ac044bb17d24 -r 58ba6fbe22b3 tools/fastq/fastq_trimmer_by_quality.xml --- a/tools/fastq/fastq_trimmer_by_quality.xml Fri Apr 09 11:29:26 2010 -0400 +++ b/tools/fastq/fastq_trimmer_by_quality.xml Fri Apr 09 11:35:02 2010 -0400 @@ -94,6 +94,32 @@ <param name="quality_score" value="1"/> <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> </test> + <test> + <!-- Trim entire sequences; keep empty reads --> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="keep_zero_length" value="true" /> + <param name="trim_ends" value="53"/> + <param name="window_size" value="1"/> + <param name="step_size" value="1"/> + <param name="exclude_count" value="0"/> + <param name="aggregation_action" value="min"/> + <param name="score_comparison" value=">="/> + <param name="quality_score" value="999"/> + <output name="output_file" file="sanger_full_range_empty_reads.fastqsanger" /> + </test> + <test> + <!-- Trim entire sequences; discard empty reads --> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="keep_zero_length"/> + <param name="trim_ends" value="53"/> + <param name="window_size" value="1"/> + <param name="step_size" value="1"/> + <param name="exclude_count" value="0"/> + <param name="aggregation_action" value="min"/> + <param name="score_comparison" value=">="/> + <param name="quality_score" value="999"/> + <output name="output_file" file="empty_file.dat" /> + </test> </tests> <help> This tool allows you to trim the ends of reads based upon the aggregate value of quality scores found within a sliding window; a sliding window of size 1 is equivalent to 'simple' trimming of the ends.