details: http://www.bx.psu.edu/hg/galaxy/rev/8255ed4330e9
changeset: 2731:8255ed4330e9
user: Anton Nekrutenko anton@bx.psu.edu
date: Mon Sep 21 10:32:48 2009 -0400
description: More SR changes
5 file(s) affected in this change:
tool_conf.xml.sample
tools/samtools/pileup_parser.pl
tools/samtools/sam2interval.py
tools/samtools/sam_bitwise_flag_filter.py
tools/sr_mapping/bowtie_wrapper.xml
diffs (196 lines):
diff -r ebf68a725c09 -r 8255ed4330e9 tool_conf.xml.sample --- a/tool_conf.xml.sample Sun Sep 20 11:48:51 2009 -0400 +++ b/tool_conf.xml.sample Mon Sep 21 10:32:48 2009 -0400 @@ -169,29 +169,28 @@ <tool file="fastx_toolkit/fasta_nucleotide_changer.xml" /> <tool file="fastx_toolkit/fastx_collapser.xml" /> </section> - <section name="NGS: FASTQ QC and manipulation" id="cshl_library_information"> - <tool file="fastx_toolkit/fastx_quality_statistics.xml" /> - <tool file="fastx_toolkit/fastq_quality_boxplot.xml" /> - <tool file="fastx_toolkit/fastx_nucleotides_distribution.xml" /> - <!-- <tool file="fastx_toolkit/fasta_clipping_histogram.xml" /> --> - <tool file="fastx_toolkit/fastq_to_fasta.xml" /> - <tool file="fastx_toolkit/fastq_quality_converter.xml" /> - <!-- <tool file="fastx_toolkit/fastx_clipper.xml" /> --> - <tool file="fastx_toolkit/fastx_trimmer.xml" /> - <tool file="fastx_toolkit/fastx_renamer.xml" /> - <tool file="fastx_toolkit/fastx_reverse_complement.xml" /> - <tool file="fastx_toolkit/fastx_artifacts_filter.xml" /> - <tool file="fastx_toolkit/fastq_quality_filter.xml" /> - <!--<tool file="fastx_toolkit/fastx_barcode_splitter.xml" />--> - <tool file="metag_tools/split_paired_reads.xml" /> - </section> - <section name="454: QC" id="short_read_analysis"> - <tool file="metag_tools/short_reads_figure_score.xml" /> - <tool file="metag_tools/short_reads_trim_seq.xml" /> - </section> - <section name="SOLiD: QC" id="solid_tools"> - <tool file="solid_tools/solid_qual_stats.xml" /> - <tool file="solid_tools/solid_qual_boxplot.xml" /> + <section name="NGS: QC and manipulation" id="cshl_library_information"> + <label text="Genetic FASTQ data" id="fastq" /> + <tool file="fastx_toolkit/fastx_quality_statistics.xml" /> + <tool file="fastx_toolkit/fastq_quality_boxplot.xml" /> + <tool file="fastx_toolkit/fastx_nucleotides_distribution.xml" /> + <!-- <tool file="fastx_toolkit/fasta_clipping_histogram.xml" /> --> + <tool file="fastx_toolkit/fastq_to_fasta.xml" /> + <tool 
file="fastx_toolkit/fastq_quality_converter.xml" /> + <!-- <tool file="fastx_toolkit/fastx_clipper.xml" /> --> + <tool file="fastx_toolkit/fastx_trimmer.xml" /> + <tool file="fastx_toolkit/fastx_renamer.xml" /> + <tool file="fastx_toolkit/fastx_reverse_complement.xml" /> + <tool file="fastx_toolkit/fastx_artifacts_filter.xml" /> + <tool file="fastx_toolkit/fastq_quality_filter.xml" /> + <!--<tool file="fastx_toolkit/fastx_barcode_splitter.xml" />--> + <tool file="metag_tools/split_paired_reads.xml" /> + <label text="Roche-454 Specific" id="454" /> + <tool file="metag_tools/short_reads_figure_score.xml" /> + <tool file="metag_tools/short_reads_trim_seq.xml" /> + <label text="AB-SOLiD Specific" id="solid" /> + <tool file="solid_tools/solid_qual_stats.xml" /> + <tool file="solid_tools/solid_qual_boxplot.xml" /> </section> <section name="NGS: Mapping" id="solexa_tools"> <!-- <tool file="sr_mapping/lastz_wrapper.xml" /> --> diff -r ebf68a725c09 -r 8255ed4330e9 tools/samtools/pileup_parser.pl --- a/tools/samtools/pileup_parser.pl Sun Sep 20 11:48:51 2009 -0400 +++ b/tools/samtools/pileup_parser.pl Mon Sep 21 10:32:48 2009 -0400 @@ -1,6 +1,8 @@ #! /usr/bin/perl -w
use strict; +use POSIX; +
die "Usage: pileup_parser.pl <in_file> <ref_base_column> <read_bases_column> <base_quality_column> <coverage column> <qv cutoff> <coverage cutoff> <SNPs only?> <output bed?> <coord_column> <out_file>\n" unless @ARGV == 11;
@@ -28,10 +30,12 @@
while (<IN>) { chop; + next if m/^#/; my @fields = split /\t/; next if $fields[ $ref_base_column ] eq "*"; # skip indel lines - next if $fields[ $cvrg_column ] < $cvrg_cutoff; # skip low coverage lines - my $read_bases = $fields[ $read_bases_column ]; + my $read_bases = $fields[ $read_bases_column ]; + die "Coverage column" . ($cvrg_column+1) . " contains non-numeric values. Check your input parameters as well as format of input dataset." if ( not isdigit $fields[ $cvrg_column ] ); + next if $fields[ $cvrg_column ] < $cvrg_cutoff; my $base_quality = $fields[ $base_quality_column ]; if ($read_bases =~ m/[$^+-]/) { @@ -42,11 +46,8 @@ $read_bases =~ s/[+-]{1}$indel_len.{$indel_len}//; # remove indel info from read base field } } - if ( length($read_bases) != length($base_quality) ) { - - $first_skipped_line = $_ if $invalid_line_counter == 0; - ++$invalid_line_counter; - } + die "Error parsing read bases and qualities in line $.. Last processed line conatined these values: " . join("\t", @fields) . "\n" if ( length($read_bases) != length($base_quality) ); + # after removing read block and indel data the length of read_base # field should identical to the length of base_quality field @@ -95,4 +96,4 @@
print STDERR "Could not parse $invalid_line_counter line(s) beginning with: $first_skipped_line\n" if $invalid_line_counter > 0; close IN; -close OUT; \ No newline at end of file +close OUT; diff -r ebf68a725c09 -r 8255ed4330e9 tools/samtools/sam2interval.py --- a/tools/samtools/sam2interval.py Sun Sep 20 11:48:51 2009 -0400 +++ b/tools/samtools/sam2interval.py Mon Sep 21 10:32:48 2009 -0400 @@ -78,7 +78,7 @@
for line in infile: line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#,@' ): + if line and not line.startswith( '#' ) and not line.startswith( '@' ) : fields = line.split( '\t' ) start = int( fields[ int( options.start_col ) - 1 ] ) - 1 end = 0 diff -r ebf68a725c09 -r 8255ed4330e9 tools/samtools/sam_bitwise_flag_filter.py --- a/tools/samtools/sam_bitwise_flag_filter.py Sun Sep 20 11:48:51 2009 -0400 +++ b/tools/samtools/sam_bitwise_flag_filter.py Mon Sep 21 10:32:48 2009 -0400 @@ -135,7 +135,7 @@
for line in infile: line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ): + if line and not line.startswith( '#' ) and not line.startswith( '@' ) : fields = line.split( '\t' ) sam_states = [] sam_states.append( bool( int( fields[ int( options.flag_col ) - 1 ] ) & 0x0001 ) ) diff -r ebf68a725c09 -r 8255ed4330e9 tools/sr_mapping/bowtie_wrapper.xml --- a/tools/sr_mapping/bowtie_wrapper.xml Sun Sep 20 11:48:51 2009 -0400 +++ b/tools/sr_mapping/bowtie_wrapper.xml Mon Sep 21 10:32:48 2009 -0400 @@ -336,7 +336,7 @@ </param> </when> </conditional> - <param name="offrate" type="integer" value="-1" label="Override the offrate of the index to n -o)" help="-1 for default" /> + <param name="offrate" type="integer" value="-1" label="Override the offrate of the index to n (-o)" help="-1 for default" /> <param name="seed" type="integer" value="-1" label="Seed for pseudo-random number generator (--seed)" help="-1 for default" /> </when> <!-- full --> </conditional> <!-- params --> @@ -443,31 +443,34 @@
The output is in SAM format, and has the following columns::
- 1 QNAME - Query (pair) NAME - 2 FLAG - bitwise FLAG - 3 RNAME - Reference sequence NAME - 4 POS - 1-based leftmost POSition/coordinate of clipped sequence - 5 MAPQ - MAPping Quality (Phred-scaled) - 6 CIGAR - extended CIGAR string - 7 MRNM - Mate Reference sequence NaMe ('=' if same as RNAME) - 8 MPOS - 1-based Mate POSition - 9 ISIZE - Inferred insert SIZE - 10 SEQ - query SEQuence on the same strand as the reference - 11 QUAL - query QUALity (ASCII-33 gives the Phred base quality) - 12 OPT - variable OPTional fields in the format TAG:VTYPE:VALU + Column Description + -------- -------------------------------------------------------- + 1 QNAME Query (pair) NAME + 2 FLAG bitwise FLAG + 3 RNAME Reference sequence NAME + 4 POS 1-based leftmost POSition/coordinate of clipped sequence + 5 MAPQ MAPping Quality (Phred-scaled) + 6 CIGAR extended CIGAR string + 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME) + 8 MPOS 1-based Mate POSition + 9 ISIZE Inferred insert SIZE + 10 SEQ query SEQuence on the same strand as the reference + 11 QUAL query QUALity (ASCII-33 gives the Phred base quality) + 12 OPT variable OPTional fields in the format TAG:VTYPE:VALUE
The flags are as follows::
- Flag - Description - 0x0001 - the read is paired in sequencing - 0x0002 - the read is mapped in a proper pair - 0x0004 - the query sequence itself is unmapped - 0x0008 - the mate is unmapped - 0x0010 - strand of the query (1 for reverse) - 0x0020 - strand of the mate - 0x0040 - the read is the first read in a pair - 0x0080 - the read is the second read in a pair - 0x0100 - the alignment is not primary + Flag Description + ------ ------------------------------------- + 0x0001 the read is paired in sequencing + 0x0002 the read is mapped in a proper pair + 0x0004 the query sequence itself is unmapped + 0x0008 the mate is unmapped + 0x0010 strand of the query (1 for reverse) + 0x0020 strand of the mate + 0x0040 the read is the first read in a pair + 0x0080 the read is the second read in a pair + 0x0100 the alignment is not primary
It looks like this (scroll sideways to see the entire example)::