details:
http://www.bx.psu.edu/hg/galaxy/rev/8255ed4330e9
changeset: 2731:8255ed4330e9
user: Anton Nekrutenko <anton(a)bx.psu.edu>
date: Mon Sep 21 10:32:48 2009 -0400
description:
More SR changes
5 file(s) affected in this change:
tool_conf.xml.sample
tools/samtools/pileup_parser.pl
tools/samtools/sam2interval.py
tools/samtools/sam_bitwise_flag_filter.py
tools/sr_mapping/bowtie_wrapper.xml
diffs (196 lines):
diff -r ebf68a725c09 -r 8255ed4330e9 tool_conf.xml.sample
--- a/tool_conf.xml.sample Sun Sep 20 11:48:51 2009 -0400
+++ b/tool_conf.xml.sample Mon Sep 21 10:32:48 2009 -0400
@@ -169,29 +169,28 @@
<tool file="fastx_toolkit/fasta_nucleotide_changer.xml" />
<tool file="fastx_toolkit/fastx_collapser.xml" />
</section>
- <section name="NGS: FASTQ QC and manipulation"
id="cshl_library_information">
- <tool file="fastx_toolkit/fastx_quality_statistics.xml" />
- <tool file="fastx_toolkit/fastq_quality_boxplot.xml" />
- <tool file="fastx_toolkit/fastx_nucleotides_distribution.xml" />
- <!-- <tool file="fastx_toolkit/fasta_clipping_histogram.xml" />
-->
- <tool file="fastx_toolkit/fastq_to_fasta.xml" />
- <tool file="fastx_toolkit/fastq_quality_converter.xml" />
- <!-- <tool file="fastx_toolkit/fastx_clipper.xml" /> -->
- <tool file="fastx_toolkit/fastx_trimmer.xml" />
- <tool file="fastx_toolkit/fastx_renamer.xml" />
- <tool file="fastx_toolkit/fastx_reverse_complement.xml" />
- <tool file="fastx_toolkit/fastx_artifacts_filter.xml" />
- <tool file="fastx_toolkit/fastq_quality_filter.xml" />
- <!--<tool file="fastx_toolkit/fastx_barcode_splitter.xml"
/>-->
- <tool file="metag_tools/split_paired_reads.xml" />
- </section>
- <section name="454: QC" id="short_read_analysis">
- <tool file="metag_tools/short_reads_figure_score.xml" />
- <tool file="metag_tools/short_reads_trim_seq.xml" />
- </section>
- <section name="SOLiD: QC" id="solid_tools">
- <tool file="solid_tools/solid_qual_stats.xml" />
- <tool file="solid_tools/solid_qual_boxplot.xml" />
+ <section name="NGS: QC and manipulation"
id="cshl_library_information">
+ <label text="Generic FASTQ data" id="fastq" />
+ <tool file="fastx_toolkit/fastx_quality_statistics.xml" />
+ <tool file="fastx_toolkit/fastq_quality_boxplot.xml" />
+ <tool file="fastx_toolkit/fastx_nucleotides_distribution.xml" />
+ <!-- <tool file="fastx_toolkit/fasta_clipping_histogram.xml"
/> -->
+ <tool file="fastx_toolkit/fastq_to_fasta.xml" />
+ <tool file="fastx_toolkit/fastq_quality_converter.xml" />
+ <!-- <tool file="fastx_toolkit/fastx_clipper.xml" /> -->
+ <tool file="fastx_toolkit/fastx_trimmer.xml" />
+ <tool file="fastx_toolkit/fastx_renamer.xml" />
+ <tool file="fastx_toolkit/fastx_reverse_complement.xml" />
+ <tool file="fastx_toolkit/fastx_artifacts_filter.xml" />
+ <tool file="fastx_toolkit/fastq_quality_filter.xml" />
+ <!--<tool file="fastx_toolkit/fastx_barcode_splitter.xml"
/>-->
+ <tool file="metag_tools/split_paired_reads.xml" />
+ <label text="Roche-454 Specific" id="454" />
+ <tool file="metag_tools/short_reads_figure_score.xml" />
+ <tool file="metag_tools/short_reads_trim_seq.xml" />
+ <label text="AB-SOLiD Specific" id="solid" />
+ <tool file="solid_tools/solid_qual_stats.xml" />
+ <tool file="solid_tools/solid_qual_boxplot.xml" />
</section>
<section name="NGS: Mapping" id="solexa_tools">
<!-- <tool file="sr_mapping/lastz_wrapper.xml" /> -->
diff -r ebf68a725c09 -r 8255ed4330e9 tools/samtools/pileup_parser.pl
--- a/tools/samtools/pileup_parser.pl Sun Sep 20 11:48:51 2009 -0400
+++ b/tools/samtools/pileup_parser.pl Mon Sep 21 10:32:48 2009 -0400
@@ -1,6 +1,8 @@
#! /usr/bin/perl -w
use strict;
+use POSIX;
+
die "Usage: pileup_parser.pl <in_file> <ref_base_column>
<read_bases_column> <base_quality_column> <coverage column> <qv
cutoff> <coverage cutoff> <SNPs only?> <output bed?>
<coord_column> <out_file>\n" unless @ARGV == 11;
@@ -28,10 +30,12 @@
while (<IN>) {
chop;
+ next if m/^\#/;
my @fields = split /\t/;
next if $fields[ $ref_base_column ] eq "*"; # skip indel lines
- next if $fields[ $cvrg_column ] < $cvrg_cutoff; # skip low coverage lines
- my $read_bases = $fields[ $read_bases_column ];
+ my $read_bases = $fields[ $read_bases_column ];
+ die "Coverage column" . ($cvrg_column+1) . " contains non-numeric
values. Check your input parameters as well as format of input dataset." if ( not
isdigit $fields[ $cvrg_column ] );
+ next if $fields[ $cvrg_column ] < $cvrg_cutoff;
my $base_quality = $fields[ $base_quality_column ];
if ($read_bases =~ m/[\$\^\+-]/) {
@@ -42,11 +46,8 @@
$read_bases =~ s/[\+-]{1}$indel_len.{$indel_len}//; # remove indel info from read base
field
}
}
- if ( length($read_bases) != length($base_quality) ) {
-
- $first_skipped_line = $_ if $invalid_line_counter == 0;
- ++$invalid_line_counter;
- }
+ die "Error parsing read bases and qualities in line $.. Last processed line
conatined these values: " . join("\t", @fields) . "\n" if (
length($read_bases) != length($base_quality) );
+
# after removing read block and indel data the length of read_base
# field should identical to the length of base_quality field
@@ -95,4 +96,4 @@
print STDERR "Could not parse $invalid_line_counter line(s) beginning with:
$first_skipped_line\n" if $invalid_line_counter > 0;
close IN;
-close OUT;
\ No newline at end of file
+close OUT;
diff -r ebf68a725c09 -r 8255ed4330e9 tools/samtools/sam2interval.py
--- a/tools/samtools/sam2interval.py Sun Sep 20 11:48:51 2009 -0400
+++ b/tools/samtools/sam2interval.py Mon Sep 21 10:32:48 2009 -0400
@@ -78,7 +78,7 @@
for line in infile:
line = line.rstrip( '\r\n' )
- if line and not line.startswith( '#,@' ):
+ if line and not line.startswith( '#' ) and not line.startswith(
'@' ) :
fields = line.split( '\t' )
start = int( fields[ int( options.start_col ) - 1 ] ) - 1
end = 0
diff -r ebf68a725c09 -r 8255ed4330e9 tools/samtools/sam_bitwise_flag_filter.py
--- a/tools/samtools/sam_bitwise_flag_filter.py Sun Sep 20 11:48:51 2009 -0400
+++ b/tools/samtools/sam_bitwise_flag_filter.py Mon Sep 21 10:32:48 2009 -0400
@@ -135,7 +135,7 @@
for line in infile:
line = line.rstrip( '\r\n' )
- if line and not line.startswith( '#' ):
+ if line and not line.startswith( '#' ) and not line.startswith(
'@' ) :
fields = line.split( '\t' )
sam_states = []
sam_states.append( bool( int( fields[ int( options.flag_col ) - 1 ] ) &
0x0001 ) )
diff -r ebf68a725c09 -r 8255ed4330e9 tools/sr_mapping/bowtie_wrapper.xml
--- a/tools/sr_mapping/bowtie_wrapper.xml Sun Sep 20 11:48:51 2009 -0400
+++ b/tools/sr_mapping/bowtie_wrapper.xml Mon Sep 21 10:32:48 2009 -0400
@@ -336,7 +336,7 @@
</param>
</when>
</conditional>
- <param name="offrate" type="integer"
value="-1" label="Override the offrate of the index to n -o)"
help="-1 for default" />
+ <param name="offrate" type="integer"
value="-1" label="Override the offrate of the index to n (-o)"
help="-1 for default" />
<param name="seed" type="integer" value="-1"
label="Seed for pseudo-random number generator (--seed)" help="-1 for
default" />
</when> <!-- full -->
</conditional> <!-- params -->
@@ -443,31 +443,34 @@
The output is in SAM format, and has the following columns::
- 1 QNAME - Query (pair) NAME
- 2 FLAG - bitwise FLAG
- 3 RNAME - Reference sequence NAME
- 4 POS - 1-based leftmost POSition/coordinate of clipped sequence
- 5 MAPQ - MAPping Quality (Phred-scaled)
- 6 CIGAR - extended CIGAR string
- 7 MRNM - Mate Reference sequence NaMe ('=' if same as RNAME)
- 8 MPOS - 1-based Mate POSition
- 9 ISIZE - Inferred insert SIZE
- 10 SEQ - query SEQuence on the same strand as the reference
- 11 QUAL - query QUALity (ASCII-33 gives the Phred base quality)
- 12 OPT - variable OPTional fields in the format TAG:VTYPE:VALU
+ Column Description
+ -------- --------------------------------------------------------
+ 1 QNAME Query (pair) NAME
+ 2 FLAG bitwise FLAG
+ 3 RNAME Reference sequence NAME
+ 4 POS 1-based leftmost POSition/coordinate of clipped sequence
+ 5 MAPQ MAPping Quality (Phred-scaled)
+ 6 CIGAR extended CIGAR string
+ 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME)
+ 8 MPOS 1-based Mate POSition
+ 9 ISIZE Inferred insert SIZE
+ 10 SEQ query SEQuence on the same strand as the reference
+ 11 QUAL query QUALity (ASCII-33 gives the Phred base quality)
+ 12 OPT variable OPTional fields in the format TAG:VTYPE:VALUE
The flags are as follows::
- Flag - Description
- 0x0001 - the read is paired in sequencing
- 0x0002 - the read is mapped in a proper pair
- 0x0004 - the query sequence itself is unmapped
- 0x0008 - the mate is unmapped
- 0x0010 - strand of the query (1 for reverse)
- 0x0020 - strand of the mate
- 0x0040 - the read is the first read in a pair
- 0x0080 - the read is the second read in a pair
- 0x0100 - the alignment is not primary
+ Flag Description
+ ------ -------------------------------------
+ 0x0001 the read is paired in sequencing
+ 0x0002 the read is mapped in a proper pair
+ 0x0004 the query sequence itself is unmapped
+ 0x0008 the mate is unmapped
+ 0x0010 strand of the query (1 for reverse)
+ 0x0020 strand of the mate
+ 0x0040 the read is the first read in a pair
+ 0x0080 the read is the second read in a pair
+ 0x0100 the alignment is not primary
It looks like this (scroll sideways to see the entire example)::