[hg] galaxy 2733: merge

25 Sep 2009

details:   http://www.bx.psu.edu/hg/galaxy/rev/2f848dc3e35c
changeset: 2733:2f848dc3e35c
user:      jeremy goecks <jeremy.goecks@emory.edu>
date:      Mon Sep 21 10:40:25 2009 -0400
description:
merge

0 file(s) affected in this change:


diffs (196 lines):

diff -r c2e3183756da -r 2f848dc3e35c tool_conf.xml.sample

--- a/tool_conf.xml.sample	Mon Sep 21 10:39:58 2009 -0400
+++ b/tool_conf.xml.sample	Mon Sep 21 10:40:25 2009 -0400
@@ -169,29 +169,28 @@
     <tool file="fastx_toolkit/fasta_nucleotide_changer.xml" />
     <tool file="fastx_toolkit/fastx_collapser.xml" />
   </section>
-  <section name="NGS: FASTQ QC and manipulation" id="cshl_library_information">
-    <tool file="fastx_toolkit/fastx_quality_statistics.xml" />
-    <tool file="fastx_toolkit/fastq_quality_boxplot.xml" />
-    <tool file="fastx_toolkit/fastx_nucleotides_distribution.xml" />
-    <!-- <tool file="fastx_toolkit/fasta_clipping_histogram.xml" /> -->
-    <tool file="fastx_toolkit/fastq_to_fasta.xml" />
-    <tool file="fastx_toolkit/fastq_quality_converter.xml" />
-    <!-- <tool file="fastx_toolkit/fastx_clipper.xml" /> -->
-    <tool file="fastx_toolkit/fastx_trimmer.xml" />
-    <tool file="fastx_toolkit/fastx_renamer.xml" />
-    <tool file="fastx_toolkit/fastx_reverse_complement.xml" />
-    <tool file="fastx_toolkit/fastx_artifacts_filter.xml" />
-    <tool file="fastx_toolkit/fastq_quality_filter.xml" />
-    <!--<tool file="fastx_toolkit/fastx_barcode_splitter.xml" />-->
-    <tool file="metag_tools/split_paired_reads.xml" />
-  </section>
-  <section name="454: QC" id="short_read_analysis">
-    <tool file="metag_tools/short_reads_figure_score.xml" />
-    <tool file="metag_tools/short_reads_trim_seq.xml" />
-  </section>
-  <section name="SOLiD: QC" id="solid_tools">
-    <tool file="solid_tools/solid_qual_stats.xml" />
-    <tool file="solid_tools/solid_qual_boxplot.xml" />
+  <section name="NGS: QC and manipulation" id="cshl_library_information">
+    <label text="Genetic FASTQ data" id="fastq" />
+        <tool file="fastx_toolkit/fastx_quality_statistics.xml" />
+        <tool file="fastx_toolkit/fastq_quality_boxplot.xml" />
+        <tool file="fastx_toolkit/fastx_nucleotides_distribution.xml" />
+        <!-- <tool file="fastx_toolkit/fasta_clipping_histogram.xml" /> -->
+        <tool file="fastx_toolkit/fastq_to_fasta.xml" />
+        <tool file="fastx_toolkit/fastq_quality_converter.xml" />
+        <!-- <tool file="fastx_toolkit/fastx_clipper.xml" /> -->
+        <tool file="fastx_toolkit/fastx_trimmer.xml" />
+        <tool file="fastx_toolkit/fastx_renamer.xml" />
+        <tool file="fastx_toolkit/fastx_reverse_complement.xml" />
+        <tool file="fastx_toolkit/fastx_artifacts_filter.xml" />
+        <tool file="fastx_toolkit/fastq_quality_filter.xml" />
+        <!--<tool file="fastx_toolkit/fastx_barcode_splitter.xml" />-->
+        <tool file="metag_tools/split_paired_reads.xml" />
+    <label text="Roche-454 Specific" id="454" />
+        <tool file="metag_tools/short_reads_figure_score.xml" />
+        <tool file="metag_tools/short_reads_trim_seq.xml" />
+    <label text="AB-SOLiD Specific" id="solid" />
+        <tool file="solid_tools/solid_qual_stats.xml" />
+        <tool file="solid_tools/solid_qual_boxplot.xml" />
   </section>
   <section name="NGS: Mapping" id="solexa_tools">
    <!-- <tool file="sr_mapping/lastz_wrapper.xml" /> -->
diff -r c2e3183756da -r 2f848dc3e35c tools/samtools/pileup_parser.pl
--- a/tools/samtools/pileup_parser.pl	Mon Sep 21 10:39:58 2009 -0400
+++ b/tools/samtools/pileup_parser.pl	Mon Sep 21 10:40:25 2009 -0400
@@ -1,6 +1,8 @@
 #! /usr/bin/perl -w
 
 use strict;
+use POSIX;
+
 
 die "Usage: pileup_parser.pl <in_file> <ref_base_column> <read_bases_column> <base_quality_column> <coverage column> <qv cutoff> <coverage cutoff> <SNPs only?> <output bed?> <coord_column> <out_file>\n" unless @ARGV == 11;
 
@@ -28,10 +30,12 @@
 
 while (<IN>) {
 	chop;
+	next if m/^\#/;
 	my @fields = split /\t/;
 	next if $fields[ $ref_base_column ] eq "*"; # skip indel lines
-	next if $fields[ $cvrg_column ] < $cvrg_cutoff; # skip low coverage lines
-	my $read_bases   = $fields[ $read_bases_column ];
+ 	my $read_bases   = $fields[ $read_bases_column ];
+ 	die "Coverage column" . ($cvrg_column+1) . " contains non-numeric values. Check your input parameters as well as format of input dataset." if ( not isdigit $fields[ $cvrg_column ] );
+    next if $fields[ $cvrg_column ] < $cvrg_cutoff;
 	my $base_quality = $fields[ $base_quality_column ];
 	
 	if ($read_bases =~ m/[\$\^\+-]/) {
@@ -42,11 +46,8 @@
 			$read_bases =~ s/[\+-]{1}$indel_len.{$indel_len}//; # remove indel info from read base field
 		}
 	}
-	if ( length($read_bases) != length($base_quality) ) {
-		
-		$first_skipped_line = $_ if $invalid_line_counter == 0;
-		++$invalid_line_counter;
-	}
+	die "Error parsing read bases and qualities in line $.. Last processed line conatined these values: " . join("\t", @fields) . "\n" if ( length($read_bases) != length($base_quality) );
+
 	# after removing read block and indel data the length of read_base 
 	# field should identical to the length of base_quality field
 	
@@ -95,4 +96,4 @@
 
 print STDERR "Could not parse $invalid_line_counter line(s) beginning with: $first_skipped_line\n" if $invalid_line_counter > 0;
 close IN;
-close OUT;
\ No newline at end of file
+close OUT;
diff -r c2e3183756da -r 2f848dc3e35c tools/samtools/sam2interval.py
--- a/tools/samtools/sam2interval.py	Mon Sep 21 10:39:58 2009 -0400
+++ b/tools/samtools/sam2interval.py	Mon Sep 21 10:40:25 2009 -0400
@@ -78,7 +78,7 @@
 
     for line in infile:
         line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#,@' ):
+        if line and not line.startswith( '#' ) and not line.startswith( '@' ) :
             fields = line.split( '\t' )
             start = int( fields[ int( options.start_col ) - 1 ] ) - 1
             end = 0
diff -r c2e3183756da -r 2f848dc3e35c tools/samtools/sam_bitwise_flag_filter.py
--- a/tools/samtools/sam_bitwise_flag_filter.py	Mon Sep 21 10:39:58 2009 -0400
+++ b/tools/samtools/sam_bitwise_flag_filter.py	Mon Sep 21 10:40:25 2009 -0400
@@ -135,7 +135,7 @@
 
     for line in infile:
         line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ):
+        if line and not line.startswith( '#' ) and not line.startswith( '@' ) :
             fields = line.split( '\t' )
             sam_states = []
             sam_states.append( bool( int( fields[ int( options.flag_col ) - 1 ] ) & 0x0001 ) )
diff -r c2e3183756da -r 2f848dc3e35c tools/sr_mapping/bowtie_wrapper.xml
--- a/tools/sr_mapping/bowtie_wrapper.xml	Mon Sep 21 10:39:58 2009 -0400
+++ b/tools/sr_mapping/bowtie_wrapper.xml	Mon Sep 21 10:40:25 2009 -0400
@@ -336,7 +336,7 @@
                 </param>
               </when>
             </conditional>
-            <param name="offrate" type="integer" value="-1" label="Override the offrate of the index to n -o)" help="-1 for default" />
+            <param name="offrate" type="integer" value="-1" label="Override the offrate of the index to n (-o)" help="-1 for default" />
             <param name="seed" type="integer" value="-1" label="Seed for pseudo-random number generator (--seed)" help="-1 for default" />
           </when> <!-- full -->
         </conditional> <!-- params -->
@@ -443,31 +443,34 @@
 
 The output is in SAM format, and has the following columns::
 
-  1  QNAME  -  Query (pair) NAME
-  2  FLAG   -  bitwise FLAG
-  3  RNAME  -  Reference sequence NAME
-  4  POS    -  1-based leftmost POSition/coordinate of clipped sequence
-  5  MAPQ   -  MAPping Quality (Phred-scaled)
-  6  CIGAR  -  extended CIGAR string
-  7  MRNM   -  Mate Reference sequence NaMe ('=' if same as RNAME)
-  8  MPOS   -  1-based Mate POSition
-  9  ISIZE  -  Inferred insert SIZE
-  10 SEQ    -  query SEQuence on the same strand as the reference
-  11 QUAL   -  query QUALity (ASCII-33 gives the Phred base quality)
-  12 OPT    -  variable OPTional fields in the format TAG:VTYPE:VALU
+    Column  Description
+  --------  --------------------------------------------------------   
+   1 QNAME  Query (pair) NAME
+   2 FLAG   bitwise FLAG
+   3 RNAME  Reference sequence NAME
+   4 POS    1-based leftmost POSition/coordinate of clipped sequence
+   5 MAPQ   MAPping Quality (Phred-scaled)
+   6 CIGAR  extended CIGAR string
+   7 MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
+   8 MPOS   1-based Mate POSition
+   9 ISIZE  Inferred insert SIZE
+  10 SEQ    query SEQuence on the same strand as the reference
+  11 QUAL   query QUALity (ASCII-33 gives the Phred base quality)
+  12 OPT    variable OPTional fields in the format TAG:VTYPE:VALUE
   
 The flags are as follows::
 
-  Flag    -  Description
-  0x0001  -  the read is paired in sequencing
-  0x0002  -  the read is mapped in a proper pair
-  0x0004  -  the query sequence itself is unmapped
-  0x0008  -  the mate is unmapped
-  0x0010  -  strand of the query (1 for reverse)
-  0x0020  -  strand of the mate
-  0x0040  -  the read is the first read in a pair
-  0x0080  -  the read is the second read in a pair
-  0x0100  -  the alignment is not primary
+    Flag  Description
+  ------  -------------------------------------
+  0x0001  the read is paired in sequencing
+  0x0002  the read is mapped in a proper pair
+  0x0004  the query sequence itself is unmapped
+  0x0008  the mate is unmapped
+  0x0010  strand of the query (1 for reverse)
+  0x0020  strand of the mate
+  0x0040  the read is the first read in a pair
+  0x0080  the read is the second read in a pair
+  0x0100  the alignment is not primary
 
 It looks like this (scroll sideways to see the entire example)::

    

Nate Coraor

tags

participants (1)