[galaxy-commits] commit/galaxy-central: jgoecks: Basic support for Tophat2.

20 Apr 2012

1 new commit in galaxy-central:


https://bitbucket.org/galaxy/galaxy-central/changeset/6af88237056f/
changeset:   6af88237056f
user:        jgoecks
date:        2012-04-20 23:08:55
summary:     Basic support for Tophat2.
affected #:  4 files

diff -r 89786a8d42c1fbbfa84bfcbab07702a942d57d1b -r 6af88237056f0d4c7ae4c3dd5af647cde4830710 test-data/tophat_out2j.bed

--- a/test-data/tophat_out2j.bed
+++ b/test-data/tophat_out2j.bed
@@ -1,3 +1,3 @@
 track name=junctions description="TopHat junctions"
-test_chromosome	179	400	JUNC00000001	38	+	179	400	255,0,0	2	71,50	0,171
-test_chromosome	350	549	JUNC00000002	30	+	350	549	255,0,0	2	50,49	0,150
+test_chromosome	179	400	JUNC00000001	45	+	179	400	255,0,0	2	71,50	0,171
+test_chromosome	350	550	JUNC00000002	38	+	350	550	255,0,0	2	50,50	0,150


diff -r 89786a8d42c1fbbfa84bfcbab07702a942d57d1b -r 6af88237056f0d4c7ae4c3dd5af647cde4830710 test-data/tophat_out4j.bed
--- a/test-data/tophat_out4j.bed
+++ b/test-data/tophat_out4j.bed
@@ -1,3 +1,3 @@
 track name=junctions description="TopHat junctions"
-test_chromosome	179	400	JUNC00000001	38	+	179	400	255,0,0	2	71,50	0,171
-test_chromosome	350	549	JUNC00000002	30	+	350	549	255,0,0	2	50,49	0,150
+test_chromosome	179	400	JUNC00000001	45	+	179	400	255,0,0	2	71,50	0,171
+test_chromosome	350	550	JUNC00000002	38	+	350	550	255,0,0	2	50,50	0,150


diff -r 89786a8d42c1fbbfa84bfcbab07702a942d57d1b -r 6af88237056f0d4c7ae4c3dd5af647cde4830710 tools/ngs_rna/tophat_wrapper.py
--- a/tools/ngs_rna/tophat_wrapper.py
+++ b/tools/ngs_rna/tophat_wrapper.py
@@ -28,7 +28,6 @@
     parser.add_option( '-I', '--max-intron-length', dest='max_intron_length', 
                         help='The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.' )
     parser.add_option( '-g', '--max_multihits', dest='max_multihits', help='Maximum number of alignments to be allowed' )
-    parser.add_option( '', '--initial-read-mismatches', dest='initial_read_mismatches', help='Number of mismatches allowed in the initial read mapping' )
     parser.add_option( '', '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' )
     parser.add_option( '', '--seg-length', dest='seg_length', help='Minimum length of read segments' )
     parser.add_option( '', '--library-type', dest='library_type', help='TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.' )
@@ -53,15 +52,10 @@
     parser.add_option( '', '--no-novel-indels', action="store_true", dest='no_novel_indels', help="Skip indel search. Indel search is enabled by default.")
     # Types of search.
     parser.add_option( '', '--microexon-search', action="store_true", dest='microexon_search', help='With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.')
-    parser.add_option( '', '--closure-search', action="store_true", dest='closure_search', help='Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (<= 50bp)')
-    parser.add_option( '', '--no-closure-search', action="store_false", dest='closure_search' )
     parser.add_option( '', '--coverage-search', action="store_true", dest='coverage_search', help='Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.')
     parser.add_option( '', '--no-coverage-search', action="store_false", dest='coverage_search' )
     parser.add_option( '', '--min-segment-intron', dest='min_segment_intron', help='Minimum intron length that may be found during split-segment search' )
     parser.add_option( '', '--max-segment-intron', dest='max_segment_intron', help='Maximum intron length that may be found during split-segment search' )
-    parser.add_option( '', '--min-closure-exon', dest='min_closure_exon', help='Minimum length for exonic hops in potential splice graph' )
-    parser.add_option( '', '--min-closure-intron', dest='min_closure_intron', help='Minimum intron length that may be found during closure search' )
-    parser.add_option( '', '--max-closure-intron', dest='max_closure_intron', help='Maximum intron length that may be found during closure search' )
     parser.add_option( '', '--min-coverage-intron', dest='min_coverage_intron', help='Minimum intron length that may be found during coverage search' )
     parser.add_option( '', '--max-coverage-intron', dest='max_coverage_intron', help='Maximum intron length that may be found during coverage search' )
 
@@ -175,16 +169,10 @@
                 opts += ' --coverage-search --min-coverage-intron %s --max-coverage-intron %s' % ( options.min_coverage_intron, options.max_coverage_intron )
             else:
                 opts += ' --no-coverage-search'
-            if options.closure_search:
-                opts += ' --closure-search --min-closure-exon %s --min-closure-intron %s --max-closure-intron %s'  % ( options.min_closure_exon, options.min_closure_intron, options.max_closure_intron ) 
-            else:
-                opts += ' --no-closure-search'
             if options.microexon_search:
                 opts += ' --microexon-search'
             if options.single_paired == 'paired':
                 opts += ' --mate-std-dev %s' % options.mate_std_dev
-            if options.initial_read_mismatches:
-                opts += ' --initial-read-mismatches %d' % int( options.initial_read_mismatches )
             if options.seg_mismatches:
                 opts += ' --segment-mismatches %d' % int( options.seg_mismatches )
             if options.seg_length:


diff -r 89786a8d42c1fbbfa84bfcbab07702a942d57d1b -r 6af88237056f0d4c7ae4c3dd5af647cde4830710 tools/ngs_rna/tophat_wrapper.xml
--- a/tools/ngs_rna/tophat_wrapper.xml
+++ b/tools/ngs_rna/tophat_wrapper.xml
@@ -1,5 +1,5 @@
 <tool id="tophat" name="Tophat for Illumina" version="0.5">
-    <!-- Wrapper compatible with Tophat versions 1.3.0 to 1.4.1 -->
+    <!-- Wrapper compatible with Tophat versions 1.3.0 to 2.0.0 -->
     <description>Find splice junctions using RNA-seq data</description>
     <version_command>tophat --version</version_command>
     <requirements>
@@ -38,7 +38,6 @@
                     -g $singlePaired.sParams.max_multihits
                     --min-segment-intron $singlePaired.sParams.min_segment_intron
                     --max-segment-intron $singlePaired.sParams.max_segment_intron
-                    --initial-read-mismatches=$singlePaired.sParams.initial_read_mismatches
                     --seg-mismatches=$singlePaired.sParams.seg_mismatches
                     --seg-length=$singlePaired.sParams.seg_length
                     --library-type=$singlePaired.sParams.library_type
@@ -66,14 +65,6 @@
                         #end if
                     #end if
 
-                    #if $singlePaired.sParams.closure_search.use_search == "Yes":
-                        --closure-search
-                        --min-closure-exon $singlePaired.sParams.closure_search.min_closure_exon
-                        --min-closure-intron $singlePaired.sParams.closure_search.min_closure_intron
-                        --max-closure-intron $singlePaired.sParams.closure_search.max_closure_intron
-                    #else:
-                        --no-closure-search
-                    #end if
                     #if $singlePaired.sParams.coverage_search.use_search == "Yes":
                         --coverage-search
                         --min-coverage-intron $singlePaired.sParams.coverage_search.min_coverage_intron
@@ -99,7 +90,6 @@
                     -g $singlePaired.pParams.max_multihits
                     --min-segment-intron $singlePaired.pParams.min_segment_intron
                     --max-segment-intron $singlePaired.pParams.max_segment_intron
-                    --initial-read-mismatches=$singlePaired.pParams.initial_read_mismatches
                     --seg-mismatches=$singlePaired.pParams.seg_mismatches
                     --seg-length=$singlePaired.pParams.seg_length
                     --library-type=$singlePaired.pParams.library_type
@@ -127,14 +117,6 @@
                         #end if
                     #end if
 
-                    #if $singlePaired.pParams.closure_search.use_search == "Yes":
-                        --closure-search
-                        --min-closure-exon $singlePaired.pParams.closure_search.min_closure_exon
-                        --min-closure-intron $singlePaired.pParams.closure_search.min_closure_intron
-                        --max-closure-intron $singlePaired.pParams.closure_search.max_closure_intron
-                    #else:
-                        --no-closure-search
-                    #end if
                     #if $singlePaired.pParams.coverage_search.use_search == "Yes":
                         --coverage-search
                         --min-coverage-intron $singlePaired.pParams.coverage_search.min_coverage_intron
@@ -206,7 +188,6 @@
                   <param name="max_multihits" type="integer" value="20" label="Maximum number of alignments to be allowed" />
                   <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
                   <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" />
-                  <param name="initial_read_mismatches" type="integer" min="0" value="2" label="Number of mismatches allowed in the initial read mapping" />
                   <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" />
                   <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
                   
@@ -245,19 +226,6 @@
                       <when value="No" /></conditional><!-- /own_junctions -->
                   
-                  <!-- Closure search. -->
-                  <conditional name="closure_search">
-                    <param name="use_search" type="select" label="Use Closure Search">
-                      <option value="No">No</option>
-                      <option value="Yes">Yes</option>
-                    </param>
-                    <when value="Yes">
-                        <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." />
-                        <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" />
-                        <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" />
-                    </when>
-                    <when value="No" />
-                  </conditional><!-- Coverage search. --><conditional name="coverage_search"><param name="use_search" type="select" label="Use Coverage Search">
@@ -312,7 +280,6 @@
                   <param name="max_multihits" type="integer" value="20" label="Maximum number of alignments to be allowed" />
                   <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
                   <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" />
-                  <param name="initial_read_mismatches" type="integer" min="0" value="2" label="Number of mismatches allowed in the initial read mapping" />
                   <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" />
                   <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
                   <!-- Options for supplying own junctions. -->
@@ -350,19 +317,6 @@
                       <when value="No" /></conditional><!-- /own_junctions -->
                   
-                  <!-- Closure search. -->
-                  <conditional name="closure_search">
-                    <param name="use_search" type="select" label="Use Closure Search">
-                      <option value="No">No</option>
-                      <option value="Yes">Yes</option>
-                    </param>
-                    <when value="Yes">
-                        <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." />
-                        <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" />
-                        <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" />
-                    </when>
-                    <when value="No" />
-                  </conditional><!-- Coverage search. --><conditional name="coverage_search"><param name="use_search" type="select" label="Use Coverage Search">
@@ -500,7 +454,7 @@
         <test><!-- Tophat commands:
             bowtie-build -f test-data/tophat_in1.fasta tophat_in1
-            tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intro 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger
+            tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2 +segment-length 25 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger
             Replace the + with double-dash
             Rename the files in tmp_dir appropriately
             -->
@@ -527,10 +481,6 @@
             <param name="use_juncs" value="No" />
             <param name="no_novel_juncs" value="No" />
             <param name="use_search" value="Yes" />
-            <param name="min_closure_exon" value="50" />
-            <param name="min_closure_intron" value="50" />
-            <param name="max_closure_intron" value="5000" />
-            <param name="use_search" value="Yes" />
             <param name="min_coverage_intron" value="50" />
             <param name="max_coverage_intron" value="20000" />
             <param name="microexon_search" value="Yes" />
@@ -643,17 +593,11 @@
   -G/--GTF [GTF 2.2 file]           Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping.
   -j/--raw-juncs [juncs file]       Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive.
   -no-novel-juncs                   Only look for junctions indicated in the supplied GFF file. (ignored without -G)
-  --no-closure-search               Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default.
-  --closure-search                  Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (about or less than 50bp)
   --no-coverage-search              Disables the coverage based search for junctions.
   --coverage-search                 Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.
   --microexon-search                With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.
-  --butterfly-search                TopHat will use a slower but potentially more sensitive algorithm to find junctions in addition to its standard search. Consider using this if you expect that your experiment produced a lot of reads from pre-mRNA, that fall within the introns of your transcripts.
   --segment-mismatches              Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2.
   --segment-length                  Each read is cut up into segments, each at least this long. These segments are mapped independently. The default is 25.
-  --min-closure-exon                During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50.
-  --min-closure-intron              The minimum intron length that may be found during closure search. The default is 50.
-  --max-closure-intron              The maximum intron length that may be found during closure search. The default is 5000.
   --min-coverage-intron             The minimum intron length that may be found during coverage search. The default is 50.
   --max-coverage-intron             The maximum intron length that may be found during coverage search. The default is 20000.
   --min-segment-intron              The minimum intron length that may be found during split-segment search. The default is 50.

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because the notification service is enabled and lists this email
address as its recipient.

    

[galaxy-commits] commit/galaxy-central: jgoecks: Basic support for Tophat2.

Bitbucket