galaxy-dist commit 9d68027b0109: Add options to Tophat wrapper for specifying own splice junctions.
# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User jeremy goecks <jeremy.goecks@emory.edu> # Date 1288805582 14400 # Node ID 9d68027b01096e2b32101234878d11946c03d08c # Parent 49f0e8441a4da6b1ec03250448ab84854f07aa77 Add options to Tophat wrapper for specifying own splice junctions. --- a/tools/ngs_rna/tophat_wrapper.py +++ b/tools/ngs_rna/tophat_wrapper.py @@ -29,6 +29,24 @@ def __main__(): help='The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.' ) parser.add_option( '-F', '--junction_filter', dest='junction_filter', help='Filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)' ) parser.add_option( '-g', '--max_multihits', dest='max_multihits', help='Maximum number of alignments to be allowed' ) + parser.add_option( '', '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' ) + parser.add_option( '', '--seg-length', dest='seg_length', help='Minimum length of read segments' ) + + # Options for supplying own junctions + parser.add_option( '-G', '--GTF', dest='gene_model_annotations', help='Supply TopHat with a list of gene model annotations. \ + TopHat will use the exon records in this file to build \ + a set of known splice junctions for each gene, and will \ + attempt to align reads to these junctions even if they \ + would not normally be covered by the initial mapping.') + parser.add_option( '-j', '--raw-juncs', dest='raw_juncs', help='Supply TopHat with a list of raw junctions. Junctions are \ + specified one per line, in a tab-delimited format. 
Records \ + look like: <chrom><left><right><+/-> left and right are \ + zero-based coordinates, and specify the last character of the \ + left sequenced to be spliced to the first character of the right \ + sequence, inclusive.') + parser.add_option( '', '--no-novel-juncs', action="store_true", dest='no_novel_juncs', help="Only look for junctions indicated in the \ + supplied GFF file. (ignored without -G)") + # Types of search. parser.add_option( '', '--microexon-search', action="store_true", dest='microexon_search', help='With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.') parser.add_option( '', '--closure-search', action="store_true", dest='closure_search', help='Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (<= 50bp)') parser.add_option( '', '--no-closure-search', action="store_false", dest='closure_search' ) @@ -41,8 +59,6 @@ def __main__(): parser.add_option( '', '--max-closure-intron', dest='max_closure_intron', help='Maximum intron length that may be found during closure search' ) parser.add_option( '', '--min-coverage-intron', dest='min_coverage_intron', help='Minimum intron length that may be found during coverage search' ) parser.add_option( '', '--max-coverage-intron', dest='max_coverage_intron', help='Maximum intron length that may be found during coverage search' ) - parser.add_option( '', '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' ) - parser.add_option( '', '--seg-length', dest='seg_length', help='Minimum length of read segments' ) # Wrapper options. 
parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' ) @@ -107,6 +123,15 @@ def __main__(): if float( options.junction_filter ) != 0.0: opts += ' -F %s' % options.junction_filter opts += ' -g %s' % options.max_multihits + # Custom junctions options. + if options.gene_model_annotations: + opts += ' -G %s' % options.gene_model_annotations + if options.raw_juncs: + opts += ' -j %s' % options.raw_juncs + if options.no_novel_juncs: + opts += ' --no-novel-juncs' + + # Search type options. if options.coverage_search: opts += ' --coverage-search --min-coverage-intron %s --max-coverage-intron %s' % ( options.min_coverage_intron, options.max_coverage_intron ) else: --- a/tools/ngs_rna/tophat_wrapper.xml +++ b/tools/ngs_rna/tophat_wrapper.xml @@ -45,6 +45,21 @@ --max-segment-intron $singlePaired.sParams.max_segment_intron --seg-mismatches=$singlePaired.sParams.seg_mismatches --seg-length=$singlePaired.sParams.seg_length + + ## Supplying junctions parameters. + #if $singlePaired.sParams.own_junctions.use_junctions == "Yes": + #if $singlePaired.sParams.own_junctions.gene_model_ann.use_annotations == "Yes": + -G $singlePaired.sParams.own_junctions.gene_model_ann.gene_annotation_model + #end if + #if $singlePaired.sParams.own_junctions.raw_juncs.use_juncs == "Yes": + -j $singlePaired.sParams.own_junctions.raw_juncs.raw_juncs + #end if + ## TODO: No idea why a string cast is necessary, but it is: + #if str($singlePaired.sParams.own_junctions.no_novel_juncs) == "Yes": + --no-novel-juncs + #end if + #end if + #if $singlePaired.sParams.closure_search.use_search == "Yes": --closure-search --min-closure-exon $singlePaired.sParams.closure_search.min_closure_exon @@ -60,8 +75,8 @@ #else: --no-coverage-search #end if - ## No idea why the type conversion is necessary, but it seems to be. - #if str ($singlePaired.sParams.microexon_search) == "Yes": + ## TODO: No idea why the type conversion is necessary, but it seems to be. 
+ #if str($singlePaired.sParams.microexon_search) == "Yes": --microexon-search #end if #end if @@ -81,6 +96,21 @@ --max-segment-intron $singlePaired.pParams.max_segment_intron --seg-mismatches=$singlePaired.pParams.seg_mismatches --seg-length=$singlePaired.pParams.seg_length + + ## Supplying junctions parameters. + #if $singlePaired.pParams.own_junctions.use_junctions == "Yes": + #if $singlePaired.pParams.own_junctions.gene_model_ann.use_annotations == "Yes": + -G $singlePaired.pParams.own_junctions.gene_model_ann.gene_annotation_model + #end if + #if $singlePaired.pParams.own_junctions.raw_juncs.use_juncs == "Yes": + -j $singlePaired.pParams.own_junctions.raw_juncs.raw_juncs + #end if + ## TODO: No idea why type cast is necessary, but it is: + #if str($singlePaired.pParams.own_junctions.no_novel_juncs) == "Yes": + --no-novel-juncs + #end if + #end if + #if $singlePaired.pParams.closure_search.use_search == "Yes": --closure-search --min-closure-exon $singlePaired.pParams.closure_search.min_closure_exon @@ -96,7 +126,7 @@ #else: --no-coverage-search #end if - ## No idea why the type conversion is necessary, but it seems to be. + ## TODO: No idea why the type conversion is necessary, but it seems to be. #if str ($singlePaired.pParams.microexon_search) == "Yes": --microexon-search #end if @@ -146,6 +176,42 @@ <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" /><param name="seg_mismatches" type="integer" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" /><param name="seg_length" type="integer" value="25" label="Minimum length of read segments" /> + + <!-- Options for supplying own junctions. 
--> + <conditional name="own_junctions"> + <param name="use_junctions" type="select" label="Use Own Junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="Yes"> + <conditional name="gene_model_ann"> + <param name="use_annotations" type="select" label="Use Gene Annotation Model"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No" /> + <when value="Yes"> + <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/> + </when> + </conditional> + <conditional name="raw_juncs"> + <param name="use_juncs" type="select" label="Use Raw Junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No" /> + <when value="Yes"> + <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/> + </when> + </conditional> + <param name="no_novel_juncs" type="select" label="Only look for supplied junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + </when> + <when value="No" /> + </conditional><!-- /own_junctions --> + <!-- Closure search. 
--><conditional name="closure_search"><param name="use_search" type="select" label="Use Closure Search"> @@ -201,6 +267,41 @@ <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" /><param name="seg_mismatches" type="integer" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" /><param name="seg_length" type="integer" value="25" label="Minimum length of read segments" /> + <!-- Options for supplying own junctions. --> + <conditional name="own_junctions"> + <param name="use_junctions" type="select" label="Use Own Junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="Yes"> + <conditional name="gene_model_ann"> + <param name="use_annotations" type="select" label="Use Gene Annotation Model"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No" /> + <when value="Yes"> + <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/> + </when> + </conditional> + <conditional name="raw_juncs"> + <param name="use_juncs" type="select" label="Use Raw Junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No" /> + <when value="Yes"> + <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. 
Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/> + </when> + </conditional> + <param name="no_novel_juncs" type="select" label="Only look for supplied junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + </when> + <when value="No" /> + </conditional><!-- /own_junctions --> + <!-- Closure search. --><conditional name="closure_search"><param name="use_search" type="select" label="Use Closure Search"> @@ -385,8 +486,11 @@ This is a list of implemented Tophat opt -F/--min-isoform-fraction 0.0-1.0 TopHat filters out junctions supported by too few alignments. Suppose a junction spanning two exons, is supported by S reads. Let the average depth of coverage of exon A be D, and assume that it is higher than B. If S / D is less than the minimum isoform fraction, the junction is not reported. A value of zero disables the filter. The default is 0.15. - -g/--max-multihits INT Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many + -g/--max-multihits INT Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many alignments. The default is 40. + -G/--GTF [GTF 2.2 file] Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping. + -j/--raw-juncs [juncs file] Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. 
Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive. + --no-novel-juncs Only look for junctions indicated in the supplied GFF file. (ignored without -G) --no-closure-search Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default. --closure-search Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (about or less than 50bp) --no-coverage-search Disables the coverage based search for junctions.
participants (1)
-
commits-noreply@bitbucket.org