April 2012 - galaxy-commits - lists.galaxyproject.org

commit/galaxy-central: jgoecks: Remove version code from tophat wrappers as this is now handled by <version> tag.
by Bitbucket 26 Apr '12

26 Apr '12

1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/1c8eb226af94/ changeset: 1c8eb226af94 user: jgoecks date: 2012-04-26 20:25:01 summary: Remove version code from tophat wrappers as this is now handled by <version> tag. affected #: 2 files diff -r c16adb4630e58fe4001e488367e27e911d7ec0cb -r 1c8eb226af94b29ef8484c4fbed557282170259e tools/ngs_rna/tophat2_wrapper.py --- a/tools/ngs_rna/tophat2_wrapper.py +++ b/tools/ngs_rna/tophat2_wrapper.py @@ -80,21 +80,6 @@ (options, args) = parser.parse_args() - # output version # of tool - try: - tmp = tempfile.NamedTemporaryFile().name - tmp_stdout = open( tmp, 'wb' ) - proc = subprocess.Popen( args='tophat -v', shell=True, stdout=tmp_stdout ) - tmp_stdout.close() - returncode = proc.wait() - stdout = open( tmp_stdout.name, 'rb' ).readline().strip() - if stdout: - sys.stdout.write( '%s\n' % stdout ) - else: - raise Exception - except: - sys.stdout.write( 'Could not determine Tophat version\n' ) - # Color or base space space = '' if options.color_space: diff -r c16adb4630e58fe4001e488367e27e911d7ec0cb -r 1c8eb226af94b29ef8484c4fbed557282170259e tools/ngs_rna/tophat_wrapper.py --- a/tools/ngs_rna/tophat_wrapper.py +++ b/tools/ngs_rna/tophat_wrapper.py @@ -73,21 +73,6 @@ (options, args) = parser.parse_args() - # output version # of tool - try: - tmp = tempfile.NamedTemporaryFile().name - tmp_stdout = open( tmp, 'wb' ) - proc = subprocess.Popen( args='tophat -v', shell=True, stdout=tmp_stdout ) - tmp_stdout.close() - returncode = proc.wait() - stdout = open( tmp_stdout.name, 'rb' ).readline().strip() - if stdout: - sys.stdout.write( '%s\n' % stdout ) - else: - raise Exception - except: - sys.stdout.write( 'Could not determine Tophat version\n' ) - # Color or base space space = '' if options.color_space: Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. 
You are receiving this because you have the service enabled, addressing the recipient of this email.

1 0

commit/galaxy-central: jgoecks: Fix version references in tophat2 wrapper.
by Bitbucket 26 Apr '12

26 Apr '12

1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/c16adb4630e5/ changeset: c16adb4630e5 user: jgoecks date: 2012-04-26 19:59:03 summary: Fix version references in tophat2 wrapper. affected #: 1 file diff -r 9884af7d49189f1af3f8ae60931064d2abcc62cc -r c16adb4630e58fe4001e488367e27e911d7ec0cb tools/ngs_rna/tophat2_wrapper.xml --- a/tools/ngs_rna/tophat2_wrapper.xml +++ b/tools/ngs_rna/tophat2_wrapper.xml @@ -1,9 +1,9 @@ <tool id="tophat2" name="Tophat2" version="0.5"><description>Gapped-read mapper for RNA-seq data</description> - <version_command>tophat --version</version_command> + <version_command>tophat2 --version</version_command><requirements> - <requirement type="package">tophat</requirement> + <requirement type="package">tophat2</requirement></requirements><command interpreter="python"> tophat2_wrapper.py Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.

1 0

commit/galaxy-central: jgoecks: Make history export work for optional data inputs.
by Bitbucket 26 Apr '12

26 Apr '12

1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/9884af7d4918/ changeset: 9884af7d4918 user: jgoecks date: 2012-04-26 19:32:04 summary: Make history export work for optional data inputs. affected #: 1 file diff -r 0a0b01a980eac2f4c98d317bbde3ff572aca13e9 -r 9884af7d49189f1af3f8ae60931064d2abcc62cc lib/galaxy/tools/imp_exp/__init__.py --- a/lib/galaxy/tools/imp_exp/__init__.py +++ b/lib/galaxy/tools/imp_exp/__init__.py @@ -415,8 +415,13 @@ params_dict[ name ] = value job_attrs[ 'params' ] = params_dict - # Get input, output datasets. - input_datasets = [ assoc.dataset.hid for assoc in job.input_datasets ] + # -- Get input, output datasets. -- + + input_datasets = [] + for assoc in job.input_datasets: + # Optional data inputs will not have a dataset. + if assoc.dataset: + input_datasets.append( assoc.dataset.hid ) job_attrs[ 'input_datasets' ] = input_datasets output_datasets = [ assoc.dataset.hid for assoc in job.output_datasets ] job_attrs[ 'output_datasets' ] = output_datasets Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.

1 0

commit/galaxy-central: anton: Fixed number of threads
by Bitbucket 26 Apr '12

26 Apr '12

1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/0a0b01a980ea/ changeset: 0a0b01a980ea user: anton date: 2012-04-26 16:49:38 summary: Fixed number of threads affected #: 1 file diff -r 0b5cb60e48104bd1df937bd6c7051abfda7190ff -r 0a0b01a980eac2f4c98d317bbde3ff572aca13e9 tools/metag_tools/megablast_wrapper.py --- a/tools/metag_tools/megablast_wrapper.py +++ b/tools/metag_tools/megablast_wrapper.py @@ -15,6 +15,10 @@ usage: %prog db_build input_file word_size identity_cutoff eval_cutoff filter_query index_dir output_file """ +# This version (April 26, 2012) replaces megablast with blast+ blastn +# There is now no need to augment NCBI-formatted databases and these can be +# directly downloaded from NCBI ftp site + import os, subprocess, sys, tempfile from galaxy import eggs import pkg_resources; pkg_resources.require( "bx-python" ) @@ -29,11 +33,11 @@ def __main__(): #Parse Command Line options, args = doc_optparse.parse( __doc__ ) - query_filename = options.input.strip() - output_filename = options.output.strip() - mega_word_size = options.word_size # -W - mega_iden_cutoff = options.identity_cutoff # -p - mega_evalue_cutoff = options.eval_cutoff # -e + query_filename = options.input.strip() # -query + output_filename = options.output.strip() # -out + mega_word_size = options.word_size # -word_size + mega_iden_cutoff = options.identity_cutoff # -perc_identity + mega_evalue_cutoff = options.eval_cutoff # -evalue mega_temp_output = tempfile.NamedTemporaryFile().name GALAXY_DATA_INDEX_DIR = options.index_dir DB_LOC = "%s/blastdb.loc" % GALAXY_DATA_INDEX_DIR @@ -56,7 +60,7 @@ stop_err( 'Cannot locate the target database directory. Please check your location file.' 
) # arguments for megablast - megablast_command = "blastn -task megablast -db %s -query %s -out %s -outfmt '6 qseqid sgi slen ppos length mismatch gaps qstart qend sstart send evalue bitscore' -num_threads 2 -word_size %s -perc_identity %s -evalue %s -dust %s > /dev/null" \ + megablast_command = "blastn -task megablast -db %s -query %s -out %s -outfmt '6 qseqid sgi slen ppos length mismatch gaps qstart qend sstart send evalue bitscore' -num_threads 8 -word_size %s -perc_identity %s -evalue %s -dust %s > /dev/null" \ % ( options.db_build, query_filename, mega_temp_output, mega_word_size, mega_iden_cutoff, mega_evalue_cutoff, options.filter_query ) print megablast_command @@ -88,7 +92,7 @@ os.unlink( mega_temp_output ) if os.path.exists( tmp ): os.unlink( tmp ) - stop_err( 'Error indexing reference sequence. ' + str( e ) ) + stop_err( 'Cannot execute megaablast. ' + str( e ) ) output = open( output_filename, 'w' ) invalid_lines = 0 @@ -96,7 +100,7 @@ line = line.rstrip( '\r\n' ) fields = line.split() try: - # convert the last column (causing problem in filter tool) to float + # convert the last column (bit-score as this is causing problem in filter tool) to float fields[-1] = float( fields[-1] ) new_line = "%s\t%0.1f" % ( '\t'.join( fields[:-1] ), fields[-1] ) except: Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.

1 0

commit/galaxy-central: anton: Modified metagenomic mapper to use BLAST+ blastn in megablast mode
by Bitbucket 26 Apr '12

26 Apr '12

1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/0b5cb60e4810/ changeset: 0b5cb60e4810 user: anton date: 2012-04-26 16:44:26 summary: Modified metagenomic mapper to use BLAST+ blastn in megablast mode affected #: 2 files diff -r 127709d69d2c08bbceb3aa811c42696aec26ad10 -r 0b5cb60e48104bd1df937bd6c7051abfda7190ff tools/metag_tools/megablast_wrapper.py --- a/tools/metag_tools/megablast_wrapper.py +++ b/tools/metag_tools/megablast_wrapper.py @@ -56,7 +56,7 @@ stop_err( 'Cannot locate the target database directory. Please check your location file.' ) # arguments for megablast - megablast_command = "megablast -d %s -i %s -o %s -m 8 -a 8 -W %s -p %s -e %s -F %s > /dev/null" \ + megablast_command = "blastn -task megablast -db %s -query %s -out %s -outfmt '6 qseqid sgi slen ppos length mismatch gaps qstart qend sstart send evalue bitscore' -num_threads 2 -word_size %s -perc_identity %s -evalue %s -dust %s > /dev/null" \ % ( options.db_build, query_filename, mega_temp_output, mega_word_size, mega_iden_cutoff, mega_evalue_cutoff, options.filter_query ) print megablast_command @@ -96,11 +96,9 @@ line = line.rstrip( '\r\n' ) fields = line.split() try: - # get gi and length of that gi seq - gi, gi_len = fields[1].split( '_' ) # convert the last column (causing problem in filter tool) to float fields[-1] = float( fields[-1] ) - new_line = "%s\t%s\t%s\t%s\t%0.1f" % ( fields[0], gi, gi_len, '\t'.join( fields[2:-1] ), fields[-1] ) + new_line = "%s\t%0.1f" % ( '\t'.join( fields[:-1] ), fields[-1] ) except: new_line = line invalid_lines += 1 diff -r 127709d69d2c08bbceb3aa811c42696aec26ad10 -r 0b5cb60e48104bd1df937bd6c7051abfda7190ff tools/metag_tools/megablast_wrapper.xml --- a/tools/metag_tools/megablast_wrapper.xml +++ b/tools/metag_tools/megablast_wrapper.xml @@ -1,4 +1,4 @@ -<tool id="megablast_wrapper" name="Megablast" version="1.1.0"> +<tool id="megablast_wrapper" name="Megablast" version="1.2.0"><description> compare short reads against 
htgs, nt, and wgs databases</description><command interpreter="python"> megablast_wrapper.py @@ -16,15 +16,15 @@ <param name="source_select" type="select" display="radio" label="against target database"><options from_data_table="blastdb" /></param> - <param name="word_size" type="select" label="using word size" help="Size of best perfect match (-W)"> + <param name="word_size" type="select" label="using word size" help="Size of best perfect match (-word_size)"><option value="28">28</option><option value="16">16</option></param> - <param name="iden_cutoff" type="float" size="15" value="90.0" label="report hits above this identity (-p)" help="no cutoff if 0" /> - <param name="evalue_cutoff" type="float" size="15" value="0.001" label="set expectation value cutoff (-e)" /> - <param name="filter_query" type="select" label="Filter out low complexity regions? (-F)"> - <option value="T">Yes</option> - <option value="F">No</option> + <param name="iden_cutoff" type="float" size="15" value="90.0" label="report hits above this identity (-perc_identity)" help="no cutoff if 0" /> + <param name="evalue_cutoff" type="float" size="15" value="0.001" label="set expectation value cutoff (-evalue)" /> + <param name="filter_query" type="select" label="Filter out low complexity regions? (-dust)"> + <option value="yes">Yes</option> + <option value="no">No</option></param></inputs><outputs> @@ -41,7 +41,7 @@ <param name="word_size" value="28" /><param name="iden_cutoff" value="99.0" /><param name="evalue_cutoff" value="10.0" /> - <param name="filter_query" value="T" /> + <param name="filter_query" value="yes" /><output name="output1" file="megablast_wrapper_test1.out"/></test></tests> @@ -55,7 +55,7 @@ **What it does** -This tool runs **megablast** (for information about megablast, please see the reference below) a high performance nucleotide local aligner developed by Webb Miller and colleagues. 
+This tool runs **megablast** function of BLAST+ blastn tool - a high performance nucleotide local aligner developed by Webb Miller and colleagues. ----- Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.

1 0

commit/galaxy-central: jgoecks: Add basic tool execution to the API.
by Bitbucket 25 Apr '12

25 Apr '12

1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/127709d69d2c/ changeset: 127709d69d2c user: jgoecks date: 2012-04-26 00:03:11 summary: Add basic tool execution to the API. affected #: 1 file diff -r 0a13c751de1adf649983aa66f53fd23811d03d8f -r 127709d69d2c08bbceb3aa811c42696aec26ad10 lib/galaxy/web/api/tools.py --- a/lib/galaxy/web/api/tools.py +++ b/lib/galaxy/web/api/tools.py @@ -1,5 +1,10 @@ from galaxy import config, tools, web, util from galaxy.web.base.controller import BaseController, BaseAPIController +from galaxy.util.bunch import Bunch + +messages = Bunch( + NO_TOOL = "no tool" +) class ToolsController( BaseAPIController ): """ @@ -11,8 +16,10 @@ """ GET /api/tools: returns a list of tools defined by parameters parameters: - in_panel - if true, tools are returned in panel structure, including sections and labels - trackster - if true, only tools that are compatible with Trackster are returned + in_panel - if true, tools are returned in panel structure, + including sections and labels + trackster - if true, only tools that are compatible with + Trackster are returned """ # Read params. @@ -29,4 +36,40 @@ Returns tool information, including parameters and inputs. """ return self.app.toolbox.tools_by_id[ id ].to_dict( trans, for_display=True ) + + @web.expose_api + def create( self, trans, payload, **kwd ): + """ + POST /api/tools + Executes tool using specified inputs, creating new history-dataset + associations, which are returned. + """ + + # TODO: set target history? + + # -- Execute tool. -- + + # Get tool. + tool_id = payload[ 'id' ] + tool = trans.app.toolbox.get_tool( tool_id ) + if not tool: + return { "message": { "type": "error", "text" : messages.NO_TOOL } } + + # Set up inputs. + inputs = payload[ 'inputs' ] + # HACK: add run button so that tool.handle_input will run tool. + inputs['runtool_btn'] = 'Execute' + # TODO: encode data ids and decode ids. 
+ params = util.Params( inputs, sanitize = False ) + template, vars = tool.handle_input( trans, params.__dict__ ) + + # TODO: check for errors and ensure that output dataset(s) are available. + output_datasets = vars[ 'out_data' ].values() + rval = { + "outputs": [] + } + outputs = rval[ "outputs" ] + for output in output_datasets: + outputs.append( output.get_api_value() ) + return rval \ No newline at end of file Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.

1 0

commit/galaxy-central: 2 new changesets
by Bitbucket 25 Apr '12

25 Apr '12

2 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/de2891d266b2/ changeset: de2891d266b2 user: jgoecks date: 2012-04-25 17:25:17 summary: Add explicit Tophat2 wrapper. affected #: 6 files diff -r 0cffe389e1b3470e18494fb21491e75989282b97 -r de2891d266b2d489cc86cc8e7ea574d55cce85e8 test-data/tophat2_out2j.bed --- /dev/null +++ b/test-data/tophat2_out2j.bed @@ -0,0 +1,3 @@ +track name=junctions description="TopHat junctions" +test_chromosome 179 400 JUNC00000001 45 + 179 400 255,0,0 2 71,50 0,171 +test_chromosome 350 550 JUNC00000002 38 + 350 550 255,0,0 2 50,50 0,150 diff -r 0cffe389e1b3470e18494fb21491e75989282b97 -r de2891d266b2d489cc86cc8e7ea574d55cce85e8 test-data/tophat2_out3j.bed --- /dev/null +++ b/test-data/tophat2_out3j.bed @@ -0,0 +1,3 @@ +track name=junctions description="TopHat junctions" +test_chromosome 177 400 JUNC00000001 27 + 177 400 255,0,0 2 73,50 0,173 +test_chromosome 350 550 JUNC00000002 26 + 350 550 255,0,0 2 50,50 0,150 diff -r 0cffe389e1b3470e18494fb21491e75989282b97 -r de2891d266b2d489cc86cc8e7ea574d55cce85e8 test-data/tophat2_out4j.bed --- /dev/null +++ b/test-data/tophat2_out4j.bed @@ -0,0 +1,3 @@ +track name=junctions description="TopHat junctions" +test_chromosome 177 400 JUNC00000001 51 + 177 400 255,0,0 2 73,50 0,173 +test_chromosome 350 550 JUNC00000002 43 + 350 550 255,0,0 2 50,50 0,150 diff -r 0cffe389e1b3470e18494fb21491e75989282b97 -r de2891d266b2d489cc86cc8e7ea574d55cce85e8 tool_conf.xml.sample --- a/tool_conf.xml.sample +++ b/tool_conf.xml.sample @@ -356,6 +356,7 @@ <label text="RNA-seq" id="rna_seq" /><tool file="ngs_rna/tophat_wrapper.xml" /> + <tool file="ngs_rna/tophat2_wrapper.xml" /><tool file="ngs_rna/tophat_color_wrapper.xml" /><tool file="ngs_rna/cufflinks_wrapper.xml" /><tool file="ngs_rna/cuffcompare_wrapper.xml" /> diff -r 0cffe389e1b3470e18494fb21491e75989282b97 -r de2891d266b2d489cc86cc8e7ea574d55cce85e8 tools/ngs_rna/tophat2_wrapper.py --- /dev/null +++ 
b/tools/ngs_rna/tophat2_wrapper.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python + +import optparse, os, shutil, subprocess, sys, tempfile, fileinput + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + #Parse Command Line + parser = optparse.OptionParser() + parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' ) + parser.add_option( '-C', '--color-space', dest='color_space', action='store_true', help='This indicates color-space data' ) + parser.add_option( '-J', '--junctions-output', dest='junctions_output_file', help='Junctions output file; formate is BED.' ) + parser.add_option( '-H', '--hits-output', dest='accepted_hits_output_file', help='Accepted hits output file; formate is BAM.' ) + parser.add_option( '', '--own-file', dest='own_file', help='' ) + parser.add_option( '-D', '--indexes-path', dest='index_path', help='Indexes directory; location of .ebwt and .fa files.' ) + parser.add_option( '-r', '--mate-inner-dist', dest='mate_inner_dist', help='This is the expected (mean) inner distance between mate pairs. \ + For, example, for paired end runs with fragments selected at 300bp, \ + where each end is 50bp, you should set -r to be 200. There is no default, \ + and this parameter is required for paired end runs.') + parser.add_option( '', '--mate-std-dev', dest='mate_std_dev', help='Standard deviation of distribution on inner distances between male pairs.' ) + parser.add_option( '-n', '--transcriptome-mismatches', dest='transcriptome_mismatches' ) + parser.add_option( '', '--genome-read-mismatches', dest='genome_read_mismatches' ) + parser.add_option( '', '--read-mismatches', dest='read_mismatches' ) + parser.add_option( '', '--bowtie-n', action="store_true", dest='bowtie_n' ) + parser.add_option( '-a', '--min-anchor-length', dest='min_anchor_length', + help='The "anchor length". 
TopHat will report junctions spanned by reads with at least this many bases on each side of the junction.' ) + parser.add_option( '-m', '--splice-mismatches', dest='splice_mismatches', help='The maximum number of mismatches that can appear in the anchor region of a spliced alignment.' ) + parser.add_option( '-i', '--min-intron-length', dest='min_intron_length', + help='The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart.' ) + parser.add_option( '-I', '--max-intron-length', dest='max_intron_length', + help='The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.' ) + parser.add_option( '-g', '--max_multihits', dest='max_multihits', help='Maximum number of alignments to be allowed' ) + parser.add_option( '', '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' ) + parser.add_option( '', '--seg-length', dest='seg_length', help='Minimum length of read segments' ) + parser.add_option( '', '--library-type', dest='library_type', help='TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.' ) + parser.add_option( '', '--allow-indels', action="store_true", help='Allow indel search. Indel search is disabled by default.(Not used since version 1.3.0)' ) + parser.add_option( '', '--max-insertion-length', dest='max_insertion_length', help='The maximum insertion length. The default is 3.' ) + parser.add_option( '', '--max-deletion-length', dest='max_deletion_length', help='The maximum deletion length. The default is 3.' 
) + + # Options for supplying own junctions + parser.add_option( '-G', '--GTF', dest='gene_model_annotations', help='Supply TopHat with a list of gene model annotations. \ + TopHat will use the exon records in this file to build \ + a set of known splice junctions for each gene, and will \ + attempt to align reads to these junctions even if they \ + would not normally be covered by the initial mapping.') + parser.add_option( '-j', '--raw-juncs', dest='raw_juncs', help='Supply TopHat with a list of raw junctions. Junctions are \ + specified one per line, in a tab-delimited format. Records \ + look like: <chrom><left><right><+/-> left and right are \ + zero-based coordinates, and specify the last character of the \ + left sequenced to be spliced to the first character of the right \ + sequence, inclusive.') + parser.add_option( '', '--no-novel-juncs', action="store_true", dest='no_novel_juncs', help="Only look for junctions indicated in the \ + supplied GFF file. (ignored without -G)") + parser.add_option( '', '--no-novel-indels', action="store_true", dest='no_novel_indels', help="Skip indel search. Indel search is enabled by default.") + # Types of search. + parser.add_option( '', '--microexon-search', action="store_true", dest='microexon_search', help='With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.') + parser.add_option( '', '--coverage-search', action="store_true", dest='coverage_search', help='Enables the coverage based search for junctions. 
Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.') + parser.add_option( '', '--no-coverage-search', action="store_false", dest='coverage_search' ) + parser.add_option( '', '--min-segment-intron', dest='min_segment_intron', help='Minimum intron length that may be found during split-segment search' ) + parser.add_option( '', '--max-segment-intron', dest='max_segment_intron', help='Maximum intron length that may be found during split-segment search' ) + parser.add_option( '', '--min-coverage-intron', dest='min_coverage_intron', help='Minimum intron length that may be found during coverage search' ) + parser.add_option( '', '--max-coverage-intron', dest='max_coverage_intron', help='Maximum intron length that may be found during coverage search' ) + + # Fusion search options. + parser.add_option( '', '--fusion-search', action='store_true', dest='fusion_search' ) + parser.add_option( '', '--fusion-anchor-length', dest='fusion_anchor_length' ) + parser.add_option( '', '--fusion-min-dist', dest='fusion_min_dist' ) + parser.add_option( '', '--fusion-read-mismatches', dest='fusion_read_mismatches' ) + parser.add_option( '', '--fusion-multireads', dest='fusion_multireads' ) + parser.add_option( '', '--fusion-multipairs', dest='fusion_multipairs' ) + parser.add_option( '', '--fusion-ignore-chromosomes', dest='fusion_ignore_chromosomes' ) + + # Wrapper options. 
+ parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' ) + parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' ) + parser.add_option( '', '--single-paired', dest='single_paired', help='' ) + parser.add_option( '', '--settings', dest='settings', help='' ) + + (options, args) = parser.parse_args() + + # output version # of tool + try: + tmp = tempfile.NamedTemporaryFile().name + tmp_stdout = open( tmp, 'wb' ) + proc = subprocess.Popen( args='tophat -v', shell=True, stdout=tmp_stdout ) + tmp_stdout.close() + returncode = proc.wait() + stdout = open( tmp_stdout.name, 'rb' ).readline().strip() + if stdout: + sys.stdout.write( '%s\n' % stdout ) + else: + raise Exception + except: + sys.stdout.write( 'Could not determine Tophat version\n' ) + + # Color or base space + space = '' + if options.color_space: + space = '-C' + + # Creat bowtie index if necessary. + tmp_index_dir = tempfile.mkdtemp() + if options.own_file: + index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.own_file )[1].split( '.' 
)[:-1] ) ) + try: + os.link( options.own_file, index_path + '.fa' ) + except: + # Tophat prefers (but doesn't require) fasta file to be in same directory, with .fa extension + pass + cmd_index = 'bowtie-build %s -f %s %s' % ( space, options.own_file, index_path ) + try: + tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stderr.close() + if returncode != 0: + raise Exception, stderr + except Exception, e: + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + stop_err( 'Error indexing reference sequence\n' + str( e ) ) + else: + index_path = options.index_path + + # Build tophat command. + cmd = 'tophat2 %s %s %s' + reads = options.input1 + if options.input2: + reads += ' ' + options.input2 + opts = '-p %s %s' % ( options.num_threads, space ) + if options.single_paired == 'paired': + opts += ' -r %s' % options.mate_inner_dist + if options.settings == 'preSet': + cmd = cmd % ( opts, index_path, reads ) + else: + try: + if int( options.min_anchor_length ) >= 3: + opts += ' -a %s' % options.min_anchor_length + else: + raise Exception, 'Minimum anchor length must be 3 or greater' + opts += ' -m %s' % options.splice_mismatches + opts += ' -i %s' % options.min_intron_length + opts += ' -I %s' % options.max_intron_length + opts += ' -g %s' % options.max_multihits + # Custom junctions options. 
+ if options.gene_model_annotations: + opts += ' -G %s' % options.gene_model_annotations + if options.raw_juncs: + opts += ' -j %s' % options.raw_juncs + if options.no_novel_juncs: + opts += ' --no-novel-juncs' + if options.library_type: + opts += ' --library-type %s' % options.library_type + if options.no_novel_indels: + opts += ' --no-novel-indels' + else: + if options.max_insertion_length: + opts += ' --max-insertion-length %i' % int( options.max_insertion_length ) + if options.max_deletion_length: + opts += ' --max-deletion-length %i' % int( options.max_deletion_length ) + # Max options do not work for Tophat v1.2.0, despite documentation to the contrary. (Fixed in version 1.3.1) + # need to warn user of this fact + #sys.stdout.write( "Max insertion length and max deletion length options don't work in Tophat v1.2.0\n" ) + + if options.transcriptome_mismatches: + opts += ' --transcriptome-mismatches %i' % int( options.transcriptome_mismatches ) + if options.genome_read_mismatches: + opts += ' --genome-read-mismatches %i' % int( options.genome_read_mismatches ) + if options.read_mismatches: + opts += ' --read-mismatches %i' % int( options.read_mismatches ) + if options.bowtie_n: + opts += ' --bowtie-n' + + # Search type options. 
+ if options.coverage_search: + opts += ' --coverage-search --min-coverage-intron %s --max-coverage-intron %s' % ( options.min_coverage_intron, options.max_coverage_intron ) + else: + opts += ' --no-coverage-search' + if options.microexon_search: + opts += ' --microexon-search' + if options.single_paired == 'paired' and options.mate_std_dev: + opts += ' --mate-std-dev %s' % options.mate_std_dev + if options.seg_mismatches: + opts += ' --segment-mismatches %d' % int( options.seg_mismatches ) + if options.seg_length: + opts += ' --segment-length %d' % int( options.seg_length ) + if options.min_segment_intron: + opts += ' --min-segment-intron %d' % int( options.min_segment_intron ) + if options.max_segment_intron: + opts += ' --max-segment-intron %d' % int( options.max_segment_intron ) + + # Fusion search options. + if options.fusion_search: + opts += ' --fusion-search --fusion-anchor-length %i --fusion-min-dist %i --fusion-read-mismatches %i --fusion-multireads %i --fusion-multipairs %i --fusion-ignore-chromosomes %s' % \ + ( int( options.fusion_anchor_length ), int( options.fusion_min_dist ), + int( options.fusion_read_mismatches ), int( options.fusion_multireads ), + int( options.fusion_multipairs ), options.fusion_ignore_chromosomes ) + + cmd = cmd % ( opts, index_path, reads ) + + except Exception, e: + # Clean up temp dirs + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) ) + print cmd + + # Run + try: + tmp_out = tempfile.NamedTemporaryFile().name + tmp_stdout = open( tmp_out, 'wb' ) + tmp_err = tempfile.NamedTemporaryFile().name + tmp_stderr = open( tmp_err, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp_err, 'rb' ) + stderr = '' + buffsize = 
1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stdout.close() + tmp_stderr.close() + if returncode != 0: + raise Exception, stderr + + # Copy output files from tmp directory to specified files. + shutil.copyfile( os.path.join( "tophat_out", "junctions.bed" ), options.junctions_output_file ) + shutil.copyfile( os.path.join( "tophat_out", "accepted_hits.bam" ), options.accepted_hits_output_file ) + + # TODO: look for errors in program output. + except Exception, e: + stop_err( 'Error in tophat:\n' + str( e ) ) + + # Clean up temp dirs + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + +if __name__=="__main__": __main__() diff -r 0cffe389e1b3470e18494fb21491e75989282b97 -r de2891d266b2d489cc86cc8e7ea574d55cce85e8 tools/ngs_rna/tophat2_wrapper.xml --- /dev/null +++ b/tools/ngs_rna/tophat2_wrapper.xml @@ -0,0 +1,542 @@ +<tool id="tophat2" name="Tophat2" version="0.5"> +  + <description>Gapped-read mapper for RNA-seq data</description> + <version_command>tophat --version</version_command> + <requirements> + <requirement type="package">tophat</requirement> + </requirements> + <command interpreter="python"> + tophat2_wrapper.py + + ## Change this to accommodate the number of threads you have available. + --num-threads="4" + + ## Provide outputs. + --junctions-output=$junctions + --hits-output=$accepted_hits + + ## Handle reference file. + #if $refGenomeSource.genomeSource == "history": + --own-file=$refGenomeSource.ownFile + #else: + --indexes-path="${refGenomeSource.index.fields.path}" + #end if + + ## Are reads single-end or paired? + --single-paired=$singlePaired.sPaired + + ## First input file always required. + --input1=$input1 + + ## Second input only if input is paired-end. 
+ #if $singlePaired.sPaired == "paired" + --input2=$singlePaired.input2 + -r $singlePaired.mate_inner_distance + --mate-std-dev=$singlePaired.mate_std_dev + #end if + + ## Set params. + --settings=$params.settingsType + #if $params.settingsType == "full": + -n $params.transcriptome_mismatches + --genome-read-mismatches $params.genome_read_mismatches + --read-mismatches $params.read_mismatches + #if str($params.bowtie_n) == "Yes": + --bowtie-n + #end if + + -a $params.anchor_length + -m $params.splice_mismatches + -i $params.min_intron_length + -I $params.max_intron_length + -g $params.max_multihits + --min-segment-intron $params.min_segment_intron + --max-segment-intron $params.max_segment_intron + --seg-mismatches=$params.seg_mismatches + --seg-length=$params.seg_length + --library-type=$params.library_type + + ## Indel search. + #if $params.indel_search.allow_indel_search == "Yes": + ## --allow-indels + --max-insertion-length $params.indel_search.max_insertion_length + --max-deletion-length $params.indel_search.max_deletion_length + #else: + --no-novel-indels + #end if + + ## Supplying junctions parameters. + #if $params.own_junctions.use_junctions == "Yes": + #if $params.own_junctions.gene_model_ann.use_annotations == "Yes": + -G $params.own_junctions.gene_model_ann.gene_annotation_model + #end if + #if $params.own_junctions.raw_juncs.use_juncs == "Yes": + -j $params.own_junctions.raw_juncs.raw_juncs + #end if + ## TODO: No idea why a string cast is necessary, but it is: + #if str($params.own_junctions.no_novel_juncs) == "Yes": + --no-novel-juncs + #end if + #end if + + #if $params.coverage_search.use_search == "Yes": + --coverage-search + --min-coverage-intron $params.coverage_search.min_coverage_intron + --max-coverage-intron $params.coverage_search.max_coverage_intron + #else: + --no-coverage-search + #end if + ## TODO: No idea why the type conversion is necessary, but it seems to be. 
+ #if str($params.microexon_search) == "Yes": + --microexon-search + #end if + + #if $params.fusion_search.do_search == "Yes": + --fusion-search + --fusion-anchor-length $params.fusion_search.anchor_len + --fusion-min-dist $params.fusion_search.min_dist + --fusion-read-mismatches $params.fusion_search.read_mismatches + --fusion-multireads $params.fusion_search.multireads + --fusion-multipairs $params.fusion_search.multipairs + --fusion-ignore-chromosomes $params.fusion_search.ignore_chromosomes + #end if + #end if + </command> + <inputs> + <conditional name="singlePaired"> + <param name="sPaired" type="select" label="Is this library mate-paired?"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <when value="single"> + <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/> + </when> + <when value="paired"> + <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> + <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> + <param name="mate_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs" /> + <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="The standard deviation for the distribution on inner distances between mate pairs."/> + </when> + </conditional> + <conditional name="refGenomeSource"> + <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" 
help="Built-ins were indexed using default options"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team"> + <options from_data_table="tophat_indexes"> + <filter type="sort_by" column="2"/> + <validator type="no_options" message="No indexes are available for the selected input dataset"/> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> + </when> + </conditional> + <conditional name="params"> + <param name="settingsType" type="select" label="TopHat settings to use" help="You can use the default settings or set custom values for any of Tophat's parameters."> + <option value="preSet">Use Defaults</option> + <option value="full">Full parameter list</option> + </param> + <when value="preSet" /> +  + <when value="full"> + <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol."> + <option value="fr-unstranded">FR Unstranded</option> + <option value="fr-firststrand">FR First Strand</option> + <option value="fr-secondstrand">FR Second Strand</option> + </param> + <param name="transcriptome_mismatches" type="integer" value="2" label="Transcriptome mismatches" help="Maximum number of mismatches allowed when reads are aligned to the transcriptome. When Bowtie2 is used, this number is also used to decide whether or not to further re-align some of the transcriptome-mapped reads to the genome. 
If the alignment score of the best alignment among multiple candidates for a read is lower than 'bowtie2-min-score', which is internally defined as (max_penalty - 1) * max_mismatches, then the reads will be kept for re-alignment through the rest of the pipeline. You can specify max_penalty via '--b2-mp' option." /> + <param name="genome_read_mismatches" type="integer" value="2" label="Genome read mismatches" help="When whole reads are first mapped on the genome, this many mismatches in each read alignment are allowed. The default is 2. This number is also used to decide whether to further re-align some of the reads (by splitting them into segments) with a similar scoring threshold scheme as described for the --transcriptome-mismatches option above." /> + <param name="read_mismatches" type="integer" value="2" label="Final read mismatches" help="Final read alignments having more than these many mismatches are discarded." /> + <param name="bowtie_n" type="select" label="Use bowtie -n mode"> + <option selected="true" value="No">No</option> + <option value="Yes">Yes</option> + </param> + <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /> + <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" /> + <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." /> + <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." 
/> + <conditional name="indel_search"> + <param name="allow_indel_search" type="select" label="Allow indel search"> + <option value="Yes">Yes</option> + <option value="No">No</option> + </param> + <when value="No"/> + <when value="Yes"> + <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." /> + <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." /> + </when> + </conditional> + alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" /> + <param name="max_multihits" type="integer" value="20" label="Maximum number of alignments to be allowed" /> + <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" /> + <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" /> + <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" /> + <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" /> + +  + <conditional name="own_junctions"> + <param name="use_junctions" type="select" label="Use Own Junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="Yes"> + <conditional name="gene_model_ann"> + <param name="use_annotations" type="select" label="Use Gene Annotation Model"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No" /> + <when value="Yes"> + <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads 
to these junctions even if they would not normally be covered by the initial mapping."/> + </when> + </conditional> + <conditional name="raw_juncs"> + <param name="use_juncs" type="select" label="Use Raw Junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No" /> + <when value="Yes"> + <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/> + </when> + </conditional> + <param name="no_novel_juncs" type="select" label="Only look for supplied junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + </when> + <when value="No" /> + </conditional> + +  + <conditional name="coverage_search"> + <param name="use_search" type="select" label="Use Coverage Search"> + <option selected="true" value="Yes">Yes</option> + <option value="No">No</option> + </param> + <when value="Yes"> + <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" /> + <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" /> + </when> + <when value="No" /> + </conditional> + +  + <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. 
Works only for reads 50bp or longer."> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + +  + <conditional name="fusion_search"> + <param name="do_search" type="select" label="Do Fusion Search"> + <option selected="true" value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No" /> + <when value="Yes"> + <param name="anchor_len" type="integer" value="20" label="Anchor Length" help="A 'supporting' read must map to both sides of a fusion by at least this many bases."/> + <param name="min_dist" type="integer" value="10000000" label="Minimum Distance" help="For intra-chromosomal fusions, TopHat-Fusion tries to find fusions separated by at least this distance."/> + <param name="read_mismatches" type="integer" value="2" label="Read Mismatches" help="Reads support fusions if they map across fusion with at most this many mismatches."/> + <param name="multireads" type="integer" value="2" label="Multireads" help="Reads that map to more than this many places will be ignored. 
It may be possible that a fusion is supported by reads (or pairs) that map to multiple places."/> + <param name="multipairs" type="integer" value="2" label="Multipairs" help="Pairs that map to more than this many places will be ignored."/> + <param name="ignore_chromosomes" type="text" value='' label="Ignore some chromosomes such as chrM when detecting fusion break points"/> + </when> + </conditional> + </when> + </conditional> + </inputs> + + <outputs> + <data format="bed" name="insertions" label="${tool.name} on ${on_string}: insertions" from_work_dir="tophat_out/insertions.bed"> + <actions> + <conditional name="refGenomeSource.genomeSource"> + <when value="indexed"> + <action type="metadata" name="dbkey"> + <option type="from_data_table" name="tophat_indexes" column="1" offset="0"> + <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> + <filter type="param_value" ref="refGenomeSource.index" column="0"/> + </option> + </action> + </when> + <when value="history"> + <action type="metadata" name="dbkey"> + <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" /> + </action> + </when> + </conditional> + </actions> + </data> + <data format="bed" name="deletions" label="${tool.name} on ${on_string}: deletions" from_work_dir="tophat_out/deletions.bed"> + <actions> + <conditional name="refGenomeSource.genomeSource"> + <when value="indexed"> + <action type="metadata" name="dbkey"> + <option type="from_data_table" name="tophat_indexes" column="1" offset="0"> + <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> + <filter type="param_value" ref="refGenomeSource.index" column="0"/> + </option> + </action> + </when> + <when value="history"> + <action type="metadata" name="dbkey"> + <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" /> + </action> + </when> + </conditional> + </actions> + </data> + <data format="bed" name="junctions" label="${tool.name} on 
${on_string}: splice junctions"> + <actions> + <conditional name="refGenomeSource.genomeSource"> + <when value="indexed"> + <action type="metadata" name="dbkey"> + <option type="from_data_table" name="tophat_indexes" column="1" offset="0"> + <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> + <filter type="param_value" ref="refGenomeSource.index" column="0"/> + </option> + </action> + </when> + <when value="history"> + <action type="metadata" name="dbkey"> + <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" /> + </action> + </when> + </conditional> + </actions> + </data> + <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits"> + <actions> + <conditional name="refGenomeSource.genomeSource"> + <when value="indexed"> + <action type="metadata" name="dbkey"> + <option type="from_data_table" name="tophat_indexes" column="1" offset="0"> + <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> + <filter type="param_value" ref="refGenomeSource.index" column="0"/> + </option> + </action> + </when> + <when value="history"> + <action type="metadata" name="dbkey"> + <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" /> + </action> + </when> + </conditional> + </actions> + </data> + </outputs> + + <tests> +  + <test> +  + <param name="sPaired" value="single" /> + <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" /> + <param name="genomeSource" value="indexed" /> + <param name="index" value="tophat_test" /> + <param name="settingsType" value="preSet" /> + <output name="junctions" file="tophat_out1j.bed" /> + <output name="accepted_hits" file="tophat_out1h.bam" compare="sim_size" /> + </test> +  + <test> +  + <param name="sPaired" value="paired" /> + <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" /> + <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger" /> 
+ <param name="genomeSource" value="history" /> + <param name="ownFile" ftype="fasta" value="tophat_in1.fasta" /> + <param name="mate_inner_distance" value="20" /> + <param name="settingsType" value="preSet" /> + <output name="junctions" file="tophat2_out2j.bed" /> + <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" /> + </test> +  + <test> +  + <param name="sPaired" value="single"/> + <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/> + <param name="genomeSource" value="history"/> + <param name="ownFile" value="tophat_in1.fasta"/> + <param name="settingsType" value="full"/> + <param name="library_type" value="FR Unstranded"/> + <param name="transcriptome_mismatches" value="2"/> + <param name="genome_read_mismatches" value="2"/> + <param name="read_mismatches" value="2"/> + <param name="bowtie_n" value="No"/> + <param name="anchor_length" value="8"/> + <param name="splice_mismatches" value="0"/> + <param name="min_intron_length" value="70"/> + <param name="max_intron_length" value="500000"/> + <param name="max_multihits" value="40"/> + <param name="min_segment_intron" value="50" /> + <param name="max_segment_intron" value="500000" /> + <param name="seg_mismatches" value="2"/> + <param name="seg_length" value="25"/> + <param name="allow_indel_search" value="Yes"/> + <param name="max_insertion_length" value="3"/> + <param name="max_deletion_length" value="3"/> + <param name="use_junctions" value="Yes" /> + <param name="use_annotations" value="No" /> + <param name="use_juncs" value="No" /> + <param name="no_novel_juncs" value="No" /> + <param name="use_search" value="Yes" /> + <param name="min_coverage_intron" value="50" /> + <param name="max_coverage_intron" value="20000" /> + <param name="microexon_search" value="Yes" /> +  + <param name="do_search" value="Yes" /> + <param name="anchor_len" value="21" /> + <param name="min_dist" value="10000021" /> + <param name="read_mismatches" value="3" /> + <param name="multireads" 
value="4" /> + <param name="multipairs" value="5" /> + <param name="ignore_chromosomes" value="chrM"/> + <output name="insertions" file="tophat_out3i.bed" /> + <output name="deletions" file="tophat_out3d.bed" /> + <output name="junctions" file="tophat2_out3j.bed" /> + <output name="accepted_hits" file="tophat_out3h.bam" compare="sim_size" /> + </test> +  + <test> +  + <param name="sPaired" value="paired"/> + <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/> + <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/> + <param name="genomeSource" value="indexed"/> + <param name="index" value="tophat_test"/> + <param name="mate_inner_distance" value="20"/> + <param name="settingsType" value="full"/> + <param name="library_type" value="FR Unstranded"/> + <param name="transcriptome_mismatches" value="3"/> + <param name="genome_read_mismatches" value="4"/> + <param name="read_mismatches" value="5"/> + <param name="bowtie_n" value="Yes"/> + <param name="mate_std_dev" value="20"/> + <param name="anchor_length" value="8"/> + <param name="splice_mismatches" value="0"/> + <param name="min_intron_length" value="70"/> + <param name="max_intron_length" value="500000"/> + <param name="max_multihits" value="40"/> + <param name="min_segment_intron" value="50" /> + <param name="max_segment_intron" value="500000" /> + <param name="seg_mismatches" value="2"/> + <param name="seg_length" value="25"/> + <param name="allow_indel_search" value="No"/> + <param name="use_junctions" value="Yes" /> + <param name="use_annotations" value="No" /> + <param name="use_juncs" value="No" /> + <param name="no_novel_juncs" value="No" /> + <param name="use_search" value="No" /> + <param name="microexon_search" value="Yes" /> +  + <param name="do_search" value="Yes" /> + <param name="anchor_len" value="21" /> + <param name="min_dist" value="10000021" /> + <param name="read_mismatches" value="3" /> + <param name="multireads" value="4" /> + <param name="multipairs" 
value="5" /> + <param name="ignore_chromosomes" value="chrM"/> + <output name="junctions" file="tophat2_out4j.bed" /> + <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" /> + </test> + </tests> + + <help> +**Tophat Overview** + +TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie, and then analyzes the mapping results to identify splice junctions between exons. Please cite: Trapnell, C., Pachter, L. and Salzberg, S.L. TopHat: discovering splice junctions with RNA-Seq. Bioinformatics 25, 1105-1111 (2009). + +.. _Tophat: http://tophat.cbcb.umd.edu/ + +------ + +**Know what you are doing** + +.. class:: warningmark + +There is no such thing (yet) as an automated gearshift in splice junction identification. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. + +.. __: http://tophat.cbcb.umd.edu/manual.html + +------ + +**Input formats** + +Tophat accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files. + +------ + +**Outputs** + +Tophat produces two output files: + +- junctions -- A UCSC BED_ track of junctions reported by TopHat. Each junction consists of two connected BED blocks, where each block is as long as the maximal overhang of any read spanning the junction. The score is the number of alignments spanning the junction. +- accepted_hits -- A list of read alignments in BAM_ format. + +.. _BED: http://genome.ucsc.edu/FAQ/FAQformat.html#format1 +.. _BAM: http://samtools.sourceforge.net/ + +Two other possible outputs, depending on the options you choose, are insertions and deletions, both of which are in BED format. 
+ +------- + +**Tophat settings** + +All of the options have a default value. You can change any of them. Some of the options in Tophat have been implemented here. + +------ + +**Tophat parameter list** + +This is a list of implemented Tophat options:: + + -r This is the expected (mean) inner distance between mate pairs. For example, for paired end runs with fragments + selected at 300bp, where each end is 50bp, you should set -r to be 200. There is no default, and this parameter + is required for paired end runs. + --mate-std-dev INT The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp. + -a/--min-anchor-length INT The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction. Note that individual spliced + alignments may span a junction with fewer than this many bases on one side. However, every junction involved in spliced alignments is supported by at least one + read with this many bases on each side. This must be at least 3 and the default is 8. + -m/--splice-mismatches INT The maximum number of mismatches that may appear in the "anchor" region of a spliced alignment. The default is 0. + -i/--min-intron-length INT The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart. The default is 70. + -I/--max-intron-length INT The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read. The default is 500000. + -F/--min-isoform-fraction 0.0-1.0 TopHat filters out junctions supported by too few alignments. Suppose a junction spanning two exons, A and B, is supported by S reads. Let the average depth of coverage of + exon A be D, and assume that it is higher than B. If S / D is less than the minimum isoform fraction, the junction is not reported.
A value of zero disables the + filter. The default is 0.15. + -g/--max-multihits INT Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many + alignments. The default is 40. + -G/--GTF [GTF 2.2 file] Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping. + -j/--raw-juncs [juncs file] Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-]; left and right are zero-based coordinates, and specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive. + --no-novel-juncs Only look for junctions indicated in the supplied GFF file. (ignored without -G) + --no-coverage-search Disables the coverage based search for junctions. + --coverage-search Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity. + --microexon-search With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer. + --segment-mismatches Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2. + --segment-length Each read is cut up into segments, each at least this long. These segments are mapped independently. The default is 25. + --min-coverage-intron The minimum intron length that may be found during coverage search. The default is 50. + --max-coverage-intron The maximum intron length that may be found during coverage search. The default is 20000.
+ --min-segment-intron The minimum intron length that may be found during split-segment search. The default is 50. + --max-segment-intron The maximum intron length that may be found during split-segment search. The default is 500000. + </help> +</tool> https://bitbucket.org/galaxy/galaxy-central/changeset/0a13c751de1a/ changeset: 0a13c751de1a user: jgoecks date: 2012-04-25 17:33:05 summary: Revert Tophat wrapper to support versions 1.3.0-1.4.1 affected #: 5 files diff -r de2891d266b2d489cc86cc8e7ea574d55cce85e8 -r 0a13c751de1adf649983aa66f53fd23811d03d8f test-data/tophat_out2j.bed --- a/test-data/tophat_out2j.bed +++ b/test-data/tophat_out2j.bed @@ -1,3 +1,3 @@ track name=junctions description="TopHat junctions" -test_chromosome 179 400 JUNC00000001 45 + 179 400 255,0,0 2 71,50 0,171 -test_chromosome 350 550 JUNC00000002 38 + 350 550 255,0,0 2 50,50 0,150 +test_chromosome 179 400 JUNC00000001 38 + 179 400 255,0,0 2 71,50 0,171 +test_chromosome 350 549 JUNC00000002 30 + 350 549 255,0,0 2 50,49 0,150 diff -r de2891d266b2d489cc86cc8e7ea574d55cce85e8 -r 0a13c751de1adf649983aa66f53fd23811d03d8f test-data/tophat_out3j.bed --- a/test-data/tophat_out3j.bed +++ b/test-data/tophat_out3j.bed @@ -1,3 +1,3 @@ track name=junctions description="TopHat junctions" -test_chromosome 177 400 JUNC00000001 27 + 177 400 255,0,0 2 73,50 0,173 -test_chromosome 350 550 JUNC00000002 26 + 350 550 255,0,0 2 50,50 0,150 +test_chromosome 180 400 JUNC00000001 23 + 180 400 255,0,0 2 70,50 0,170 +test_chromosome 350 550 JUNC00000002 24 + 350 550 255,0,0 2 50,50 0,150 diff -r de2891d266b2d489cc86cc8e7ea574d55cce85e8 -r 0a13c751de1adf649983aa66f53fd23811d03d8f test-data/tophat_out4j.bed --- a/test-data/tophat_out4j.bed +++ b/test-data/tophat_out4j.bed @@ -1,3 +1,3 @@ track name=junctions description="TopHat junctions" -test_chromosome 177 400 JUNC00000001 51 + 177 400 255,0,0 2 73,50 0,173 -test_chromosome 350 550 JUNC00000002 43 + 350 550 255,0,0 2 50,50 0,150 +test_chromosome 179 400 JUNC00000001 
38 + 179 400 255,0,0 2 71,50 0,171 +test_chromosome 350 549 JUNC00000002 30 + 350 549 255,0,0 2 50,49 0,150 diff -r de2891d266b2d489cc86cc8e7ea574d55cce85e8 -r 0a13c751de1adf649983aa66f53fd23811d03d8f tools/ngs_rna/tophat_wrapper.py --- a/tools/ngs_rna/tophat_wrapper.py +++ b/tools/ngs_rna/tophat_wrapper.py @@ -20,10 +20,6 @@ where each end is 50bp, you should set -r to be 200. There is no default, \ and this parameter is required for paired end runs.') parser.add_option( '', '--mate-std-dev', dest='mate_std_dev', help='Standard deviation of distribution on inner distances between male pairs.' ) - parser.add_option( '-n', '--transcriptome-mismatches', dest='transcriptome_mismatches' ) - parser.add_option( '', '--genome-read-mismatches', dest='genome_read_mismatches' ) - parser.add_option( '', '--read-mismatches', dest='read_mismatches' ) - parser.add_option( '', '--bowtie-n', action="store_true", dest='bowtie_n' ) parser.add_option( '-a', '--min-anchor-length', dest='min_anchor_length', help='The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction.' ) parser.add_option( '-m', '--splice-mismatches', dest='splice_mismatches', help='The maximum number of mismatches that can appear in the anchor region of a spliced alignment.' ) @@ -32,6 +28,7 @@ parser.add_option( '-I', '--max-intron-length', dest='max_intron_length', help='The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.' 
) parser.add_option( '-g', '--max_multihits', dest='max_multihits', help='Maximum number of alignments to be allowed' ) + parser.add_option( '', '--initial-read-mismatches', dest='initial_read_mismatches', help='Number of mismatches allowed in the initial read mapping' ) parser.add_option( '', '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' ) parser.add_option( '', '--seg-length', dest='seg_length', help='Minimum length of read segments' ) parser.add_option( '', '--library-type', dest='library_type', help='TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.' ) @@ -56,21 +53,17 @@ parser.add_option( '', '--no-novel-indels', action="store_true", dest='no_novel_indels', help="Skip indel search. Indel search is enabled by default.") # Types of search. parser.add_option( '', '--microexon-search', action="store_true", dest='microexon_search', help='With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.') + parser.add_option( '', '--closure-search', action="store_true", dest='closure_search', help='Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (<= 50bp)') + parser.add_option( '', '--no-closure-search', action="store_false", dest='closure_search' ) parser.add_option( '', '--coverage-search', action="store_true", dest='coverage_search', help='Enables the coverage based search for junctions. 
Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.') parser.add_option( '', '--no-coverage-search', action="store_false", dest='coverage_search' ) parser.add_option( '', '--min-segment-intron', dest='min_segment_intron', help='Minimum intron length that may be found during split-segment search' ) parser.add_option( '', '--max-segment-intron', dest='max_segment_intron', help='Maximum intron length that may be found during split-segment search' ) + parser.add_option( '', '--min-closure-exon', dest='min_closure_exon', help='Minimum length for exonic hops in potential splice graph' ) + parser.add_option( '', '--min-closure-intron', dest='min_closure_intron', help='Minimum intron length that may be found during closure search' ) + parser.add_option( '', '--max-closure-intron', dest='max_closure_intron', help='Maximum intron length that may be found during closure search' ) parser.add_option( '', '--min-coverage-intron', dest='min_coverage_intron', help='Minimum intron length that may be found during coverage search' ) parser.add_option( '', '--max-coverage-intron', dest='max_coverage_intron', help='Maximum intron length that may be found during coverage search' ) - - # Fusion search options. - parser.add_option( '', '--fusion-search', action='store_true', dest='fusion_search' ) - parser.add_option( '', '--fusion-anchor-length', dest='fusion_anchor_length' ) - parser.add_option( '', '--fusion-min-dist', dest='fusion_min_dist' ) - parser.add_option( '', '--fusion-read-mismatches', dest='fusion_read_mismatches' ) - parser.add_option( '', '--fusion-multireads', dest='fusion_multireads' ) - parser.add_option( '', '--fusion-multipairs', dest='fusion_multipairs' ) - parser.add_option( '', '--fusion-ignore-chromosomes', dest='fusion_ignore_chromosomes' ) # Wrapper options. 
parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' ) @@ -176,25 +169,22 @@ # Max options do not work for Tophat v1.2.0, despite documentation to the contrary. (Fixed in version 1.3.1) # need to warn user of this fact #sys.stdout.write( "Max insertion length and max deletion length options don't work in Tophat v1.2.0\n" ) - - if options.transcriptome_mismatches: - opts += ' --transcriptome-mismatches %i' % int( options.transcriptome_mismatches ) - if options.genome_read_mismatches: - opts += ' --genome-read-mismatches %i' % int( options.genome_read_mismatches ) - if options.read_mismatches: - opts += ' --read-mismatches %i' % int( options.read_mismatches ) - if options.bowtie_n: - opts += ' --bowtie-n' # Search type options. if options.coverage_search: opts += ' --coverage-search --min-coverage-intron %s --max-coverage-intron %s' % ( options.min_coverage_intron, options.max_coverage_intron ) else: opts += ' --no-coverage-search' + if options.closure_search: + opts += ' --closure-search --min-closure-exon %s --min-closure-intron %s --max-closure-intron %s' % ( options.min_closure_exon, options.min_closure_intron, options.max_closure_intron ) + else: + opts += ' --no-closure-search' if options.microexon_search: opts += ' --microexon-search' - if options.single_paired == 'paired' and options.mate_std_dev: + if options.single_paired == 'paired': opts += ' --mate-std-dev %s' % options.mate_std_dev + if options.initial_read_mismatches: + opts += ' --initial-read-mismatches %d' % int( options.initial_read_mismatches ) if options.seg_mismatches: opts += ' --segment-mismatches %d' % int( options.seg_mismatches ) if options.seg_length: @@ -203,22 +193,13 @@ opts += ' --min-segment-intron %d' % int( options.min_segment_intron ) if options.max_segment_intron: opts += ' --max-segment-intron %d' % int( options.max_segment_intron ) - - # Fusion search options. 
- if options.fusion_search: - opts += ' --fusion-search --fusion-anchor-length %i --fusion-min-dist %i --fusion-read-mismatches %i --fusion-multireads %i --fusion-multipairs %i --fusion-ignore-chromosomes %s' % \ - ( int( options.fusion_anchor_length ), int( options.fusion_min_dist ), - int( options.fusion_read_mismatches ), int( options.fusion_multireads ), - int( options.fusion_multipairs ), options.fusion_ignore_chromosomes ) - cmd = cmd % ( opts, index_path, reads ) - except Exception, e: # Clean up temp dirs if os.path.exists( tmp_index_dir ): shutil.rmtree( tmp_index_dir ) stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) ) - print cmd + #print cmd # Run try: @@ -226,6 +207,7 @@ tmp_stdout = open( tmp_out, 'wb' ) tmp_err = tempfile.NamedTemporaryFile().name tmp_stderr = open( tmp_err, 'wb' ) + print cmd proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) returncode = proc.wait() tmp_stderr.close() diff -r de2891d266b2d489cc86cc8e7ea574d55cce85e8 -r 0a13c751de1adf649983aa66f53fd23811d03d8f tools/ngs_rna/tophat_wrapper.xml --- a/tools/ngs_rna/tophat_wrapper.xml +++ b/tools/ngs_rna/tophat_wrapper.xml @@ -1,5 +1,5 @@ <tool id="tophat" name="Tophat for Illumina" version="0.5"> -  + <description>Find splice junctions using RNA-seq data</description><version_command>tophat --version</version_command><requirements> @@ -7,117 +7,150 @@ </requirements><command interpreter="python"> tophat_wrapper.py - - ## Change this to accommodate the number of threads you have available. - --num-threads="4" + ## Change this to accommodate the number of threads you have available. + --num-threads="4" - ## Provide outputs. - --junctions-output=$junctions - --hits-output=$accepted_hits + ## Provide outputs. + --junctions-output=$junctions + --hits-output=$accepted_hits - ## Handle reference file. 
- #if $refGenomeSource.genomeSource == "history": - --own-file=$refGenomeSource.ownFile - #else: - --indexes-path="${refGenomeSource.index.fields.path}" - #end if - - ## Are reads single-end or paired? - --single-paired=$singlePaired.sPaired - - ## First input file always required. - --input1=$input1 - - ## Second input only if input is paired-end. - #if $singlePaired.sPaired == "paired" - --input2=$singlePaired.input2 - -r $singlePaired.mate_inner_distance - --mate-std-dev=$singlePaired.mate_std_dev - #end if - - ## Set params. - --settings=$params.settingsType - #if $params.settingsType == "full": - -n $params.transcriptome_mismatches - --genome-read-mismatches $params.genome_read_mismatches - --read-mismatches $params.read_mismatches - #if str($params.bowtie_n) == "Yes": - --bowtie-n - #end if - - -a $params.anchor_length - -m $params.splice_mismatches - -i $params.min_intron_length - -I $params.max_intron_length - -g $params.max_multihits - --min-segment-intron $params.min_segment_intron - --max-segment-intron $params.max_segment_intron - --seg-mismatches=$params.seg_mismatches - --seg-length=$params.seg_length - --library-type=$params.library_type - - ## Indel search. - #if $params.indel_search.allow_indel_search == "Yes": - ## --allow-indels - --max-insertion-length $params.indel_search.max_insertion_length - --max-deletion-length $params.indel_search.max_deletion_length + ## Handle reference file. + #if $refGenomeSource.genomeSource == "history": + --own-file=$refGenomeSource.ownFile #else: - --no-novel-indels + --indexes-path="${refGenomeSource.index.fields.path}" #end if - ## Supplying junctions parameters. - #if $params.own_junctions.use_junctions == "Yes": - #if $params.own_junctions.gene_model_ann.use_annotations == "Yes": - -G $params.own_junctions.gene_model_ann.gene_annotation_model + ## Are reads single-end or paired? + --single-paired=$singlePaired.sPaired + + ## First input file always required. 
+ --input1=$input1 + + ## Set params based on whether reads are single-end or paired. + #if $singlePaired.sPaired == "single": + --settings=$singlePaired.sParams.sSettingsType + #if $singlePaired.sParams.sSettingsType == "full": + -a $singlePaired.sParams.anchor_length + -m $singlePaired.sParams.splice_mismatches + -i $singlePaired.sParams.min_intron_length + -I $singlePaired.sParams.max_intron_length + -g $singlePaired.sParams.max_multihits + --min-segment-intron $singlePaired.sParams.min_segment_intron + --max-segment-intron $singlePaired.sParams.max_segment_intron + --initial-read-mismatches=$singlePaired.sParams.initial_read_mismatches + --seg-mismatches=$singlePaired.sParams.seg_mismatches + --seg-length=$singlePaired.sParams.seg_length + --library-type=$singlePaired.sParams.library_type + + ## Indel search. + #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes": + ## --allow-indels + --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length + --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length + #else: + --no-novel-indels + #end if + + ## Supplying junctions parameters. 
+ #if $singlePaired.sParams.own_junctions.use_junctions == "Yes": + #if $singlePaired.sParams.own_junctions.gene_model_ann.use_annotations == "Yes": + -G $singlePaired.sParams.own_junctions.gene_model_ann.gene_annotation_model + #end if + #if $singlePaired.sParams.own_junctions.raw_juncs.use_juncs == "Yes": + -j $singlePaired.sParams.own_junctions.raw_juncs.raw_juncs + #end if + ## TODO: No idea why a string cast is necessary, but it is: + #if str($singlePaired.sParams.own_junctions.no_novel_juncs) == "Yes": + --no-novel-juncs + #end if + #end if + + #if $singlePaired.sParams.closure_search.use_search == "Yes": + --closure-search + --min-closure-exon $singlePaired.sParams.closure_search.min_closure_exon + --min-closure-intron $singlePaired.sParams.closure_search.min_closure_intron + --max-closure-intron $singlePaired.sParams.closure_search.max_closure_intron + #else: + --no-closure-search + #end if + #if $singlePaired.sParams.coverage_search.use_search == "Yes": + --coverage-search + --min-coverage-intron $singlePaired.sParams.coverage_search.min_coverage_intron + --max-coverage-intron $singlePaired.sParams.coverage_search.max_coverage_intron + #else: + --no-coverage-search + #end if + ## TODO: No idea why the type conversion is necessary, but it seems to be. 
+ #if str($singlePaired.sParams.microexon_search) == "Yes": + --microexon-search + #end if #end if - #if $params.own_junctions.raw_juncs.use_juncs == "Yes": - -j $params.own_junctions.raw_juncs.raw_juncs - #end if - ## TODO: No idea why a string cast is necessary, but it is: - #if str($params.own_junctions.no_novel_juncs) == "Yes": - --no-novel-juncs + #else: + --input2=$singlePaired.input2 + -r $singlePaired.mate_inner_distance + --settings=$singlePaired.pParams.pSettingsType + #if $singlePaired.pParams.pSettingsType == "full": + --mate-std-dev=$singlePaired.pParams.mate_std_dev + -a $singlePaired.pParams.anchor_length + -m $singlePaired.pParams.splice_mismatches + -i $singlePaired.pParams.min_intron_length + -I $singlePaired.pParams.max_intron_length + -g $singlePaired.pParams.max_multihits + --min-segment-intron $singlePaired.pParams.min_segment_intron + --max-segment-intron $singlePaired.pParams.max_segment_intron + --initial-read-mismatches=$singlePaired.pParams.initial_read_mismatches + --seg-mismatches=$singlePaired.pParams.seg_mismatches + --seg-length=$singlePaired.pParams.seg_length + --library-type=$singlePaired.pParams.library_type + + ## Indel search. + #if $singlePaired.pParams.indel_search.allow_indel_search == "Yes": + ## --allow-indels + --max-insertion-length $singlePaired.pParams.indel_search.max_insertion_length + --max-deletion-length $singlePaired.pParams.indel_search.max_deletion_length + #else: + --no-novel-indels + #end if + + ## Supplying junctions parameters. 
+ #if $singlePaired.pParams.own_junctions.use_junctions == "Yes": + #if $singlePaired.pParams.own_junctions.gene_model_ann.use_annotations == "Yes": + -G $singlePaired.pParams.own_junctions.gene_model_ann.gene_annotation_model + #end if + #if $singlePaired.pParams.own_junctions.raw_juncs.use_juncs == "Yes": + -j $singlePaired.pParams.own_junctions.raw_juncs.raw_juncs + #end if + ## TODO: No idea why type cast is necessary, but it is: + #if str($singlePaired.pParams.own_junctions.no_novel_juncs) == "Yes": + --no-novel-juncs + #end if + #end if + + #if $singlePaired.pParams.closure_search.use_search == "Yes": + --closure-search + --min-closure-exon $singlePaired.pParams.closure_search.min_closure_exon + --min-closure-intron $singlePaired.pParams.closure_search.min_closure_intron + --max-closure-intron $singlePaired.pParams.closure_search.max_closure_intron + #else: + --no-closure-search + #end if + #if $singlePaired.pParams.coverage_search.use_search == "Yes": + --coverage-search + --min-coverage-intron $singlePaired.pParams.coverage_search.min_coverage_intron + --max-coverage-intron $singlePaired.pParams.coverage_search.max_coverage_intron + #else: + --no-coverage-search + #end if + ## TODO: No idea why the type conversion is necessary, but it seems to be. + #if str ($singlePaired.pParams.microexon_search) == "Yes": + --microexon-search + #end if #end if #end if - - #if $params.coverage_search.use_search == "Yes": - --coverage-search - --min-coverage-intron $params.coverage_search.min_coverage_intron - --max-coverage-intron $params.coverage_search.max_coverage_intron - #else: - --no-coverage-search - #end if - ## TODO: No idea why the type conversion is necessary, but it seems to be. 
- #if str($params.microexon_search) == "Yes": - --microexon-search - #end if - - #if $params.fusion_search.do_search == "Yes": - --fusion-search - --fusion-anchor-length $params.fusion_search.anchor_len - --fusion-min-dist $params.fusion_search.min_dist - --fusion-read-mismatches $params.fusion_search.read_mismatches - --fusion-multireads $params.fusion_search.multireads - --fusion-multipairs $params.fusion_search.multipairs - --fusion-ignore-chromosomes $params.fusion_search.ignore_chromosomes - #end if - #end if </command><inputs> - <conditional name="singlePaired"> - <param name="sPaired" type="select" label="Is this library mate-paired?"> - <option value="single">Single-end</option> - <option value="paired">Paired-end</option> - </param> - <when value="single"> - <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/> - </when> - <when value="paired"> - <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> - <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> - <param name="mate_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs" /> - <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="The standard deviation for the distribution on inner distances between mate pairs."/> - </when> - </conditional> + <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /><conditional name="refGenomeSource"><param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" 
help="Built-ins were indexed using default options"><option value="indexed">Use a built-in index</option> @@ -135,120 +168,221 @@ <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /></when></conditional> - <conditional name="params"> - <param name="settingsType" type="select" label="TopHat settings to use" help="You can use the default settings or set custom values for any of Tophat's parameters."> - <option value="preSet">Use Defaults</option> - <option value="full">Full parameter list</option> + <conditional name="singlePaired"> + <param name="sPaired" type="select" label="Is this library mate-paired?"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option></param> - <when value="preSet" /> -  - <when value="full"> - <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol."> - <option value="fr-unstranded">FR Unstranded</option> - <option value="fr-firststrand">FR First Strand</option> - <option value="fr-secondstrand">FR Second Strand</option> - </param> - <param name="transcriptome_mismatches" type="integer" value="2" label="Transcriptome mismatches" help="Maximum number of mismatches allowed when reads are aligned to the transcriptome. When Bowtie2 is used, this number is also used to decide whether or not to further re-align some of the transcriptome-mapped reads to the genome. If the alignment score of the best alignment among multiple candidates for a read is lower than 'bowtie2-min-score', which is internally defined as (max_penalty - 1) * max_mismatches, then the reads will be kept for re-alignment through the rest of the pipeline. You can specify max_penalty via '--b2-mp' option." 
/> - <param name="genome_read_mismatches" type="integer" value="2" label="Genome read mismatches" help="When whole reads are first mapped on the genome, this many mismatches in each read alignment are allowed. The default is 2. This number is also used to decide whether to further re-align some of the reads (by splitting them into segments) with a similar scoring threshold scheme as described for the --transcriptome-mismatches option above." /> - <param name="read_mismatches" type="integer" value="2" label="Final read mismatches" help="Final read alignments having more than these many mismatches are discarded." /> - <param name="bowtie_n" type="select" label="Use bowtie -n mode"> - <option selected="true" value="No">No</option> - <option value="Yes">Yes</option> - </param> - <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /> - <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" /> - <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." /> - <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." 
/> - <conditional name="indel_search"> - <param name="allow_indel_search" type="select" label="Allow indel search"> + <when value="single"> + <conditional name="sParams"> + <param name="sSettingsType" type="select" label="TopHat settings to use" help="You can use the default settings or set custom values for any of Tophat's parameters."> + <option value="preSet">Use Defaults</option> + <option value="full">Full parameter list</option> + </param> + <when value="preSet" /> +  + <when value="full"> + <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol."> + <option value="fr-unstranded">FR Unstranded</option> + <option value="fr-firststrand">FR First Strand</option> + <option value="fr-secondstrand">FR Second Strand</option> + </param> + <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /> + <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" /> + <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." /> + <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." 
/> + <conditional name="indel_search"> + <param name="allow_indel_search" type="select" label="Allow indel search"> + <option value="Yes">Yes</option> + <option value="No">No</option> + </param> + <when value="No"/> + <when value="Yes"> + <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." /> + <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." /> + </when> + </conditional> +alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" /> + <param name="max_multihits" type="integer" value="20" label="Maximum number of alignments to be allowed" /> + <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" /> + <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" /> + <param name="initial_read_mismatches" type="integer" min="0" value="2" label="Number of mismatches allowed in the initial read mapping" /> + <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" /> + <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" /> + +  + <conditional name="own_junctions"> + <param name="use_junctions" type="select" label="Use Own Junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="Yes"> + <conditional name="gene_model_ann"> + <param name="use_annotations" type="select" label="Use Gene Annotation Model"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No" /> + <when value="Yes"> + <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" 
help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/> + </when> + </conditional> + <conditional name="raw_juncs"> + <param name="use_juncs" type="select" label="Use Raw Junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No" /> + <when value="Yes"> + <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/> + </when> + </conditional> + <param name="no_novel_juncs" type="select" label="Only look for supplied junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + </when> + <when value="No" /> + </conditional> + +  + <conditional name="closure_search"> + <param name="use_search" type="select" label="Use Closure Search"> + <option value="No">No</option><option value="Yes">Yes</option> - <option value="No">No</option> - </param> - <when value="No"/> - <when value="Yes"> - <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." /> - <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." 
/> - </when> - </conditional> - alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" /> - <param name="max_multihits" type="integer" value="20" label="Maximum number of alignments to be allowed" /> - <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" /> - <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" /> - <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" /> - <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" /> - -  - <conditional name="own_junctions"> - <param name="use_junctions" type="select" label="Use Own Junctions"> + </param> + <when value="Yes"> + <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." 
/> + <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" /> + <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" /> + </when> + <when value="No" /> + </conditional> +  + <conditional name="coverage_search"> + <param name="use_search" type="select" label="Use Coverage Search"> + <option selected="true" value="Yes">Yes</option> + <option value="No">No</option> + </param> + <when value="Yes"> + <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" /> + <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" /> + </when> + <when value="No" /> + </conditional> + <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. 
Works only for reads 50bp or longer."><option value="No">No</option><option value="Yes">Yes</option></param> - <when value="Yes"> - <conditional name="gene_model_ann"> - <param name="use_annotations" type="select" label="Use Gene Annotation Model"> - <option value="No">No</option> - <option value="Yes">Yes</option> - </param> - <when value="No" /> - <when value="Yes"> - <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/> - </when> - </conditional> - <conditional name="raw_juncs"> - <param name="use_juncs" type="select" label="Use Raw Junctions"> - <option value="No">No</option> - <option value="Yes">Yes</option> - </param> - <when value="No" /> - <when value="Yes"> - <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/> - </when> - </conditional> - <param name="no_novel_juncs" type="select" label="Only look for supplied junctions"> + </when> + </conditional> + </when> + <when value="paired"> + <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> + <param name="mate_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs" /> + <conditional name="pParams"> + <param name="pSettingsType" type="select" label="TopHat settings to use" help="For most mapping needs use Commonly used settings. 
If you want full control use Full parameter list"> + <option value="preSet">Commonly used</option> + <option value="full">Full parameter list</option> + </param> + <when value="preSet" /> +  + <when value="full"> + <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol."> + <option value="fr-unstranded">FR Unstranded</option> + <option value="fr-firststrand">FR First Strand</option> + <option value="fr-secondstrand">FR Second Strand</option> + </param> + <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="The standard deviation for the distribution on inner distances between mate pairs."/> + <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /> + <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" /> + <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." /> + <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." 
/> + <conditional name="indel_search"> + <param name="allow_indel_search" type="select" label="Allow indel search"> + <option value="Yes">Yes</option> + <option value="No">No</option> + </param> + <when value="No"/> + <when value="Yes"> + <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." /> + <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." /> + </when> + </conditional> + <param name="max_multihits" type="integer" value="20" label="Maximum number of alignments to be allowed" /> + <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" /> + <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" /> + <param name="initial_read_mismatches" type="integer" min="0" value="2" label="Number of mismatches allowed in the initial read mapping" /> + <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" /> + <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" /> +  + <conditional name="own_junctions"> + <param name="use_junctions" type="select" label="Use Own Junctions"><option value="No">No</option><option value="Yes">Yes</option></param> - </when> - <when value="No" /> - </conditional> - -  - <conditional name="coverage_search"> - <param name="use_search" type="select" label="Use Coverage Search"> - <option selected="true" value="Yes">Yes</option> + <when value="Yes"> + <conditional name="gene_model_ann"> + <param name="use_annotations" type="select" label="Use Gene Annotation Model"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No" /> + <when 
value="Yes"> + <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/> + </when> + </conditional> + <conditional name="raw_juncs"> + <param name="use_juncs" type="select" label="Use Raw Junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No" /> + <when value="Yes"> + <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/> + </when> + </conditional> + <param name="no_novel_juncs" type="select" label="Only look for supplied junctions"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + </when> + <when value="No" /> + </conditional> + +  + <conditional name="closure_search"> + <param name="use_search" type="select" label="Use Closure Search"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="Yes"> + <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." 
/> + <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" /> + <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" /> + </when> + <when value="No" /> + </conditional> +  + <conditional name="coverage_search"> + <param name="use_search" type="select" label="Use Coverage Search"> + <option selected="true" value="Yes">Yes</option> + <option value="No">No</option> + </param> + <when value="Yes"> + <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" /> + <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" /> + </when> + <when value="No" /> + </conditional> + <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer."><option value="No">No</option> - </param> - <when value="Yes"> - <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" /> - <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" /> - </when> - <when value="No" /> - </conditional> - -  - <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. 
Works only for reads 50bp or longer."> - <option value="No">No</option> - <option value="Yes">Yes</option> - </param> - -  - <conditional name="fusion_search"> - <param name="do_search" type="select" label="Do Fusion Search"> - <option selected="true" value="No">No</option> - <option value="Yes">Yes</option> + <option value="Yes">Yes</option></param> - <when value="No" /> - <when value="Yes"> - <param name="anchor_len" type="integer" value="20" label="Anchor Length" help="A 'supporting' read must map to both sides of a fusion by at least this many bases."/> - <param name="min_dist" type="integer" value="10000000" label="Minimum Distance" help="For intra-chromosomal fusions, TopHat-Fusion tries to find fusions separated by at least this distance."/> - <param name="read_mismatches" type="integer" value="2" label="Read Mismatches" help="Reads support fusions if they map across fusion with at most this many mismatches."/> - <param name="multireads" type="integer" value="2" label="Multireads" help="Reads that map to more than this many places will be ignored. 
It may be possible that a fusion is supported by reads (or pairs) that map to multiple places."/> - <param name="multipairs" type="integer" value="2" label="Multipairs" help="Pairs that map to more than this many places will be ignored."/> - <param name="ignore_chromosomes" type="text" value='' label="Ignore some chromosomes such as chrM when detecting fusion break points"/> - </when> - </conditional> - </when> - </conditional> + </when> + </conditional> + </when> + </conditional></inputs><outputs> @@ -337,11 +471,11 @@ tophat -o tmp_dir -p 1 tophat_in1 test-data/tophat_in2.fastqsanger Rename the files in tmp_dir appropriately --> - <param name="sPaired" value="single" /><param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" /><param name="genomeSource" value="indexed" /><param name="index" value="tophat_test" /> - <param name="settingsType" value="preSet" /> + <param name="sPaired" value="single" /> + <param name="sSettingsType" value="preSet" /><output name="junctions" file="tophat_out1j.bed" /><output name="accepted_hits" file="tophat_out1h.bam" compare="sim_size" /></test> @@ -352,13 +486,13 @@ tophat -o tmp_dir -p 1 -r 20 tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger Rename the files in tmp_dir appropriately --> - <param name="sPaired" value="paired" /><param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" /> - <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger" /><param name="genomeSource" value="history" /><param name="ownFile" ftype="fasta" value="tophat_in1.fasta" /> + <param name="sPaired" value="paired" /> + <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger" /><param name="mate_inner_distance" value="20" /> - <param name="settingsType" value="preSet" /> + <param name="pSettingsType" value="preSet" /><output name="junctions" file="tophat_out2j.bed" /><output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" /></test> @@ -366,20 +500,16 
@@ <test> - <param name="sPaired" value="single"/><param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/><param name="genomeSource" value="history"/><param name="ownFile" value="tophat_in1.fasta"/> - <param name="settingsType" value="full"/> + <param name="sPaired" value="single"/> + <param name="sSettingsType" value="full"/><param name="library_type" value="FR Unstranded"/> - <param name="transcriptome_mismatches" value="2"/> - <param name="genome_read_mismatches" value="2"/> - <param name="read_mismatches" value="2"/> - <param name="bowtie_n" value="No"/><param name="anchor_length" value="8"/><param name="splice_mismatches" value="0"/><param name="min_intron_length" value="70"/> @@ -397,17 +527,13 @@ <param name="use_juncs" value="No" /><param name="no_novel_juncs" value="No" /><param name="use_search" value="Yes" /> + <param name="min_closure_exon" value="50" /> + <param name="min_closure_intron" value="50" /> + <param name="max_closure_intron" value="5000" /> + <param name="use_search" value="Yes" /><param name="min_coverage_intron" value="50" /><param name="max_coverage_intron" value="20000" /><param name="microexon_search" value="Yes" /> -  - <param name="do_search" value="Yes" /> - <param name="anchor_len" value="21" /> - <param name="min_dist" value="10000021" /> - <param name="read_mismatches" value="3" /> - <param name="multireads" value="4" /> - <param name="multipairs" value="5" /> - <param name="ignore_chromosomes" value="chrM"/><output name="insertions" file="tophat_out3i.bed" /><output name="deletions" file="tophat_out3d.bed" /><output name="junctions" file="tophat_out3j.bed" /> @@ -420,18 +546,14 @@ Replace the + with double-dash Rename the files in tmp_dir appropriately --> - <param name="sPaired" value="paired"/><param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/> - <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/><param name="genomeSource" value="indexed"/><param name="index" 
value="tophat_test"/> + <param name="sPaired" value="paired"/> + <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/><param name="mate_inner_distance" value="20"/> - <param name="settingsType" value="full"/> + <param name="pSettingsType" value="full"/><param name="library_type" value="FR Unstranded"/> - <param name="transcriptome_mismatches" value="3"/> - <param name="genome_read_mismatches" value="4"/> - <param name="read_mismatches" value="5"/> - <param name="bowtie_n" value="Yes"/><param name="mate_std_dev" value="20"/><param name="anchor_length" value="8"/><param name="splice_mismatches" value="0"/> @@ -449,14 +571,6 @@ <param name="no_novel_juncs" value="No" /><param name="use_search" value="No" /><param name="microexon_search" value="Yes" /> -  - <param name="do_search" value="Yes" /> - <param name="anchor_len" value="21" /> - <param name="min_dist" value="10000021" /> - <param name="read_mismatches" value="3" /> - <param name="multireads" value="4" /> - <param name="multipairs" value="5" /> - <param name="ignore_chromosomes" value="chrM"/><output name="junctions" file="tophat_out4j.bed" /><output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" /></test> @@ -529,11 +643,17 @@ -G/--GTF [GTF 2.2 file] Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping. -j/--raw-juncs [juncs file] Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive. -no-novel-juncs Only look for junctions indicated in the supplied GFF file. 
(ignored without -G) + --no-closure-search Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default. + --closure-search Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (about or less than 50bp) --no-coverage-search Disables the coverage based search for junctions. --coverage-search Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity. --microexon-search With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer. + --butterfly-search TopHat will use a slower but potentially more sensitive algorithm to find junctions in addition to its standard search. Consider using this if you expect that your experiment produced a lot of reads from pre-mRNA, that fall within the introns of your transcripts. --segment-mismatches Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2. --segment-length Each read is cut up into segments, each at least this long. These segments are mapped independently. The default is 25. + --min-closure-exon During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50. + --min-closure-intron The minimum intron length that may be found during closure search. The default is 50. + --max-closure-intron The maximum intron length that may be found during closure search. The default is 5000. --min-coverage-intron The minimum intron length that may be found during coverage search. The default is 50. --max-coverage-intron The maximum intron length that may be found during coverage search. The default is 20000. 
--min-segment-intron The minimum intron length that may be found during split-segment search. The default is 50. Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.

1 0

commit/galaxy-central: jgoecks: Fix class assignment for links/menus/popups in grids.
by Bitbucket 24 Apr '12

24 Apr '12

1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/0cffe389e1b3/ changeset: 0cffe389e1b3 user: jgoecks date: 2012-04-25 04:57:56 summary: Fix class assignment for links/menus/popups in grids. affected #: 1 file diff -r af3265876bc3714d1768169d1f84eea358405c2b -r 0cffe389e1b3470e18494fb21491e75989282b97 templates/grid_base.mako --- a/templates/grid_base.mako +++ b/templates/grid_base.mako @@ -804,10 +804,10 @@ id = 'grid-%d-popup' % i # Determine appropriate class cls = "" - if column.attach_popup or href: + if column.attach_popup: cls = "menubutton" - if column.attach_popup and href: - cls = "menubutton split" + if href: + cls += " split" %><td ${nowrap}> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.

1 0

commit/galaxy-central: jgoecks: Include more Tophat2 options.
by Bitbucket 24 Apr '12

24 Apr '12

1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/af3265876bc3/ changeset: af3265876bc3 user: jgoecks date: 2012-04-25 04:30:22 summary: Include more Tophat2 options. affected #: 4 files diff -r 00d26ae73e5aa56ef7e0f59b2b89ff92ca7aaaf7 -r af3265876bc3714d1768169d1f84eea358405c2b test-data/tophat_out3j.bed --- a/test-data/tophat_out3j.bed +++ b/test-data/tophat_out3j.bed @@ -1,3 +1,3 @@ track name=junctions description="TopHat junctions" -test_chromosome 180 400 JUNC00000001 23 + 180 400 255,0,0 2 70,50 0,170 -test_chromosome 350 550 JUNC00000002 24 + 350 550 255,0,0 2 50,50 0,150 +test_chromosome 177 400 JUNC00000001 27 + 177 400 255,0,0 2 73,50 0,173 +test_chromosome 350 550 JUNC00000002 26 + 350 550 255,0,0 2 50,50 0,150 diff -r 00d26ae73e5aa56ef7e0f59b2b89ff92ca7aaaf7 -r af3265876bc3714d1768169d1f84eea358405c2b test-data/tophat_out4j.bed --- a/test-data/tophat_out4j.bed +++ b/test-data/tophat_out4j.bed @@ -1,3 +1,3 @@ track name=junctions description="TopHat junctions" -test_chromosome 179 400 JUNC00000001 45 + 179 400 255,0,0 2 71,50 0,171 -test_chromosome 350 550 JUNC00000002 38 + 350 550 255,0,0 2 50,50 0,150 +test_chromosome 177 400 JUNC00000001 51 + 177 400 255,0,0 2 73,50 0,173 +test_chromosome 350 550 JUNC00000002 43 + 350 550 255,0,0 2 50,50 0,150 diff -r 00d26ae73e5aa56ef7e0f59b2b89ff92ca7aaaf7 -r af3265876bc3714d1768169d1f84eea358405c2b tools/ngs_rna/tophat_wrapper.py --- a/tools/ngs_rna/tophat_wrapper.py +++ b/tools/ngs_rna/tophat_wrapper.py @@ -20,6 +20,10 @@ where each end is 50bp, you should set -r to be 200. There is no default, \ and this parameter is required for paired end runs.') parser.add_option( '', '--mate-std-dev', dest='mate_std_dev', help='Standard deviation of distribution on inner distances between male pairs.' 
) + parser.add_option( '-n', '--transcriptome-mismatches', dest='transcriptome_mismatches' ) + parser.add_option( '', '--genome-read-mismatches', dest='genome_read_mismatches' ) + parser.add_option( '', '--read-mismatches', dest='read_mismatches' ) + parser.add_option( '', '--bowtie-n', action="store_true", dest='bowtie_n' ) parser.add_option( '-a', '--min-anchor-length', dest='min_anchor_length', help='The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction.' ) parser.add_option( '-m', '--splice-mismatches', dest='splice_mismatches', help='The maximum number of mismatches that can appear in the anchor region of a spliced alignment.' ) @@ -172,6 +176,15 @@ # Max options do not work for Tophat v1.2.0, despite documentation to the contrary. (Fixed in version 1.3.1) # need to warn user of this fact #sys.stdout.write( "Max insertion length and max deletion length options don't work in Tophat v1.2.0\n" ) + + if options.transcriptome_mismatches: + opts += ' --transcriptome-mismatches %i' % int( options.transcriptome_mismatches ) + if options.genome_read_mismatches: + opts += ' --genome-read-mismatches %i' % int( options.genome_read_mismatches ) + if options.read_mismatches: + opts += ' --read-mismatches %i' % int( options.read_mismatches ) + if options.bowtie_n: + opts += ' --bowtie-n' # Search type options. if options.coverage_search: diff -r 00d26ae73e5aa56ef7e0f59b2b89ff92ca7aaaf7 -r af3265876bc3714d1768169d1f84eea358405c2b tools/ngs_rna/tophat_wrapper.xml --- a/tools/ngs_rna/tophat_wrapper.xml +++ b/tools/ngs_rna/tophat_wrapper.xml @@ -38,6 +38,13 @@ ## Set params. 
--settings=$params.settingsType #if $params.settingsType == "full": + -n $params.transcriptome_mismatches + --genome-read-mismatches $params.genome_read_mismatches + --read-mismatches $params.read_mismatches + #if str($params.bowtie_n) == "Yes": + --bowtie-n + #end if + -a $params.anchor_length -m $params.splice_mismatches -i $params.min_intron_length @@ -141,6 +148,13 @@ <option value="fr-firststrand">FR First Strand</option><option value="fr-secondstrand">FR Second Strand</option></param> + <param name="transcriptome_mismatches" type="integer" value="2" label="Transcriptome mismatches" help="Maximum number of mismatches allowed when reads are aligned to the transcriptome. When Bowtie2 is used, this number is also used to decide whether or not to further re-align some of the transcriptome-mapped reads to the genome. If the alignment score of the best alignment among multiple candidates for a read is lower than 'bowtie2-min-score', which is internally defined as (max_penalty - 1) * max_mismatches, then the reads will be kept for re-alignment through the rest of the pipeline. You can specify max_penalty via '--b2-mp' option." /> + <param name="genome_read_mismatches" type="integer" value="2" label="Genome read mismatches" help="When whole reads are first mapped on the genome, this many mismatches in each read alignment are allowed. The default is 2. This number is also used to decide whether to further re-align some of the reads (by splitting them into segments) with a similar scoring threshold scheme as described for the --transcriptome-mismatches option above." /> + <param name="read_mismatches" type="integer" value="2" label="Final read mismatches" help="Final read alignments having more than these many mismatches are discarded." 
/> + <param name="bowtie_n" type="select" label="Use bowtie -n mode"> + <option selected="true" value="No">No</option> + <option value="Yes">Yes</option> + </param><param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /><param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" /><param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." /> @@ -362,6 +376,10 @@ <param name="ownFile" value="tophat_in1.fasta"/><param name="settingsType" value="full"/><param name="library_type" value="FR Unstranded"/> + <param name="transcriptome_mismatches" value="2"/> + <param name="genome_read_mismatches" value="2"/> + <param name="read_mismatches" value="2"/> + <param name="bowtie_n" value="No"/><param name="anchor_length" value="8"/><param name="splice_mismatches" value="0"/><param name="min_intron_length" value="70"/> @@ -410,6 +428,10 @@ <param name="mate_inner_distance" value="20"/><param name="settingsType" value="full"/><param name="library_type" value="FR Unstranded"/> + <param name="transcriptome_mismatches" value="3"/> + <param name="genome_read_mismatches" value="4"/> + <param name="read_mismatches" value="5"/> + <param name="bowtie_n" value="Yes"/><param name="mate_std_dev" value="20"/><param name="anchor_length" value="8"/><param name="splice_mismatches" value="0"/> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.

1 0

commit/galaxy-central: greg: Fix for installing from a list of tool shed repositories that matched a search criteria.
by Bitbucket 24 Apr '12

24 Apr '12

1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/00d26ae73e5a/ changeset: 00d26ae73e5a user: greg date: 2012-04-24 20:25:23 summary: Fix for installing from a list of tool shed repositories that matched a search criteria. affected #: 1 file diff -r e34f80fe39ecab5d76cd20ded0eeac449351b482 -r 00d26ae73e5aa56ef7e0f59b2b89ff92ca7aaaf7 lib/galaxy/webapps/community/controllers/repository.py --- a/lib/galaxy/webapps/community/controllers/repository.py +++ b/lib/galaxy/webapps/community/controllers/repository.py @@ -755,10 +755,14 @@ if not includes_tools and 'tools' in repository_metadata.metadata: includes_tools = True repository = get_repository( trans, trans.security.encode_id( repository_metadata.repository_id ) ) + # Get the changelog rev for this changeset_revision. + repo_dir = repository.repo_path + repo = hg.repository( get_configured_ui(), repo_dir ) + changeset_revision = repository_metadata.changeset_revision + ctx = get_changectx_for_changeset( repo, changeset_revision ) repository_id = trans.security.encode_id( repository.id ) - changeset_revision = repository_metadata.changeset_revision repository_clone_url = generate_clone_url( trans, repository_id ) - repo_info_dict[ repository.name ] = ( repository.description, repository_clone_url, changeset_revision ) + repo_info_dict[ repository.name ] = ( repository.description, repository_clone_url, changeset_revision, str( ctx.rev() ) ) return encode( repo_info_dict ), includes_tools @web.expose def preview_tools_in_changeset( self, trans, repository_id, **kwd ): Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.

1 0