commit/galaxy-central: jgoecks: Add options to Tophat wrapper to support v1.2.0 functionality: (a) allow indel search; (b) max insertion and max deletion lengths; and (c) library type. Update functional tests to test new options. Also update function that checks from_work_dir output attribute to use real path to prevent mishandling of symbolic links.
1 new changeset in galaxy-central: http://bitbucket.org/galaxy/galaxy-central/changeset/c0d6d17fc7db/ changeset: r5122:c0d6d17fc7db user: jgoecks date: 2011-02-24 21:24:52 summary: Add options to Tophat wrapper to support v1.2.0 functionality: (a) allow indel search; (b) max insertion and max deletion lengths; and (c) library type. Update functional tests to tests new options. Also update function that checks from_work_dir output attribute to use real path to prevent mishandling of symbolic links. affected #: 5 files (6.0 KB) --- a/lib/galaxy/jobs/__init__.py Thu Feb 24 15:23:21 2011 -0500 +++ b/lib/galaxy/jobs/__init__.py Thu Feb 24 15:24:52 2011 -0500 @@ -505,10 +505,14 @@ return job_context = ExpressionContext( dict( stdout = stdout, stderr = stderr ) ) job_tool = self.app.toolbox.tools_by_id.get( job.tool_id, None ) - def file_in_dir( file_path, a_dir ): - """ Returns true if file is in directory. """ - abs_file_path = os.path.abspath( file_path ) - return os.path.split( abs_file_path )[0] == os.path.abspath( a_dir ) + def in_directory( file, directory ): + # Make both absolute. + directory = os.path.realpath( directory ) + file = os.path.realpath( file ) + + #Return true, if the common prefix of both is equal to directory + #e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b + return os.path.commonprefix( [ file, directory ] ) == directory for dataset_assoc in job.output_datasets + job.output_library_datasets: context = self.get_dataset_finish_context( job_context, dataset_assoc.dataset.dataset ) #should this also be checking library associations? - can a library item be added from a history before the job has ended? - lets not allow this to occur @@ -523,7 +527,7 @@ if hda_tool_output and hda_tool_output.from_work_dir: # Copy from working dir to HDA. 
source_file = os.path.join( os.path.abspath( self.working_directory ), hda_tool_output.from_work_dir ) - if file_in_dir( source_file, self.working_directory ): + if in_directory( source_file, self.working_directory ): try: shutil.move( source_file, dataset.file_name ) log.debug( "finish(): Moved %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) ) --- a/tools/ngs_rna/tophat_wrapper.py Thu Feb 24 15:23:21 2011 -0500 +++ b/tools/ngs_rna/tophat_wrapper.py Thu Feb 24 15:24:52 2011 -0500 @@ -30,6 +30,10 @@ parser.add_option( '-g', '--max_multihits', dest='max_multihits', help='Maximum number of alignments to be allowed' ) parser.add_option( '', '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' ) parser.add_option( '', '--seg-length', dest='seg_length', help='Minimum length of read segments' ) + parser.add_option( '', '--library-type', dest='library_type', help='TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.' ) + parser.add_option( '', '--allow-indels', action="store_true", help='Allow indel search. Indel search is disabled by default.' ) + parser.add_option( '', '--max-insertion-length', dest='max_insertion_length', help='The maximum insertion length. The default is 3.' ) + parser.add_option( '', '--max-deletion-length', dest='max_deletion_length', help='The maximum deletion length. The default is 3.' ) # Options for supplying own junctions parser.add_option( '-G', '--GTF', dest='gene_model_annotations', help='Supply TopHat with a list of gene model annotations. \ @@ -115,8 +119,7 @@ index_path = options.index_path # Build tophat command. 
- tmp_output_dir = tempfile.mkdtemp() - cmd = 'tophat -o %s %s %s %s' + cmd = 'tophat %s %s %s' reads = options.input1 if options.input2: reads += ' ' + options.input2 @@ -124,7 +127,7 @@ if options.single_paired == 'paired': opts += ' -r %s' % options.mate_inner_dist if options.settings == 'preSet': - cmd = cmd % ( tmp_output_dir, opts, index_path, reads ) + cmd = cmd % ( opts, index_path, reads ) else: try: if int( options.min_anchor_length ) >= 3: @@ -144,6 +147,13 @@ opts += ' -j %s' % options.raw_juncs if options.no_novel_juncs: opts += ' --no-novel-juncs' + if options.library_type: + opts += ' --library-type %s' % options.library_type + if options.allow_indels: + # Max options do not work for Tophat v1.2.0, despite documentation to the contrary. + opts += ' --allow-indels' + #opts += ' --allow-indels --max-insertion-length %i --max-deletion-length %i' % ( int( options.max_insertion_length ), int( options.max_deletion_length ) ) + # Search type options. if options.coverage_search: @@ -166,23 +176,21 @@ opts += ' --min-segment-intron %d' % int(options.min_segment_intron) if options.max_segment_intron: opts += ' --max-segment-intron %d' % int(options.max_segment_intron) - cmd = cmd % ( tmp_output_dir, opts, index_path, reads ) + cmd = cmd % ( opts, index_path, reads ) except Exception, e: # Clean up temp dirs if os.path.exists( tmp_index_dir ): shutil.rmtree( tmp_index_dir ) - if os.path.exists( tmp_output_dir ): - shutil.rmtree( tmp_output_dir ) stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) ) print cmd # Run try: - tmp_out = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name + tmp_out = tempfile.NamedTemporaryFile().name tmp_stdout = open( tmp_out, 'wb' ) - tmp_err = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name + tmp_err = tempfile.NamedTemporaryFile().name tmp_stderr = open( tmp_err, 'wb' ) - proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_output_dir, stdout=tmp_stdout, 
stderr=tmp_stderr ) + proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) returncode = proc.wait() tmp_stderr.close() # get stderr, allowing for case where it's very large @@ -202,17 +210,11 @@ raise Exception, stderr # TODO: look for errors in program output. - - # Copy output files from tmp directory to specified files. - shutil.copyfile( os.path.join( tmp_output_dir, "junctions.bed" ), options.junctions_output_file ) - shutil.copyfile( os.path.join( tmp_output_dir, "accepted_hits.bam" ), options.accepted_hits_output_file ) except Exception, e: stop_err( 'Error in tophat:\n' + str( e ) ) # Clean up temp dirs if os.path.exists( tmp_index_dir ): shutil.rmtree( tmp_index_dir ) - if os.path.exists( tmp_output_dir ): - shutil.rmtree( tmp_output_dir ) if __name__=="__main__": __main__() --- a/tools/ngs_rna/tophat_wrapper.xml Thu Feb 24 15:23:21 2011 -0500 +++ b/tools/ngs_rna/tophat_wrapper.xml Thu Feb 24 15:24:52 2011 -0500 @@ -39,6 +39,14 @@ --max-segment-intron $singlePaired.sParams.max_segment_intron --seg-mismatches=$singlePaired.sParams.seg_mismatches --seg-length=$singlePaired.sParams.seg_length + --library-type=$singlePaired.sParams.library_type + + ## Indel search. + #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes": + --allow-indels + --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length + --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length + #end if ## Supplying junctions parameters. #if $singlePaired.sParams.own_junctions.use_junctions == "Yes": @@ -90,6 +98,14 @@ --max-segment-intron $singlePaired.pParams.max_segment_intron --seg-mismatches=$singlePaired.pParams.seg_mismatches --seg-length=$singlePaired.pParams.seg_length + --library-type=$singlePaired.pParams.library_type + + ## Indel search. 
+ #if $singlePaired.pParams.indel_search.allow_indel_search == "Yes": + --allow-indels + --max-insertion-length $singlePaired.pParams.indel_search.max_insertion_length + --max-deletion-length $singlePaired.pParams.indel_search.max_deletion_length + #end if ## Supplying junctions parameters. #if $singlePaired.pParams.own_junctions.use_junctions == "Yes": @@ -157,10 +173,26 @@ <when value="preSet" /><!-- Full/advanced parms. --><when value="full"> + <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol."> + <option value="fr-unstranded">FR Unstranded</option> + <option value="fr-firststrand">FR First Strand</option> + <option value="fr-secondstrand">FR Second Strand</option> + </param><param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /><param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" /><param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." /><param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." 
/> + <conditional name="indel_search"> + <param name="allow_indel_search" type="select" label="Allow indel search"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No"/> + <when value="Yes"> + <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." /> + <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." /> + </when> + </conditional><param name="junction_filter" type="float" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" /><param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" /><param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" /> @@ -247,11 +279,27 @@ <when value="preSet" /><!-- Full/advanced parms. --><when value="full"> + <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol."> + <option value="fr-unstranded">FR Unstranded</option> + <option value="fr-firststrand">FR First Strand</option> + <option value="fr-secondstrand">FR Second Strand</option> + </param><param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="The standard deviation for the distribution on inner distances between mate pairs."/><param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." 
/><param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" /><param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." /><param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." /> + <conditional name="indel_search"> + <param name="allow_indel_search" type="select" label="Allow indel search"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No"/> + <when value="Yes"> + <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." /> + <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." 
/> + </when> + </conditional><param name="junction_filter" type="float" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" /><param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" /><param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" /> @@ -329,8 +377,28 @@ </inputs><outputs> - <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions"/> - <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits"/> + <data format="bed" name="insertions" label="${tool.name} on ${on_string}: insertions" from_work_dir="tophat_out/insertions.bed"> + <filter> + ( + ( ( 'sParams' in singlePaired ) and ( 'indel_search' in singlePaired['sParams'] ) and + ( singlePaired['sParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) or + ( ( 'pParams' in singlePaired ) and ( 'indel_search' in singlePaired['pParams'] ) and + ( singlePaired['pParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) + ) + </filter> + </data> + <data format="bed" name="deletions" label="${tool.name} on ${on_string}: deletions" from_work_dir="tophat_out/deletions.bed"> + <filter> + ( + ( ( 'sParams' in singlePaired ) and ( 'indel_search' in singlePaired['sParams'] ) and + ( singlePaired['sParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) or + ( ( 'pParams' in singlePaired ) and ( 'indel_search' in singlePaired['pParams'] ) and + ( singlePaired['pParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) + ) + </filter> + </data> + <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions" from_work_dir="tophat_out/junctions.bed"/> + <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits" 
from_work_dir="tophat_out/accepted_hits.bam"/></outputs><tests> @@ -367,7 +435,7 @@ <test><!-- Tophat commands: bowtie-build -f test-data/tophat_in1.fasta tophat_in1 - tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intro 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger + tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 ++allow-indels +coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intro 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger Replace the + with double-dash --><param name="genomeSource" value="history"/> @@ -375,6 +443,7 @@ <param name="sPaired" value="single"/><param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/><param name="sSettingsType" value="full"/> + <param name="library_type" value="FR Unstranded"/><param name="anchor_length" value="8"/><param name="splice_mismatches" value="0"/><param name="min_intron_length" value="70"/> @@ -386,6 +455,9 @@ <param name="max_segment_intron" value="500000" /><param name="seg_mismatches" value="2"/><param name="seg_length" value="25"/> + <param name="allow_indel_search" value="Yes"/> + <param name="max_insertion_length" value="3"/> + <param name="max_deletion_length" value="3"/><param name="use_junctions" value="Yes" /><param name="use_annotations" value="No" /><param name="use_juncs" value="No" /> @@ -398,6 +470,8 @@ <param name="min_coverage_intron" value="50" /><param name="max_coverage_intron" value="20000" /><param name="microexon_search" value="Yes" /> + <output name="insertions" file="tophat_out3i.bed" ftype="bed"/> + <output name="deletions" file="tophat_out3d.bed" ftype="bed"/><output name="junctions" 
file="tophat_out3j.bed" ftype="bed" /><output name="accepted_hits" file="tophat_out3h.bam" compare="sim_size" ftype="bam" /></test> @@ -414,6 +488,7 @@ <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/><param name="mate_inner_distance" value="20"/><param name="pSettingsType" value="full"/> + <param name="library_type" value="FR Unstranded"/><param name="mate_std_dev" value="20"/><param name="anchor_length" value="8"/><param name="splice_mismatches" value="0"/> @@ -426,6 +501,7 @@ <param name="max_segment_intron" value="500000" /><param name="seg_mismatches" value="2"/><param name="seg_length" value="25"/> + <param name="allow_indel_search" value="No"/><param name="use_junctions" value="Yes" /><param name="use_annotations" value="No" /><param name="use_juncs" value="No" /> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket