commit/galaxy-central: jgoecks: Add options to Tophat wrapper to support v1.2.0 functionality: (a) allow indel search; (b) max insertion and max deletion lengths; and (c) library type. Update functional tests to test new options. Also update function that checks from_work_dir output attribute to use real path to prevent mishandling of symbolic links. - galaxy-commits

24 Feb 2011

1 new changeset in galaxy-central:

http://bitbucket.org/galaxy/galaxy-central/changeset/c0d6d17fc7db/
changeset:   r5122:c0d6d17fc7db
user:        jgoecks
date:        2011-02-24 21:24:52
summary:     Add options to Tophat wrapper to support v1.2.0 functionality: (a) allow indel search; (b) max insertion and max deletion lengths; and (c) library type. Update functional tests to test new options. Also update function that checks from_work_dir output attribute to use real path to prevent mishandling of symbolic links.
affected #:  5 files (6.0 KB)

--- a/lib/galaxy/jobs/__init__.py	Thu Feb 24 15:23:21 2011 -0500
+++ b/lib/galaxy/jobs/__init__.py	Thu Feb 24 15:24:52 2011 -0500
@@ -505,10 +505,14 @@
                         return
         job_context = ExpressionContext( dict( stdout = stdout, stderr = stderr ) )
         job_tool = self.app.toolbox.tools_by_id.get( job.tool_id, None )
-        def file_in_dir( file_path, a_dir ):
-            """ Returns true if file is in directory. """
-            abs_file_path = os.path.abspath( file_path )
-            return os.path.split( abs_file_path )[0] == os.path.abspath( a_dir )
+        def in_directory( file, directory ):
+            # Make both absolute.
+            directory = os.path.realpath( directory )
+            file = os.path.realpath( file )
+
+            #Return true, if the common prefix of both is equal to directory
+            #e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b
+            return os.path.commonprefix( [ file, directory ] ) == directory
         for dataset_assoc in job.output_datasets + job.output_library_datasets:
             context = self.get_dataset_finish_context( job_context, dataset_assoc.dataset.dataset )
             #should this also be checking library associations? - can a library item be added from a history before the job has ended? - lets not allow this to occur
@@ -523,7 +527,7 @@
                         if hda_tool_output and hda_tool_output.from_work_dir:
                             # Copy from working dir to HDA.
                             source_file = os.path.join( os.path.abspath( self.working_directory ), hda_tool_output.from_work_dir )
-                            if file_in_dir( source_file, self.working_directory ):
+                            if in_directory( source_file, self.working_directory ):
                                 try:
                                     shutil.move( source_file, dataset.file_name )
                                     log.debug( "finish(): Moved %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) )


--- a/tools/ngs_rna/tophat_wrapper.py	Thu Feb 24 15:23:21 2011 -0500
+++ b/tools/ngs_rna/tophat_wrapper.py	Thu Feb 24 15:24:52 2011 -0500
@@ -30,6 +30,10 @@
     parser.add_option( '-g', '--max_multihits', dest='max_multihits', help='Maximum number of alignments to be allowed' )
     parser.add_option( '', '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' )
     parser.add_option( '', '--seg-length', dest='seg_length', help='Minimum length of read segments' )
+    parser.add_option( '', '--library-type', dest='library_type', help='TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.' )
+    parser.add_option( '', '--allow-indels', action="store_true", help='Allow indel search. Indel search is disabled by default.' )
+    parser.add_option( '', '--max-insertion-length', dest='max_insertion_length', help='The maximum insertion length. The default is 3.' )
+    parser.add_option( '', '--max-deletion-length', dest='max_deletion_length', help='The maximum deletion length. The default is 3.' )
 
     # Options for supplying own junctions
     parser.add_option( '-G', '--GTF', dest='gene_model_annotations', help='Supply TopHat with a list of gene model annotations. \
@@ -115,8 +119,7 @@
         index_path = options.index_path
 
     # Build tophat command.
-    tmp_output_dir = tempfile.mkdtemp()
-    cmd = 'tophat -o %s %s %s %s'
+    cmd = 'tophat %s %s %s'
     reads = options.input1
     if options.input2:
         reads += ' ' + options.input2
@@ -124,7 +127,7 @@
     if options.single_paired == 'paired':
         opts += ' -r %s' % options.mate_inner_dist
     if options.settings == 'preSet':
-        cmd = cmd % ( tmp_output_dir, opts, index_path, reads )
+        cmd = cmd % ( opts, index_path, reads )
     else:
         try:
             if int( options.min_anchor_length ) >= 3:
@@ -144,6 +147,13 @@
                 opts += ' -j %s' % options.raw_juncs
             if options.no_novel_juncs:
                 opts += ' --no-novel-juncs'
+            if options.library_type:
+                opts += ' --library-type %s' % options.library_type
+            if options.allow_indels:
+                # Max options do not work for Tophat v1.2.0, despite documentation to the contrary.
+                opts += ' --allow-indels'
+                #opts += ' --allow-indels --max-insertion-length %i --max-deletion-length %i' % ( int( options.max_insertion_length ), int( options.max_deletion_length ) )
+
 
             # Search type options.
             if options.coverage_search:
@@ -166,23 +176,21 @@
                 opts += ' --min-segment-intron %d' % int(options.min_segment_intron)
             if options.max_segment_intron:
                 opts += ' --max-segment-intron %d' % int(options.max_segment_intron)
-            cmd = cmd % ( tmp_output_dir, opts, index_path, reads )
+            cmd = cmd % ( opts, index_path, reads )
         except Exception, e:
             # Clean up temp dirs
             if os.path.exists( tmp_index_dir ):
                 shutil.rmtree( tmp_index_dir )
-            if os.path.exists( tmp_output_dir ):
-                shutil.rmtree( tmp_output_dir )
             stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
     print cmd
 
     # Run
     try:
-        tmp_out = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
+        tmp_out = tempfile.NamedTemporaryFile().name
         tmp_stdout = open( tmp_out, 'wb' )
-        tmp_err = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
+        tmp_err = tempfile.NamedTemporaryFile().name
         tmp_stderr = open( tmp_err, 'wb' )
-        proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_output_dir, stdout=tmp_stdout, stderr=tmp_stderr )
+        proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )
         returncode = proc.wait()
         tmp_stderr.close()
         # get stderr, allowing for case where it's very large
@@ -202,17 +210,11 @@
             raise Exception, stderr
 
         # TODO: look for errors in program output.
-
-        # Copy output files from tmp directory to specified files.
-        shutil.copyfile( os.path.join( tmp_output_dir, "junctions.bed" ), options.junctions_output_file )
-        shutil.copyfile( os.path.join( tmp_output_dir, "accepted_hits.bam" ), options.accepted_hits_output_file )
     except Exception, e:
         stop_err( 'Error in tophat:\n' + str( e ) ) 
 
     # Clean up temp dirs
     if os.path.exists( tmp_index_dir ):
         shutil.rmtree( tmp_index_dir )
-    if os.path.exists( tmp_output_dir ):
-        shutil.rmtree( tmp_output_dir )
 
 if __name__=="__main__": __main__()


--- a/tools/ngs_rna/tophat_wrapper.xml	Thu Feb 24 15:23:21 2011 -0500
+++ b/tools/ngs_rna/tophat_wrapper.xml	Thu Feb 24 15:24:52 2011 -0500
@@ -39,6 +39,14 @@
                     --max-segment-intron $singlePaired.sParams.max_segment_intron
                     --seg-mismatches=$singlePaired.sParams.seg_mismatches
                     --seg-length=$singlePaired.sParams.seg_length
+                    --library-type=$singlePaired.sParams.library_type
+                    
+                    ## Indel search.
+                    #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes":
+                        --allow-indels
+                        --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length
+                        --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length
+                    #end if
 
                     ## Supplying junctions parameters.
                     #if $singlePaired.sParams.own_junctions.use_junctions == "Yes":
@@ -90,6 +98,14 @@
                     --max-segment-intron $singlePaired.pParams.max_segment_intron
                     --seg-mismatches=$singlePaired.pParams.seg_mismatches
                     --seg-length=$singlePaired.pParams.seg_length
+                    --library-type=$singlePaired.pParams.library_type
+                    
+		    ## Indel search.
+                    #if $singlePaired.pParams.indel_search.allow_indel_search == "Yes":
+                        --allow-indels
+                        --max-insertion-length $singlePaired.pParams.indel_search.max_insertion_length
+                        --max-deletion-length $singlePaired.pParams.indel_search.max_deletion_length
+                    #end if
 
                     ## Supplying junctions parameters.
                     #if $singlePaired.pParams.own_junctions.use_junctions == "Yes":
@@ -157,10 +173,26 @@
                 <when value="preSet" /><!-- Full/advanced parms. --><when value="full">
+                  <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.">
+                      <option value="fr-unstranded">FR Unstranded</option>
+                      <option value="fr-firststrand">FR First Strand</option>
+                      <option value="fr-secondstrand">FR Second Strand</option>
+                  </param><param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /><param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" /><param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." /><param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." />
+                  <conditional name="indel_search">
+                      <param name="allow_indel_search" type="select" label="Allow indel search">
+                          <option value="No">No</option>
+                          <option value="Yes">Yes</option>
+                      </param>
+                      <when value="No"/>
+                      <when value="Yes">
+                         <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." />
+                         <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." />
+                      </when>
+                  </conditional><param name="junction_filter" type="float" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" /><param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" /><param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
@@ -247,11 +279,27 @@
                 <when value="preSet" /><!-- Full/advanced parms. --><when value="full">
+                    <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.">
+                        <option value="fr-unstranded">FR Unstranded</option>
+                        <option value="fr-firststrand">FR First Strand</option>
+                        <option value="fr-secondstrand">FR Second Strand</option>
+                    </param><param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs"  help="The standard deviation for the distribution on inner distances between mate pairs."/><param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /><param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" /><param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." /><param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." />
+                  <conditional name="indel_search">
+                      <param name="allow_indel_search" type="select" label="Allow indel search">
+                          <option value="No">No</option>
+                          <option value="Yes">Yes</option>
+                      </param>
+                      <when value="No"/>
+                      <when value="Yes">
+                         <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." />
+                         <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." />
+                      </when>
+                  </conditional><param name="junction_filter" type="float" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" /><param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" /><param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
@@ -329,8 +377,28 @@
     </inputs><outputs>
-        <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions"/>
-        <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits"/>
+        <data format="bed" name="insertions" label="${tool.name} on ${on_string}: insertions" from_work_dir="tophat_out/insertions.bed">
+            <filter>
+                (
+                    ( ( 'sParams' in singlePaired ) and ( 'indel_search' in singlePaired['sParams'] ) and 
+                      ( singlePaired['sParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) or 
+                    ( ( 'pParams' in singlePaired ) and ( 'indel_search' in singlePaired['pParams'] ) and 
+                      ( singlePaired['pParams']['indel_search']['allow_indel_search'] == 'Yes' ) )
+                ) 
+            </filter>
+        </data>
+        <data format="bed" name="deletions" label="${tool.name} on ${on_string}: deletions" from_work_dir="tophat_out/deletions.bed">
+            <filter>
+                (
+                    ( ( 'sParams' in singlePaired ) and ( 'indel_search' in singlePaired['sParams'] ) and 
+                      ( singlePaired['sParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) or 
+                    ( ( 'pParams' in singlePaired ) and ( 'indel_search' in singlePaired['pParams'] ) and 
+                      ( singlePaired['pParams']['indel_search']['allow_indel_search'] == 'Yes' ) )
+                )
+            </filter>
+        </data>
+        <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions" from_work_dir="tophat_out/junctions.bed"/>
+        <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits" from_work_dir="tophat_out/accepted_hits.bam"/></outputs><tests>
@@ -367,7 +435,7 @@
         <test><!-- Tophat commands:
             bowtie-build -f test-data/tophat_in1.fasta tophat_in1
-            tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intro 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger
+            tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 ++allow-indels +coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intro 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger
             Replace the + with double-dash
             --><param name="genomeSource" value="history"/>
@@ -375,6 +443,7 @@
             <param name="sPaired" value="single"/><param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/><param name="sSettingsType" value="full"/>
+	    <param name="library_type" value="FR Unstranded"/><param name="anchor_length" value="8"/><param name="splice_mismatches" value="0"/><param name="min_intron_length" value="70"/>
@@ -386,6 +455,9 @@
             <param name="max_segment_intron" value="500000" /><param name="seg_mismatches" value="2"/><param name="seg_length" value="25"/>
+            <param name="allow_indel_search" value="Yes"/>
+	    <param name="max_insertion_length" value="3"/>
+	    <param name="max_deletion_length" value="3"/><param name="use_junctions" value="Yes" /><param name="use_annotations" value="No" /><param name="use_juncs" value="No" />
@@ -398,6 +470,8 @@
             <param name="min_coverage_intron" value="50" /><param name="max_coverage_intron" value="20000" /><param name="microexon_search" value="Yes" />
+            <output name="insertions" file="tophat_out3i.bed" ftype="bed"/>
+            <output name="deletions" file="tophat_out3d.bed" ftype="bed"/><output name="junctions" file="tophat_out3j.bed" ftype="bed" /><output name="accepted_hits" file="tophat_out3h.bam" compare="sim_size" ftype="bam" /></test>
@@ -414,6 +488,7 @@
             <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/><param name="mate_inner_distance" value="20"/><param name="pSettingsType" value="full"/>
+	    <param name="library_type" value="FR Unstranded"/><param name="mate_std_dev" value="20"/><param name="anchor_length" value="8"/><param name="splice_mismatches" value="0"/>
@@ -426,6 +501,7 @@
             <param name="max_segment_intron" value="500000" /><param name="seg_mismatches" value="2"/><param name="seg_length" value="25"/>
+            <param name="allow_indel_search" value="No"/><param name="use_junctions" value="Yes" /><param name="use_annotations" value="No" /><param name="use_juncs" value="No" />

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.

    

Bitbucket

tags

participants (1)