1 new changeset in galaxy-central:
http://bitbucket.org/galaxy/galaxy-central/changeset/c0d6d17fc7db/
changeset: r5122:c0d6d17fc7db
user: jgoecks
date: 2011-02-24 21:24:52
summary: Add options to Tophat wrapper to support v1.2.0 functionality: (a) allow
indel search; (b) max insertion and max deletion lengths; and (c) library type. Update
functional tests to test new options. Also update function that checks from_work_dir
output attribute to use real path to prevent mishandling of symbolic links.
affected #: 5 files (6.0 KB)
--- a/lib/galaxy/jobs/__init__.py Thu Feb 24 15:23:21 2011 -0500
+++ b/lib/galaxy/jobs/__init__.py Thu Feb 24 15:24:52 2011 -0500
@@ -505,10 +505,14 @@
return
job_context = ExpressionContext( dict( stdout = stdout, stderr = stderr ) )
job_tool = self.app.toolbox.tools_by_id.get( job.tool_id, None )
- def file_in_dir( file_path, a_dir ):
- """ Returns true if file is in directory. """
- abs_file_path = os.path.abspath( file_path )
- return os.path.split( abs_file_path )[0] == os.path.abspath( a_dir )
+ def in_directory( file, directory ):
+ # Make both absolute.
+ directory = os.path.realpath( directory )
+ file = os.path.realpath( file )
+
+ #Return true, if the common prefix of both is equal to directory
+ #e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b
+ return os.path.commonprefix( [ file, directory ] ) == directory
for dataset_assoc in job.output_datasets + job.output_library_datasets:
context = self.get_dataset_finish_context( job_context,
dataset_assoc.dataset.dataset )
#should this also be checking library associations? - can a library item be
added from a history before the job has ended? - lets not allow this to occur
@@ -523,7 +527,7 @@
if hda_tool_output and hda_tool_output.from_work_dir:
# Copy from working dir to HDA.
source_file = os.path.join( os.path.abspath(
self.working_directory ), hda_tool_output.from_work_dir )
- if file_in_dir( source_file, self.working_directory ):
+ if in_directory( source_file, self.working_directory ):
try:
shutil.move( source_file, dataset.file_name )
log.debug( "finish(): Moved %s to %s as directed
by from_work_dir" % ( source_file, dataset.file_name ) )
--- a/tools/ngs_rna/tophat_wrapper.py Thu Feb 24 15:23:21 2011 -0500
+++ b/tools/ngs_rna/tophat_wrapper.py Thu Feb 24 15:24:52 2011 -0500
@@ -30,6 +30,10 @@
parser.add_option( '-g', '--max_multihits',
dest='max_multihits', help='Maximum number of alignments to be allowed' )
parser.add_option( '', '--seg-mismatches',
dest='seg_mismatches', help='Number of mismatches allowed in each segment
alignment for reads mapped independently' )
parser.add_option( '', '--seg-length', dest='seg_length',
help='Minimum length of read segments' )
+ parser.add_option( '', '--library-type', dest='library_type',
help='TopHat will treat the reads as strand specific. Every read alignment will have
an XS attribute tag. Consider supplying library type options below to select the correct
RNA-seq protocol.' )
+ parser.add_option( '', '--allow-indels',
action="store_true", help='Allow indel search. Indel search is disabled by
default.' )
+ parser.add_option( '', '--max-insertion-length',
dest='max_insertion_length', help='The maximum insertion length. The default
is 3.' )
+ parser.add_option( '', '--max-deletion-length',
dest='max_deletion_length', help='The maximum deletion length. The default is
3.' )
# Options for supplying own junctions
parser.add_option( '-G', '--GTF',
dest='gene_model_annotations', help='Supply TopHat with a list of gene model
annotations. \
@@ -115,8 +119,7 @@
index_path = options.index_path
# Build tophat command.
- tmp_output_dir = tempfile.mkdtemp()
- cmd = 'tophat -o %s %s %s %s'
+ cmd = 'tophat %s %s %s'
reads = options.input1
if options.input2:
reads += ' ' + options.input2
@@ -124,7 +127,7 @@
if options.single_paired == 'paired':
opts += ' -r %s' % options.mate_inner_dist
if options.settings == 'preSet':
- cmd = cmd % ( tmp_output_dir, opts, index_path, reads )
+ cmd = cmd % ( opts, index_path, reads )
else:
try:
if int( options.min_anchor_length ) >= 3:
@@ -144,6 +147,13 @@
opts += ' -j %s' % options.raw_juncs
if options.no_novel_juncs:
opts += ' --no-novel-juncs'
+ if options.library_type:
+ opts += ' --library-type %s' % options.library_type
+ if options.allow_indels:
+ # Max options do not work for Tophat v1.2.0, despite documentation to the
contrary.
+ opts += ' --allow-indels'
+ #opts += ' --allow-indels --max-insertion-length %i
--max-deletion-length %i' % ( int( options.max_insertion_length ), int(
options.max_deletion_length ) )
+
# Search type options.
if options.coverage_search:
@@ -166,23 +176,21 @@
opts += ' --min-segment-intron %d' %
int(options.min_segment_intron)
if options.max_segment_intron:
opts += ' --max-segment-intron %d' %
int(options.max_segment_intron)
- cmd = cmd % ( tmp_output_dir, opts, index_path, reads )
+ cmd = cmd % ( opts, index_path, reads )
except Exception, e:
# Clean up temp dirs
if os.path.exists( tmp_index_dir ):
shutil.rmtree( tmp_index_dir )
- if os.path.exists( tmp_output_dir ):
- shutil.rmtree( tmp_output_dir )
stop_err( 'Something is wrong with the alignment parameters and the
alignment could not be run\n' + str( e ) )
print cmd
# Run
try:
- tmp_out = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
+ tmp_out = tempfile.NamedTemporaryFile().name
tmp_stdout = open( tmp_out, 'wb' )
- tmp_err = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
+ tmp_err = tempfile.NamedTemporaryFile().name
tmp_stderr = open( tmp_err, 'wb' )
- proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_output_dir,
stdout=tmp_stdout, stderr=tmp_stderr )
+ proc = subprocess.Popen( args=cmd, shell=True, cwd=".",
stdout=tmp_stdout, stderr=tmp_stderr )
returncode = proc.wait()
tmp_stderr.close()
# get stderr, allowing for case where it's very large
@@ -202,17 +210,11 @@
raise Exception, stderr
# TODO: look for errors in program output.
-
- # Copy output files from tmp directory to specified files.
- shutil.copyfile( os.path.join( tmp_output_dir, "junctions.bed" ),
options.junctions_output_file )
- shutil.copyfile( os.path.join( tmp_output_dir, "accepted_hits.bam" ),
options.accepted_hits_output_file )
except Exception, e:
stop_err( 'Error in tophat:\n' + str( e ) )
# Clean up temp dirs
if os.path.exists( tmp_index_dir ):
shutil.rmtree( tmp_index_dir )
- if os.path.exists( tmp_output_dir ):
- shutil.rmtree( tmp_output_dir )
if __name__=="__main__": __main__()
--- a/tools/ngs_rna/tophat_wrapper.xml Thu Feb 24 15:23:21 2011 -0500
+++ b/tools/ngs_rna/tophat_wrapper.xml Thu Feb 24 15:24:52 2011 -0500
@@ -39,6 +39,14 @@
--max-segment-intron $singlePaired.sParams.max_segment_intron
--seg-mismatches=$singlePaired.sParams.seg_mismatches
--seg-length=$singlePaired.sParams.seg_length
+ --library-type=$singlePaired.sParams.library_type
+
+ ## Indel search.
+ #if $singlePaired.sParams.indel_search.allow_indel_search ==
"Yes":
+ --allow-indels
+ --max-insertion-length
$singlePaired.sParams.indel_search.max_insertion_length
+ --max-deletion-length
$singlePaired.sParams.indel_search.max_deletion_length
+ #end if
## Supplying junctions parameters.
#if $singlePaired.sParams.own_junctions.use_junctions ==
"Yes":
@@ -90,6 +98,14 @@
--max-segment-intron $singlePaired.pParams.max_segment_intron
--seg-mismatches=$singlePaired.pParams.seg_mismatches
--seg-length=$singlePaired.pParams.seg_length
+ --library-type=$singlePaired.pParams.library_type
+
+ ## Indel search.
+ #if $singlePaired.pParams.indel_search.allow_indel_search ==
"Yes":
+ --allow-indels
+ --max-insertion-length
$singlePaired.pParams.indel_search.max_insertion_length
+ --max-deletion-length
$singlePaired.pParams.indel_search.max_deletion_length
+ #end if
## Supplying junctions parameters.
#if $singlePaired.pParams.own_junctions.use_junctions ==
"Yes":
@@ -157,10 +173,26 @@
<when value="preSet" /><!-- Full/advanced parms.
--><when value="full">
+ <param name="library_type" type="select"
label="Library Type" help="TopHat will treat the reads as strand specific.
Every read alignment will have an XS attribute tag. Consider supplying library type
options below to select the correct RNA-seq protocol.">
+ <option value="fr-unstranded">FR
Unstranded</option>
+ <option value="fr-firststrand">FR First
Strand</option>
+ <option value="fr-secondstrand">FR Second
Strand</option>
+ </param><param name="anchor_length"
type="integer" value="8" label="Anchor length (at least 3)"
help="Report junctions spanned by reads with at least this many bases on each side of
the junction." /><param name="splice_mismatches"
type="integer" value="0" label="Maximum number of mismatches that
can appear in the anchor region of spliced alignment" /><param
name="min_intron_length" type="integer" value="70"
label="The minimum intron length" help="TopHat will ignore donor/acceptor
pairs closer than this many bases apart." /><param
name="max_intron_length" type="integer" value="500000"
label="The maximum intron length" help="When searching for junctions ab
initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except
when such a pair is supported by a split segment alignment of a long read." />
+ <conditional name="indel_search">
+ <param name="allow_indel_search"
type="select" label="Allow indel search">
+ <option value="No">No</option>
+ <option value="Yes">Yes</option>
+ </param>
+ <when value="No"/>
+ <when value="Yes">
+ <param name="max_insertion_length"
type="integer" value="3" label="Max insertion length."
help="The maximum insertion length." />
+ <param name="max_deletion_length"
type="integer" value="3" label="Max deletion length."
help="The maximum deletion length." />
+ </when>
+ </conditional><param name="junction_filter"
type="float" value="0.15" label="Minimum isoform fraction: filter
out junctions supported by too few alignments (number of reads divided by average depth of
coverage)" help="0.0 to 1.0 (0 to turn off)" /><param
name="max_multihits" type="integer" value="40"
label="Maximum number of alignments to be allowed" /><param
name="min_segment_intron" type="integer" value="50"
label="Minimum intron length that may be found during split-segment (default)
search" />
@@ -247,11 +279,27 @@
<when value="preSet" /><!-- Full/advanced parms.
--><when value="full">
+ <param name="library_type" type="select"
label="Library Type" help="TopHat will treat the reads as strand specific.
Every read alignment will have an XS attribute tag. Consider supplying library type
options below to select the correct RNA-seq protocol.">
+ <option value="fr-unstranded">FR
Unstranded</option>
+ <option value="fr-firststrand">FR First
Strand</option>
+ <option value="fr-secondstrand">FR Second
Strand</option>
+ </param><param name="mate_std_dev"
type="integer" value="20" label="Std. Dev for Distance between
Mate Pairs" help="The standard deviation for the distribution on inner
distances between mate pairs."/><param name="anchor_length"
type="integer" value="8" label="Anchor length (at least 3)"
help="Report junctions spanned by reads with at least this many bases on each side of
the junction." /><param name="splice_mismatches"
type="integer" value="0" label="Maximum number of mismatches that
can appear in the anchor region of spliced alignment" /><param
name="min_intron_length" type="integer" value="70"
label="The minimum intron length" help="TopHat will ignore donor/acceptor
pairs closer than this many bases apart." /><param
name="max_intron_length" type="integer" value="500000"
label="The maximum intron length" help="When searching for junctions ab
initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except
when such a pair is supported by a split segment alignment of a long read." />
+ <conditional name="indel_search">
+ <param name="allow_indel_search"
type="select" label="Allow indel search">
+ <option value="No">No</option>
+ <option value="Yes">Yes</option>
+ </param>
+ <when value="No"/>
+ <when value="Yes">
+ <param name="max_insertion_length"
type="integer" value="3" label="Max insertion length."
help="The maximum insertion length." />
+ <param name="max_deletion_length"
type="integer" value="3" label="Max deletion length."
help="The maximum deletion length." />
+ </when>
+ </conditional><param name="junction_filter"
type="float" value="0.15" label="Minimum isoform fraction: filter
out junctions supported by too few alignments (number of reads divided by average depth of
coverage)" help="0.0 to 1.0 (0 to turn off)" /><param
name="max_multihits" type="integer" value="40"
label="Maximum number of alignments to be allowed" /><param
name="min_segment_intron" type="integer" value="50"
label="Minimum intron length that may be found during split-segment (default)
search" />
@@ -329,8 +377,28 @@
</inputs><outputs>
- <data format="bed" name="junctions"
label="${tool.name} on ${on_string}: splice junctions"/>
- <data format="bam" name="accepted_hits"
label="${tool.name} on ${on_string}: accepted_hits"/>
+ <data format="bed" name="insertions"
label="${tool.name} on ${on_string}: insertions"
from_work_dir="tophat_out/insertions.bed">
+ <filter>
+ (
+ ( ( 'sParams' in singlePaired ) and ( 'indel_search'
in singlePaired['sParams'] ) and
+ (
singlePaired['sParams']['indel_search']['allow_indel_search'] ==
'Yes' ) ) or
+ ( ( 'pParams' in singlePaired ) and ( 'indel_search'
in singlePaired['pParams'] ) and
+ (
singlePaired['pParams']['indel_search']['allow_indel_search'] ==
'Yes' ) )
+ )
+ </filter>
+ </data>
+ <data format="bed" name="deletions"
label="${tool.name} on ${on_string}: deletions"
from_work_dir="tophat_out/deletions.bed">
+ <filter>
+ (
+ ( ( 'sParams' in singlePaired ) and ( 'indel_search'
in singlePaired['sParams'] ) and
+ (
singlePaired['sParams']['indel_search']['allow_indel_search'] ==
'Yes' ) ) or
+ ( ( 'pParams' in singlePaired ) and ( 'indel_search'
in singlePaired['pParams'] ) and
+ (
singlePaired['pParams']['indel_search']['allow_indel_search'] ==
'Yes' ) )
+ )
+ </filter>
+ </data>
+ <data format="bed" name="junctions"
label="${tool.name} on ${on_string}: splice junctions"
from_work_dir="tophat_out/junctions.bed"/>
+ <data format="bam" name="accepted_hits"
label="${tool.name} on ${on_string}: accepted_hits"
from_work_dir="tophat_out/accepted_hits.bam"/></outputs><tests>
@@ -367,7 +435,7 @@
<test><!-- Tophat commands:
bowtie-build -f test-data/tophat_in1.fasta tophat_in1
- tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40
+coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2
+segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50
+max-closure-intro 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger
+ tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 ++allow-indels
+coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2
+segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50
+max-closure-intro 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger
Replace the + with double-dash
--><param name="genomeSource" value="history"/>
@@ -375,6 +443,7 @@
<param name="sPaired" value="single"/><param
name="input1" ftype="fastqsanger"
value="tophat_in2.fastqsanger"/><param name="sSettingsType"
value="full"/>
+ <param name="library_type" value="FR
Unstranded"/><param name="anchor_length"
value="8"/><param name="splice_mismatches"
value="0"/><param name="min_intron_length"
value="70"/>
@@ -386,6 +455,9 @@
<param name="max_segment_intron" value="500000"
/><param name="seg_mismatches" value="2"/><param
name="seg_length" value="25"/>
+ <param name="allow_indel_search" value="Yes"/>
+ <param name="max_insertion_length" value="3"/>
+ <param name="max_deletion_length" value="3"/><param
name="use_junctions" value="Yes" /><param
name="use_annotations" value="No" /><param
name="use_juncs" value="No" />
@@ -398,6 +470,8 @@
<param name="min_coverage_intron" value="50"
/><param name="max_coverage_intron" value="20000" /><param
name="microexon_search" value="Yes" />
+ <output name="insertions" file="tophat_out3i.bed"
ftype="bed"/>
+ <output name="deletions" file="tophat_out3d.bed"
ftype="bed"/><output name="junctions"
file="tophat_out3j.bed" ftype="bed" /><output
name="accepted_hits" file="tophat_out3h.bam"
compare="sim_size" ftype="bam" /></test>
@@ -414,6 +488,7 @@
<param name="input2" ftype="fastqsanger"
value="tophat_in3.fastqsanger"/><param
name="mate_inner_distance" value="20"/><param
name="pSettingsType" value="full"/>
+ <param name="library_type" value="FR
Unstranded"/><param name="mate_std_dev"
value="20"/><param name="anchor_length"
value="8"/><param name="splice_mismatches"
value="0"/>
@@ -426,6 +501,7 @@
<param name="max_segment_intron" value="500000"
/><param name="seg_mismatches" value="2"/><param
name="seg_length" value="25"/>
+ <param name="allow_indel_search"
value="No"/><param name="use_junctions" value="Yes"
/><param name="use_annotations" value="No" /><param
name="use_juncs" value="No" />
Repository URL:
https://bitbucket.org/galaxy/galaxy-central/
--
This is a commit notification from
bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.