Hello there,
I was really pleased to see that you have the NGS toolbox within Galaxy. I was wondering if there are any plans to include TopHat (http://tophat.cbcb.umd.edu/) and/or Cufflinks (http://cufflinks.cbcb.umd.edu/) in the toolbox, as this would be really beneficial.
Also do you have tools to convert fastq format to SAM?
Thanking you in advance.
Cheers, Rathi
On Thursday 22 October 2009 12:52:20 Rathi Thiagarajan wrote:
Hello there,
I was really pleased to see that you have the NGS toolbox within Galaxy. I was wondering if there are any plans to include TopHat (http://tophat.cbcb.umd.edu/) and/or Cufflinks (http://cufflinks.cbcb.umd.edu/) in the toolbox, as this would be really beneficial.
Also do you have tools to convert fastq format to SAM?
Since SAM is the Sequence Alignment/Map format, the tool you should use is an aligner. I think there are now wrappers for some aligners (e.g. bowtie) included in Galaxy. The latest releases of bowtie have the option -S to output SAM directly, but I don't know whether Galaxy enables this option. The samtools package also includes many converters from the output of different aligners to SAM.
best, ido
Hi Rathi!
I wrote a TopHat wrapper myself - you can use it if you want; the files are attached. You need to adapt the BOWTIE_INDEXES path and the bowtie path in tophatwrapper.py. If these variables are already set, then you can just remove the lines between "#path settings" and "#path settings end". You must also include tophat.xml in tool_conf.xml in the main Galaxy folder - in a section of your choice.
Let me know if you find any errors.
greetings
mat
Rathi Thiagarajan schrieb:
Hello there,
I was really pleased to see that you have the NGS toolbox within Galaxy. I was wondering if there are any plans to include TopHat (http://tophat.cbcb.umd.edu/) and/or Cufflinks (http://cufflinks.cbcb.umd.edu/) in the toolbox, as this would be really beneficial.
Also do you have tools to convert fastq format to SAM?
Thanking you in advance.
Cheers, Rathi
#! /usr/bin/python
import optparse, os, sys, tempfile
def stop_err( msg ):
    """Write ``msg`` to standard error and terminate the script.

    msg -- text of the error; a newline is appended before writing.

    Raises SystemExit with exit status 1.
    """
    sys.stderr.write( "%s\n" % msg )
    # A bare sys.exit() exits with status 0, which tells the calling process
    # that the run succeeded even though an error was just reported.
    sys.exit( 1 )
def moveToTarget(target, filePath, log_report):
    """Move the file at ``filePath`` to ``target``.

    target     -- destination path (an existing file there is replaced).
    filePath   -- path of the file to move.
    log_report -- accepted for interface compatibility; not used here.

    Returns None.  A failed move is reported on standard output and is not
    raised, so callers can keep moving any remaining files.
    """
    # Local import: the file-level import line does not provide shutil.
    import shutil

    try:
        # shutil.move avoids building a shell command line, so paths that
        # contain spaces or shell metacharacters are handled correctly and
        # a failure surfaces as an exception instead of being ignored.
        shutil.move(filePath, target)
    except (EnvironmentError, TypeError):
        # TypeError covers non-path arguments (e.g. None); they are reported
        # like any other failed move rather than aborting the caller.
        print("Error moving files to galaxy (%s -> %s)" % (filePath, target))
def copyToTarget(targetStream, filePath, log_report):
    """Copy the text file at ``filePath`` to the path ``targetStream``.

    targetStream -- path of the file to write (created or truncated).
    filePath     -- path of the file to read.
    log_report   -- log text accumulated so far; None is treated as "".

    Returns the log text, extended with a message when the copy failed.
    Strings are immutable, so the caller has to use the return value to see
    any appended message.  IOError is handled here and never propagates.
    """
    if log_report is None:
        log_report = ""

    # The source is opened first so that a missing source never creates or
    # truncates the target file.
    try:
        source = open(filePath, "r")
    except IOError:
        return log_report + ("Target file " + filePath +
                             " does not exist - probably Tophat created no output.")

    try:
        try:
            with open(targetStream, "w") as handle:
                handle.writelines(source)
        except IOError:
            log_report += "Could not write " + filePath + " to " + targetStream + "."
    finally:
        # Always release the source handle, also when writing failed.
        source.close()
    return log_report
def _split_file_list(value):
    """Return the non-empty entries of a comma-separated list of paths."""
    if not value:
        return []
    # Dropping empty entries accepts lists with and without a trailing comma.
    return [name for name in value.split(',') if name]


def _strip_dat_suffix(path):
    """Return ``path`` without a trailing '.dat' extension, if it has one."""
    # str.rstrip('.dat') would strip any run of the characters '.', 'd', 'a'
    # and 't', mangling names such as 'dataset_data.dat'.
    if path.endswith('.dat'):
        return path[:-len('.dat')]
    return path


def __main__():
    """Run TopHat on the given read files and move its results into place.

    Command-line options select the TopHat parameters, the input read files
    (comma-separated lists) and the destination paths for the result files.
    TopHat writes into a private temporary directory; coverage.wig,
    junctions.bed, tophat_output.gff3.expr and accepted_hits.sam are then
    moved to the destinations given on the command line.

    For paired-end runs the inputs are temporarily renamed to
    <name>_1.<format> / <name>_2.<format> and renamed back afterwards.

    Problems are reported on standard output; the function returns None.

    NOTE(review): --report and --log_report are parsed but nothing is ever
    written to them; options.log_report is only handed to moveToTarget.
    Confirm against the tool XML whether a log file is expected.
    """
    import shutil
    import subprocess

    #Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-i', '--min-intron-length', dest='min_intron', help='Min. intron length')
    parser.add_option('-I', '--max-intron-length', dest='max_intron', help='Max. intron length')
    parser.add_option('-G', '--annotation', dest='gff3_file', help='none/filename')
    parser.add_option('', '--no-gff-juncs', dest='no_gff_juncs', help='Dont report GFF junctions (yes/no)')
    parser.add_option('', '--no-novel-juncs', dest='no_novel_juncs', help='Only report new junctions (yes/no)')
    parser.add_option('', '--bowtie_index', dest='bowtie_index', help='Bowtie index name')
    parser.add_option('', '--source-left', dest='source_left', help='source file (left if paired end)')
    parser.add_option('', '--source-right', dest='source_right', help='source file (right paired end reads)')
    parser.add_option('', '--coverage', dest='coverage', help='coverage')
    parser.add_option('', '--junctions', dest='junctions', help='junctions')
    parser.add_option('', '--expr_file', dest='expr_file', help='expr_file')
    parser.add_option('', '--accepted_hits', dest='accepted_hits', help='hits')
    parser.add_option('', '--report', dest='report', help='report')
    parser.add_option('', '--solexa1.3-quals', dest='solexa13', help='use solexa 1.3 pipeline (yes/no)')
    parser.add_option('', '--log_report', dest='log_report', help='log report')
    parser.add_option('', '--mate-inner-dist', dest='mate_inner_dist', help='report')
    parser.add_option('', '--paired-end', dest='paired_end', help='use paired end reads (yes/no)')
    parser.add_option('', '--file-format', dest='file_format', help='(fasta/fastqsolexa/fastqsanger)')

    (options, args) = parser.parse_args()

    paired = options.paired_end == "yes"
    files_left = _split_file_list(options.source_left)
    files_right = []
    if paired:
        files_right = _split_file_list(options.source_right)

    # Validate before touching any file.
    if not options.bowtie_index:
        print("Error, no bowtie index given")
        return
    if not files_left:
        print("Error, no input read files given")
        return
    # Compare the number of files, not the lengths of the option strings.
    if paired and len(files_left) != len(files_right):
        print("Error, must be an equivalent number of left/right read files")
        return
    if paired and not options.file_format:
        print("Error, paired end reads need a file format")
        return

    # The command is an argument list (no shell), so paths need no quoting.
    tophat_args = ['tophat', '-p', '4']
    for flag, value in (('-i', options.min_intron),
                        ('-I', options.max_intron),
                        ('-r', options.mate_inner_dist)):
        # Leave out options that were not given instead of passing "None".
        if value is not None:
            tophat_args.extend([flag, value])

    # ========= path settings
    # Site-specific locations; adapt them or remove this section when the
    # environment is already configured.
    os.environ['BOWTIE_INDEXES'] = '/data/galaxy/indexes/'
    os.environ['PATH'] = os.environ.get('PATH', '') + ':/usr/local/bowtie/'
    # ========= path settings end

    # Private output directory, so concurrent runs cannot overwrite each
    # other's result files; it is removed again at the end.
    tmp_dir = tempfile.mkdtemp()
    tophat_args.extend(['-o', tmp_dir])

    if options.solexa13 == "yes":
        tophat_args.append("--solexa1.3-quals")
    if options.no_novel_juncs == "yes":
        tophat_args.append("--no-novel-juncs")
    if options.no_gff_juncs == "yes":
        tophat_args.append("--no-gff-juncs")
    if options.gff3_file and options.gff3_file != 'none':
        tophat_args.extend(["-G", options.gff3_file])
    tophat_args.append(options.bowtie_index)

    # (original path, temporary path) of every input renamed for this run.
    renamed = []
    try:
        if paired:
            new_files_left = []
            new_files_right = []
            for originals, mate_tag, new_files in ((files_left, "_1.", new_files_left),
                                                   (files_right, "_2.", new_files_right)):
                for original in originals:
                    tmpFileName = _strip_dat_suffix(original) + mate_tag + options.file_format
                    # moveToTarget takes the destination first.
                    moveToTarget(tmpFileName, original, options.log_report)
                    renamed.append((original, tmpFileName))
                    new_files.append(tmpFileName)
            print('LEFT %s' % new_files_left)
            print('RIGHT %s' % new_files_right)
            tophat_args.append(','.join(new_files_left))
            tophat_args.append(','.join(new_files_right))
        else:
            tophat_args.append(','.join(files_left))

        # subprocess.call waits for tophat to finish; no extra os.wait() is
        # needed (it would raise because no child process is left).
        try:
            status = subprocess.call(tophat_args)
        except OSError:
            print("Error, could not execute tophat")
        else:
            if status != 0:
                print("Error, tophat exited with status %s" % status)

        # Hand over whatever result files were produced.
        for file_name, target in (("coverage.wig", options.coverage),
                                  ("junctions.bed", options.junctions),
                                  ("tophat_output.gff3.expr", options.expr_file),
                                  ("accepted_hits.sam", options.accepted_hits)):
            if target is None:
                continue
            filePath = os.path.join(tmp_dir, file_name)
            if os.path.isfile(filePath):
                moveToTarget(target, filePath, options.log_report)
            else:
                print("No %s was produced" % file_name)
    finally:
        # Give the inputs their original names back, even after a failure.
        for original, tmpFileName in renamed:
            if os.path.exists(tmpFileName):
                moveToTarget(original, tmpFileName, options.log_report)
        shutil.rmtree(tmp_dir, ignore_errors=True)
# Run the wrapper only when this file is executed as a script.
if "__main__" == __name__:
    __main__()
galaxy-user@lists.galaxyproject.org