
2 new changesets in galaxy-central: http://bitbucket.org/galaxy/galaxy-central/changeset/3479cddf6bfa/ changeset: 3479cddf6bfa user: fubar date: 2011-08-04 02:57:03 summary: fixes for rgManQQ plots to create a compressed PDF using GS as the downloadable artifact - much more useful for publication affected #: 5 files (3.6 KB) --- a/test-data/rgtestouts/rgManQQ/rgManQQtest1.html Wed Aug 03 13:54:28 2011 -0400 +++ b/test-data/rgtestouts/rgManQQ/rgManQQtest1.html Thu Aug 04 10:57:03 2011 +1000 @@ -13,8 +13,10 @@ <h1>rgManQQtest1</h1><table> -<tr><td><a href="Allelep_manhattan.png"><img src="Allelep_manhattan.png" title="Allelep_manhattan.png hspace="10" width="400"><br>(Click to download image Allelep_manhattan.png)</a></td></tr> -<tr><td><a href="Allelep_qqplot.png"><img src="Allelep_qqplot.png" title="Allelep_qqplot.png hspace="10" width="400"><br>(Click to download image Allelep_qqplot.png)</a></td></tr> +<tr><td><a href="Allelep_manhattan.pdf">Allelep_manhattan.pdf</a></td></tr> +<tr><td><a href="Allelep_manhattan.pdf"><img src="Allelep_manhattan.png" title="Allelep_manhattan.png" hspace="10" width="800"></a></td></tr> +<tr><td><a href="Allelep_qqplot.pdf">Allelep_qqplot.pdf</a></td></tr> +<tr><td><a href="Allelep_qqplot.pdf"><img src="Allelep_qqplot.png" title="Allelep_qqplot.png" hspace="10" width="800"></a></td></tr><tr><td><a href="rgManQQtest1.R">rgManQQtest1.R</a></td></tr><tr><td><a href="rgManQQtest1.R.log">rgManQQtest1.R.log</a></td></tr></table> @@ -43,7 +45,7 @@ Loading required package: proto -[1] "### 101 values read from /data/tmp/tmpM8NZ50/database/files/000/dataset_1.dat read - now running plots" +[1] "### 101 values read from /data/tmp/tmpB3Kfsc/database/files/000/dataset_1.dat read - now running plots" [1] "## qqplot on Allelep done" @@ -53,7 +55,6 @@ ## R script= -# license not stated so I'm assuming LGPL is ok for my derived work? # generalised so 3 core fields passed as parameters ross lazarus March 24 2010 for rgenetics # Originally created as qqman with the following # attribution: @@ -79,77 +80,96 @@ size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) { if (annotate & is.null(SNPlist)) stop("You requested annotation but provided no SNPlist!") genomewideline=NULL # was genomewideline=-log10(5e-8) + n = length(pvals) if (genomewide) { # use bonferroni since might be only a small region? - genomewideline = -log10(0.05/length(pvals)) } + genomewideline = -log10(0.05/n) } offset = as.integer(offset) + if (n > 1000000) { offset = offset/10000 } + else if (n > 10000) { offset = offset/1000} + chro = as.integer(chrom) # already dealt with X and friends? pvals = as.double(pvals) - chro = as.integer(chrom) # already dealt with X and friends? d=data.frame(CHR=chro,BP=offset,P=pvals) - #limit to only chrs 1-22, x=23,y=24,Mt=25? - d=d[d$CHR %in% 1:25, ] if ("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d) ) { - #d=na.omit(d) + d=d[!is.na(d$P), ] + d=d[!is.na(d$BP), ] + d=d[!is.na(d$CHR), ] + #limit to only chrs 1-22, x=23,y=24,Mt=25? + d=d[d$CHR %in% 1:25, ] d=d[d$P>0 & d$P<=1, ] d$logp = as.double(-log10(d$P)) + dlen = length(d$P) d$pos=NA ticks=NULL lastbase=0 chrlist = unique(d$CHR) + chrlist = as.integer(chrlist) chrlist = sort(chrlist) # returns lexical ordering + if (max.y=="max") { maxy = ceiling(max(d$logp)) } + else { maxy = max.y } nchr = length(chrlist) # may be any number? + maxy = max(maxy,1.1*genomewideline) if (nchr >= 2) { - for (x in c(1:nchr)) { + for (x in c(1:nchr)) { i = chrlist[x] # need the chrom number - may not == index if (x == 1) { # first time - d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP - tks = d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1] - } else { - lastchr = chrlist[x-1] # previous whatever the list - lastbase=lastbase+tail(subset(d,CHR==lastchr)$BP, 1) - d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP+lastbase - if (sum(is.na(lastchr),is.na(lastbase),is.na(d[d$CHR==i, ]$pos))) { - cat(paste('manhattan: For',title,'chrlistx=',i,'lastchr=',lastchr,'lastbase=',lastbase,'pos=',d[d$CHR==i,]$pos)) - } - tks=c(tks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]) - + d[d$CHR==i, ]$pos = d[d$CHR==i, ]$BP # initialize to first BP of chr1 + dsub = subset(d,CHR==i) + dlen = length(dsub$P) + lastbase = max(dsub$pos) # last one + tks = d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1] + lastchr = i + } else { + d[d$CHR==i, ]$pos = d[d$CHR==i, ]$BP+lastbase # one humongous contig + if (sum(is.na(lastchr),is.na(lastbase),is.na(d[d$CHR==i, ]$pos))) { + cat(paste('manhattan: For',title,'chrlistx=',i,'lastchr=',lastchr,'lastbase=',lastbase,'pos=',d[d$CHR==i,]$pos)) + } + tks=c(tks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]) + lastchr = i + dsub = subset(d,CHR==i) + lastbase = max(dsub$pos) # last one } ticklim=c(min(d$pos),max(d$pos)) xlabs = chrlist } } else { # nchr is 1 nticks = 10 - last = max(offset) - first = min(offset) - tks = c() + last = max(d$BP) + first = min(d$BP) + tks = c(first) t = (last-first)/nticks # units per tick - for (x in c(1:nticks)) tks = c(tks,round(x*t)) - xlabs = tks + for (x in c(1:(nticks))) { + tks = c(tks,round(x*t)+first) } ticklim = c(first,last) } # else if (grey) {mycols=rep(c("gray10","gray60"),max(d$CHR)) } else { mycols=rep(coloursTouse,max(d$CHR)) } - - if (max.y=="max") maxy=ceiling(max(d$logp)) else maxy=max.y - maxy = max(maxy,1.1*genomewideline) + dlen = length(d$P) + d$pranks = rank(d$P)/dlen + d$centiles = 100*d$pranks # small are interesting + d$sizes = ifelse((d$centile < 1),2,1) if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ] if (nchr >= 2) { - manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR)) - manplot=manplot+scale_x_continuous(name="Chromosome", breaks=tks, labels=xlabs) } + manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR),size=factor(sizes)) + manplot=manplot+scale_x_continuous(name="Chromosome", breaks=tks, labels=xlabs,limits=ticklim) + manplot=manplot+scale_size_manual(values = c(0.5,1.5)) # requires discreet scale - eg factor + #manplot=manplot+scale_size(values=c(0.5,2)) # requires continuous + } else { manplot=qplot(BP,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR)) - manplot=manplot+scale_x_continuous("BP") } + manplot=manplot+scale_x_continuous(name=paste("Chromosome",chrlist[1]), breaks=tks, labels=tks,limits=ticklim) + } manplot=manplot+scale_y_continuous(limits=c(0,maxy), breaks=1:maxy, labels=1:maxy) manplot=manplot+scale_colour_manual(value=mycols) if (annotate) { manplot=manplot + geom_point(data=d.annotate, colour=I("green3")) } manplot=manplot + opts(legend.position = "none") manplot=manplot + opts(title=title) manplot=manplot+opts( - panel.background=theme_blank(), - axis.text.x=theme_text(size=size.x.labels, colour="grey50"), - axis.text.y=theme_text(size=size.y.labels, colour="grey50"), - axis.ticks=theme_segment(colour=NA) + panel.background=theme_blank(), + axis.text.x=theme_text(size=size.x.labels, colour="grey50"), + axis.text.y=theme_text(size=size.y.labels, colour="grey50"), + axis.ticks=theme_segment(colour=NA) ) if (suggestiveline) manplot=manplot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3)) if (genomewideline) manplot=manplot+geom_hline(yintercept=genomewideline,colour="red") @@ -178,7 +198,7 @@ qq } -rgqqMan = function(infile="/data/tmp/tmpM8NZ50/database/files/000/dataset_1.dat",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(8), +rgqqMan = function(infile="/data/tmp/tmpB3Kfsc/database/files/000/dataset_1.dat",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(8), title="rgManQQtest1",grey=0) { rawd = read.table(infile,head=T,sep='\t') dn = names(rawd) @@ -203,13 +223,15 @@ mytitle = paste('p=',cname,', ',title,sep='') myfname = chartr(' ','_',cname) myqqplot = qq(rawd[,pvalscolumn],title=mytitle) - ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=6,height=4,dpi=100) + ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=8,height=6,dpi=96) + ggsave(filename=paste(myfname,"qqplot.pdf",sep='_'),myqqplot,width=8,height=6,dpi=96) print(paste('## qqplot on',cname,'done')) if ((chromcolumn > 0) & (offsetcolumn > 0)) { print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn)) mymanplot= DrawManhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey) + ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=8,height=6,dpi=96) + ggsave(filename=paste(myfname,"manhattan.pdf",sep='_'),mymanplot,width=8,height=6,dpi=96) print(paste('## manhattan plot on',cname,'done')) - ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=6,height=4,dpi=100) } else { print(paste('chrom column =',chromcolumn,'offset column = ',offsetcolumn, @@ -228,6 +250,6 @@ </pre> -<b><a href="http://rgenetics.org">Galaxy Rgenetics</a> tool output rgManQQ.py run at 20/07/2011 13:29:43</b><br/> +<h3><a href="http://rgenetics.org">Rgenetics</a> tool rgManQQ.py run at 04/08/2011 10:51:34</h3></div></body></html> Binary file test-data/sam_merge_out2.bam has changed --- a/tool_conf.xml.sample Wed Aug 03 13:54:28 2011 -0400 +++ b/tool_conf.xml.sample Thu Aug 04 10:57:03 2011 +1000 @@ -343,7 +343,7 @@ <tool file="samtools/sam2interval.xml" /><tool file="samtools/sam_to_bam.xml" /><tool file="samtools/bam_to_sam.xml" /> - <tool file="samtools/sam_merge.xml" /> + <tool file="picard/sam_merge.xml" /><tool file="samtools/sam_pileup.xml" /><tool file="samtools/pileup_parser.xml" /><tool file="samtools/pileup_interval.xml" /> --- a/tools/rgenetics/rgManQQ.py Wed Aug 03 13:54:28 2011 -0400 +++ b/tools/rgenetics/rgManQQ.py Thu Aug 04 10:57:03 2011 +1000 @@ -1,4 +1,10 @@ #!/usr/local/bin/python +# This is a truly ghastly hack +# all of the heavy data cleaning lifting is done in R which is a really dumb place IMHO +# Making a new file seems a waste but it would be far easier to set everything up in python +# seems to work so I'm leaving it alone +# sigh. Should really move this gig to rpy - writing a robust R script is hard. +# updated to compress pdf using gs since millions of points = horsechoker pdfs and pdfs are good # updated july 20 to fix sort order - R unique() sorts into strict collating order # so need to sort after unique to revert to lexicographic order for x axis on Manhattan # rgmanqq updated july 19 to deal with x,y and mt @@ -13,7 +19,6 @@ debug = False rcode=""" -# license not stated so I'm assuming LGPL is ok for my derived work? # generalised so 3 core fields passed as parameters ross lazarus March 24 2010 for rgenetics # Originally created as qqman with the following # attribution: @@ -39,77 +44,96 @@ size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) { if (annotate & is.null(SNPlist)) stop("You requested annotation but provided no SNPlist!") genomewideline=NULL # was genomewideline=-log10(5e-8) + n = length(pvals) if (genomewide) { # use bonferroni since might be only a small region? - genomewideline = -log10(0.05/length(pvals)) } + genomewideline = -log10(0.05/n) } offset = as.integer(offset) + if (n > 1000000) { offset = offset/10000 } + else if (n > 10000) { offset = offset/1000} + chro = as.integer(chrom) # already dealt with X and friends? pvals = as.double(pvals) - chro = as.integer(chrom) # already dealt with X and friends? d=data.frame(CHR=chro,BP=offset,P=pvals) - #limit to only chrs 1-22, x=23,y=24,Mt=25? - d=d[d$CHR %in% 1:25, ] if ("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d) ) { - #d=na.omit(d) + d=d[!is.na(d$P), ] + d=d[!is.na(d$BP), ] + d=d[!is.na(d$CHR), ] + #limit to only chrs 1-22, x=23,y=24,Mt=25? + d=d[d$CHR %in% 1:25, ] d=d[d$P>0 & d$P<=1, ] d$logp = as.double(-log10(d$P)) + dlen = length(d$P) d$pos=NA ticks=NULL lastbase=0 chrlist = unique(d$CHR) + chrlist = as.integer(chrlist) chrlist = sort(chrlist) # returns lexical ordering + if (max.y=="max") { maxy = ceiling(max(d$logp)) } + else { maxy = max.y } nchr = length(chrlist) # may be any number? + maxy = max(maxy,1.1*genomewideline) if (nchr >= 2) { - for (x in c(1:nchr)) { + for (x in c(1:nchr)) { i = chrlist[x] # need the chrom number - may not == index if (x == 1) { # first time - d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP - tks = d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1] - } else { - lastchr = chrlist[x-1] # previous whatever the list - lastbase=lastbase+tail(subset(d,CHR==lastchr)$BP, 1) - d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP+lastbase - if (sum(is.na(lastchr),is.na(lastbase),is.na(d[d$CHR==i, ]$pos))) { - cat(paste('manhattan: For',title,'chrlistx=',i,'lastchr=',lastchr,'lastbase=',lastbase,'pos=',d[d$CHR==i,]$pos)) - } - tks=c(tks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]) - + d[d$CHR==i, ]$pos = d[d$CHR==i, ]$BP # initialize to first BP of chr1 + dsub = subset(d,CHR==i) + dlen = length(dsub$P) + lastbase = max(dsub$pos) # last one + tks = d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1] + lastchr = i + } else { + d[d$CHR==i, ]$pos = d[d$CHR==i, ]$BP+lastbase # one humongous contig + if (sum(is.na(lastchr),is.na(lastbase),is.na(d[d$CHR==i, ]$pos))) { + cat(paste('manhattan: For',title,'chrlistx=',i,'lastchr=',lastchr,'lastbase=',lastbase,'pos=',d[d$CHR==i,]$pos)) + } + tks=c(tks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]) + lastchr = i + dsub = subset(d,CHR==i) + lastbase = max(dsub$pos) # last one } ticklim=c(min(d$pos),max(d$pos)) xlabs = chrlist } } else { # nchr is 1 nticks = 10 - last = max(offset) - first = min(offset) - tks = c() + last = max(d$BP) + first = min(d$BP) + tks = c(first) t = (last-first)/nticks # units per tick - for (x in c(1:nticks)) tks = c(tks,round(x*t)) - xlabs = tks + for (x in c(1:(nticks))) { + tks = c(tks,round(x*t)+first) } ticklim = c(first,last) } # else if (grey) {mycols=rep(c("gray10","gray60"),max(d$CHR)) } else { mycols=rep(coloursTouse,max(d$CHR)) } - - if (max.y=="max") maxy=ceiling(max(d$logp)) else maxy=max.y - maxy = max(maxy,1.1*genomewideline) + dlen = length(d$P) + d$pranks = rank(d$P)/dlen + d$centiles = 100*d$pranks # small are interesting + d$sizes = ifelse((d$centile < 1),2,1) if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ] if (nchr >= 2) { - manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR)) - manplot=manplot+scale_x_continuous(name="Chromosome", breaks=tks, labels=xlabs) } + manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR),size=factor(sizes)) + manplot=manplot+scale_x_continuous(name="Chromosome", breaks=tks, labels=xlabs,limits=ticklim) + manplot=manplot+scale_size_manual(values = c(0.5,1.5)) # requires discreet scale - eg factor + #manplot=manplot+scale_size(values=c(0.5,2)) # requires continuous + } else { manplot=qplot(BP,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR)) - manplot=manplot+scale_x_continuous("BP") } + manplot=manplot+scale_x_continuous(name=paste("Chromosome",chrlist[1]), breaks=tks, labels=tks,limits=ticklim) + } manplot=manplot+scale_y_continuous(limits=c(0,maxy), breaks=1:maxy, labels=1:maxy) manplot=manplot+scale_colour_manual(value=mycols) if (annotate) { manplot=manplot + geom_point(data=d.annotate, colour=I("green3")) } manplot=manplot + opts(legend.position = "none") manplot=manplot + opts(title=title) manplot=manplot+opts( - panel.background=theme_blank(), - axis.text.x=theme_text(size=size.x.labels, colour="grey50"), - axis.text.y=theme_text(size=size.y.labels, colour="grey50"), - axis.ticks=theme_segment(colour=NA) + panel.background=theme_blank(), + axis.text.x=theme_text(size=size.x.labels, colour="grey50"), + axis.text.y=theme_text(size=size.y.labels, colour="grey50"), + axis.ticks=theme_segment(colour=NA) ) if (suggestiveline) manplot=manplot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3)) if (genomewideline) manplot=manplot+geom_hline(yintercept=genomewideline,colour="red") @@ -168,13 +192,15 @@ mytitle = paste('p=',cname,', ',title,sep='') myfname = chartr(' ','_',cname) myqqplot = qq(rawd[,pvalscolumn],title=mytitle) - ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=6,height=4,dpi=100) + ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=8,height=6,dpi=96) + ggsave(filename=paste(myfname,"qqplot.pdf",sep='_'),myqqplot,width=8,height=6,dpi=96) print(paste('## qqplot on',cname,'done')) if ((chromcolumn > 0) & (offsetcolumn > 0)) { print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn)) mymanplot= DrawManhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey) + ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=8,height=6,dpi=96) + ggsave(filename=paste(myfname,"manhattan.pdf",sep='_'),mymanplot,width=8,height=6,dpi=96) print(paste('## manhattan plot on',cname,'done')) - ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=6,height=4,dpi=100) } else { print(paste('chrom column =',chromcolumn,'offset column = ',offsetcolumn, @@ -212,6 +238,17 @@ rlog.append(rcmd) return rlog,flist +def compressPDF(inpdf=None): + """need absolute path to pdf + """ + assert os.path.isfile(inpdf), "## Input %s supplied to compressPDF not found" % inpdf + outpdf = '%s_compressed' % inpdf + cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf,inpdf] + retval = subprocess.call(cl) + if retval == 0: + os.unlink(inpdf) + shutil.move(outpdf,inpdf) + return retval def main(): u = """<command interpreter="python"> @@ -261,11 +298,19 @@ html.append('<table>\n') for row in flist: fname,expl = row # RRun returns pairs of filenames fiddled for the log and R script - e = os.path.splitext(fname)[-1] + n,e = os.path.splitext(fname) if e in ['.png','.jpg']: - s= '<tr><td><a href="%s"><img src="%s" title="%s hspace="10" width="400"><br>(Click to download image %s)</a></td></tr>' \ - % (fname,fname,expl,expl ) - html.append(s) + pdf = '%s.pdf' % n + pdff = os.path.join(outdir,pdf) + if os.path.exists(pdff): + rval = compressPDF(inpdf=pdff) + if rval <> 0: + pdf = '%s(not_compressed)' % pdf + else: + pdf = '%s(not_found)' % pdf + s= '<tr><td><a href="%s"><img src="%s" title="%s" hspace="10" width="800"></a></td></tr>' \ + % (pdf,fname,expl) + html.append(s) else: html.append('<tr><td><a href="%s">%s</a></td></tr>' % (fname,expl)) html.append('</table>\n') --- a/tools/rgenetics/rgManQQ.xml Wed Aug 03 13:54:28 2011 -0400 +++ b/tools/rgenetics/rgManQQ.xml Thu Aug 04 10:57:03 2011 +1000 @@ -1,4 +1,4 @@ -<tool id="rgManQQ1" name="Manhattan/QQ:" version="1.0.2"> +<tool id="rgManQQ1" name="Manhattan/QQ:" version="1.0.3"><code file="rgManQQ_code.py"/><description>Plots for WGA P values</description> http://bitbucket.org/galaxy/galaxy-central/changeset/e79d064e4880/ changeset: e79d064e4880 user: fubar date: 2011-08-04 04:43:30 summary: Fixed sam_merge.xml to call Picard MergeSamFiles.jar so metadata can be propagated through to the new merged bam from all the individual files - the samtools version of merge would require this to be done separately and passed in with the -h option whereas Picard does it automatically. Added one more test. Interesting that the test for that tool has been failing to correctly pass metadata but passing the buildbot anyway. Thanks to Camille Stephan for pointing out the bug. Changes to rgManQQ so the user can obtain a decent pdf image. When millions of points are plotted, these are humongous so GS is called to compress the resulting pdf and they are now of reasonable size. PDF's are now linked from the thumbnails. Some minor fiddling with point size on the Manhatten plots so the intersting ones are a little more obvious. Minor tweak to twilltestcase.py so composite file components are copied correctly to the directory specified by GALAXY_TEST_SAVE. This makes updating test artefacts much simpler because running tests with GALAXY_TEST_SAVE pointing somewhere will now save every tested output file. affected #: 10 files (36.1 KB) Binary file test-data/rgtestouts/rgManQQ/Allelep_manhattan.png has changed Binary file test-data/rgtestouts/rgManQQ/Allelep_qqplot.png has changed --- a/test-data/rgtestouts/rgManQQ/rgManQQtest1.R Thu Aug 04 10:57:03 2011 +1000 +++ b/test-data/rgtestouts/rgManQQ/rgManQQtest1.R Thu Aug 04 12:43:30 2011 +1000 @@ -1,5 +1,4 @@ -# license not stated so I'm assuming LGPL is ok for my derived work? # generalised so 3 core fields passed as parameters ross lazarus March 24 2010 for rgenetics # Originally created as qqman with the following # attribution: @@ -8,7 +7,7 @@ # http://StephenTurner.us/ # http://GettingGeneticsDone.blogspot.com/ -# Last updated: Tuesday, December 22, 2009 +# Last updated: 19 July 2011 by Ross Lazarus # R code for making manhattan plots and QQ plots from plink output files. # With GWAS data this can take a lot of memory. Recommended for use on # 64bit machines only, for now. @@ -18,86 +17,104 @@ library(ggplot2) coloursTouse = c('firebrick','darkblue','goldenrod','darkgreen') -# not too fugly but need a colour expert please... +# not too ugly but need a colour expert please... -manhattan = function(chrom=NULL,offset=NULL,pvals=NULL, title=NULL, max.y="max", - suggestiveline=0, genomewide=T, size.x.labels=9, size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) { - +DrawManhattan = function(pvals=Null,chrom=Null,offset=Null,title=NULL, max.y="max",suggestiveline=0, genomewide=T, size.x.labels=9, + size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) { if (annotate & is.null(SNPlist)) stop("You requested annotation but provided no SNPlist!") genomewideline=NULL # was genomewideline=-log10(5e-8) + n = length(pvals) if (genomewide) { # use bonferroni since might be only a small region? - genomewideline = -log10(0.05/length(pvals)) } - d=data.frame(CHR=chrom,BP=offset,P=pvals) - - #limit to only chrs 1-23? - d=d[d$CHR %in% 1:23, ] - + genomewideline = -log10(0.05/n) } + offset = as.integer(offset) + if (n > 1000000) { offset = offset/10000 } + else if (n > 10000) { offset = offset/1000} + chro = as.integer(chrom) # already dealt with X and friends? + pvals = as.double(pvals) + d=data.frame(CHR=chro,BP=offset,P=pvals) if ("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d) ) { - d=na.omit(d) + d=d[!is.na(d$P), ] + d=d[!is.na(d$BP), ] + d=d[!is.na(d$CHR), ] + #limit to only chrs 1-22, x=23,y=24,Mt=25? + d=d[d$CHR %in% 1:25, ] d=d[d$P>0 & d$P<=1, ] - d$logp = -log10(d$P) - + d$logp = as.double(-log10(d$P)) + dlen = length(d$P) d$pos=NA ticks=NULL lastbase=0 chrlist = unique(d$CHR) + chrlist = as.integer(chrlist) + chrlist = sort(chrlist) # returns lexical ordering + if (max.y=="max") { maxy = ceiling(max(d$logp)) } + else { maxy = max.y } nchr = length(chrlist) # may be any number? + maxy = max(maxy,1.1*genomewideline) if (nchr >= 2) { - for (x in c(1:nchr)) { + for (x in c(1:nchr)) { i = chrlist[x] # need the chrom number - may not == index if (x == 1) { # first time - d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP - tks = d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1] - } else { - lastchr = chrlist[x-1] # previous whatever the list - lastbase=lastbase+tail(subset(d,CHR==lastchr)$BP, 1) - d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP+lastbase - tks=c(tks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]) + d[d$CHR==i, ]$pos = d[d$CHR==i, ]$BP # initialize to first BP of chr1 + dsub = subset(d,CHR==i) + dlen = length(dsub$P) + lastbase = max(dsub$pos) # last one + tks = d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1] + lastchr = i + } else { + d[d$CHR==i, ]$pos = d[d$CHR==i, ]$BP+lastbase # one humongous contig + if (sum(is.na(lastchr),is.na(lastbase),is.na(d[d$CHR==i, ]$pos))) { + cat(paste('manhattan: For',title,'chrlistx=',i,'lastchr=',lastchr,'lastbase=',lastbase,'pos=',d[d$CHR==i,]$pos)) + } + tks=c(tks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]) + lastchr = i + dsub = subset(d,CHR==i) + lastbase = max(dsub$pos) # last one } ticklim=c(min(d$pos),max(d$pos)) xlabs = chrlist } } else { # nchr is 1 nticks = 10 - last = max(offset) - first = min(offset) - tks = c() + last = max(d$BP) + first = min(d$BP) + tks = c(first) t = (last-first)/nticks # units per tick - for (x in c(1:nticks)) tks = c(tks,round(x*t)) - xlabs = tks + for (x in c(1:(nticks))) { + tks = c(tks,round(x*t)+first) } ticklim = c(first,last) } # else if (grey) {mycols=rep(c("gray10","gray60"),max(d$CHR)) } else { mycols=rep(coloursTouse,max(d$CHR)) } - - if (max.y=="max") maxy=ceiling(max(d$logp)) else maxy=max.y - maxy = max(maxy,1.1*genomewideline) - # if (maxy<8) maxy=8 - # only makes sense if genome wide is assumed - we could have a fine mapping region? + dlen = length(d$P) + d$pranks = rank(d$P)/dlen + d$centiles = 100*d$pranks # small are interesting + d$sizes = ifelse((d$centile < 1),2,1) if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ] if (nchr >= 2) { - manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR)) - manplot=manplot+scale_x_continuous(name="Chromosome", breaks=tks, labels=xlabs) } + manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR),size=factor(sizes)) + manplot=manplot+scale_x_continuous(name="Chromosome", breaks=tks, labels=xlabs,limits=ticklim) + manplot=manplot+scale_size_manual(values = c(0.5,1.5)) # requires discreet scale - eg factor + #manplot=manplot+scale_size(values=c(0.5,2)) # requires continuous + } else { manplot=qplot(BP,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR)) - manplot=manplot+scale_x_continuous("BP") } + manplot=manplot+scale_x_continuous(name=paste("Chromosome",chrlist[1]), breaks=tks, labels=tks,limits=ticklim) + } manplot=manplot+scale_y_continuous(limits=c(0,maxy), breaks=1:maxy, labels=1:maxy) manplot=manplot+scale_colour_manual(value=mycols) if (annotate) { manplot=manplot + geom_point(data=d.annotate, colour=I("green3")) } manplot=manplot + opts(legend.position = "none") manplot=manplot + opts(title=title) manplot=manplot+opts( - panel.background=theme_blank(), - axis.text.x=theme_text(size=size.x.labels, colour="grey50"), - axis.text.y=theme_text(size=size.y.labels, colour="grey50"), - axis.ticks=theme_segment(colour=NA) + panel.background=theme_blank(), + axis.text.x=theme_text(size=size.x.labels, colour="grey50"), + axis.text.y=theme_text(size=size.y.labels, colour="grey50"), + axis.ticks=theme_segment(colour=NA) ) - #manplot = manplot + opts(panel.grid.y.minor=theme_blank(),panel.grid.y.major=theme_blank()) - #manplot = manplot + opts(panel.grid.major=theme_blank()) - if (suggestiveline) manplot=manplot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3)) if (genomewideline) manplot=manplot+geom_hline(yintercept=genomewideline,colour="red") manplot @@ -124,16 +141,24 @@ if (spartan) plot=plot+opts(panel.background=theme_rect(col="grey50"), panel.grid.minor=theme_blank()) qq } -rgqqMan = function(infile="/tmp/rgManQQtemplYC5wa",chromcolumn=1, offsetcolumn=2, pvalscolumns=c(3), + +rgqqMan = function(infile="/data/tmp/tmpNaxDwH/database/files/000/dataset_1.dat",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(8), title="rgManQQtest1",grey=0) { rawd = read.table(infile,head=T,sep='\t') dn = names(rawd) cc = dn[chromcolumn] oc = dn[offsetcolumn] -nams = c(cc,oc) +rawd[,cc] = sub('chr','',rawd[,cc],ignore.case = T) # just in case +rawd[,cc] = sub(':','',rawd[,cc],ignore.case = T) # ugh +rawd[,cc] = sub('X',23,rawd[,cc],ignore.case = T) +rawd[,cc] = sub('Y',24,rawd[,cc],ignore.case = T) +rawd[,cc] = sub('Mt',25,rawd[,cc], ignore.case = T) +nams = c(cc,oc) # for sorting plen = length(rawd[,1]) -doreorder=1 print(paste('###',plen,'values read from',infile,'read - now running plots',sep=' ')) +rawd = rawd[do.call(order,rawd[nams]),] +# mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.... +# in case not yet ordered if (plen > 0) { for (pvalscolumn in pvalscolumns) { if (pvalscolumn > 0) @@ -142,19 +167,15 @@ mytitle = paste('p=',cname,', ',title,sep='') myfname = chartr(' ','_',cname) myqqplot = qq(rawd[,pvalscolumn],title=mytitle) - ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=6,height=4,dpi=100) + ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=8,height=6,dpi=96) + ggsave(filename=paste(myfname,"qqplot.pdf",sep='_'),myqqplot,width=8,height=6,dpi=96) print(paste('## qqplot on',cname,'done')) if ((chromcolumn > 0) & (offsetcolumn > 0)) { - if (doreorder) { - rawd = rawd[do.call(order,rawd[nams]),] - # mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.... - # in case not yet ordered - doreorder = 0 - } print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn)) - mymanplot= manhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey) + mymanplot= DrawManhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey) + ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=8,height=6,dpi=96) + ggsave(filename=paste(myfname,"manhattan.pdf",sep='_'),mymanplot,width=8,height=6,dpi=96) print(paste('## manhattan plot on',cname,'done')) - ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=6,height=4,dpi=100) } else { print(paste('chrom column =',chromcolumn,'offset column = ',offsetcolumn, @@ -171,4 +192,4 @@ rgqqMan() # execute with defaults as substituted -#R script autogenerated by rgenetics/rgutils.py on 07/11/2010 20:03:37 +#R script autogenerated by rgenetics/rgutils.py on 04/08/2011 11:46:16 --- a/test-data/rgtestouts/rgManQQ/rgManQQtest1.html Thu Aug 04 10:57:03 2011 +1000 +++ b/test-data/rgtestouts/rgManQQ/rgManQQtest1.html Thu Aug 04 12:43:30 2011 +1000 @@ -45,7 +45,7 @@ Loading required package: proto -[1] "### 101 values read from /data/tmp/tmpB3Kfsc/database/files/000/dataset_1.dat read - now running plots" +[1] "### 101 values read from /data/tmp/tmpNaxDwH/database/files/000/dataset_1.dat read - now running plots" [1] "## qqplot on Allelep done" @@ -198,7 +198,7 @@ qq } -rgqqMan = function(infile="/data/tmp/tmpB3Kfsc/database/files/000/dataset_1.dat",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(8), +rgqqMan = function(infile="/data/tmp/tmpNaxDwH/database/files/000/dataset_1.dat",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(8), title="rgManQQtest1",grey=0) { rawd = read.table(infile,head=T,sep='\t') dn = names(rawd) @@ -250,6 +250,6 @@ </pre> -<h3><a href="http://rgenetics.org">Rgenetics</a> tool rgManQQ.py run at 04/08/2011 10:51:34</h3> +<h3><a href="http://rgenetics.org">Rgenetics</a> tool rgManQQ.py run at 04/08/2011 11:46:22</h3></div></body></html> Binary file test-data/sam_merge_out1.bam has changed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sam_merge_out1.log Thu Aug 04 12:43:30 2011 +1000 @@ -0,0 +1,5 @@ +[Thu Aug 04 12:17:33 EST 2011] net.sf.picard.sam.MergeSamFiles INPUT=[/data/tmp/tmp6eiFcc/database/files/000/dataset_1.dat, /data/tmp/tmp6eiFcc/database/files/000/dataset_2.dat, /data/tmp/tmp6eiFcc/database/files/000/dataset_2.dat] OUTPUT=/data/tmp/tmp6eiFcc/database/files/000/dataset_3.dat MERGE_SEQUENCE_DICTIONARIES=true SORT_ORDER=coordinate ASSUME_SORTED=false USE_THREADING=false TMP_DIR=/tmp/rlazarus VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +INFO 2011-08-04 12:17:33 MergeSamFiles Sorting input files using temp directory /tmp/rlazarus +INFO 2011-08-04 12:17:33 MergeSamFiles Finished reading inputs. +[Thu Aug 04 12:17:33 EST 2011] net.sf.picard.sam.MergeSamFiles done. +Runtime.totalMemory()=2028732416 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sam_merge_out2.log Thu Aug 04 12:43:30 2011 +1000 @@ -0,0 +1,5 @@ +[Thu Aug 04 12:18:15 EST 2011] net.sf.picard.sam.MergeSamFiles INPUT=[/data/tmp/tmp6eiFcc/database/files/000/dataset_5.dat, /data/tmp/tmp6eiFcc/database/files/000/dataset_6.dat, /data/tmp/tmp6eiFcc/database/files/000/dataset_7.dat] OUTPUT=/data/tmp/tmp6eiFcc/database/files/000/dataset_8.dat MERGE_SEQUENCE_DICTIONARIES=true SORT_ORDER=coordinate ASSUME_SORTED=false USE_THREADING=false TMP_DIR=/tmp/rlazarus VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +INFO 2011-08-04 12:18:15 MergeSamFiles Sorting input files using temp directory /tmp/rlazarus +INFO 2011-08-04 12:18:15 MergeSamFiles Finished reading inputs. +[Thu Aug 04 12:18:15 EST 2011] net.sf.picard.sam.MergeSamFiles done. +Runtime.totalMemory()=2028732416 --- a/test/base/twilltestcase.py Thu Aug 04 10:57:03 2011 +1000 +++ b/test/base/twilltestcase.py Thu Aug 04 12:43:30 2011 +1000 @@ -728,6 +728,10 @@ self.visit_url( "%s/datasets/%s/display/%s" % ( self.url, self.security.encode_id( hda_id ), base_name ) ) data = self.last_page() file( temp_name, 'wb' ).write( data ) + if self.keepOutdir > '': + ofn = os.path.join(self.keepOutdir,base_name) + shutil.copy(temp_name,ofn) + log.debug('## GALAXY_TEST_SAVE=%s. saved %s' % (self.keepOutdir,ofn)) try: # have to nest try-except in try-finally to handle 2.4 try: --- a/tool_conf.xml.sample Thu Aug 04 10:57:03 2011 +1000 +++ b/tool_conf.xml.sample Thu Aug 04 12:43:30 2011 +1000 @@ -343,7 +343,7 @@ <tool file="samtools/sam2interval.xml" /><tool file="samtools/sam_to_bam.xml" /><tool file="samtools/bam_to_sam.xml" /> - <tool file="picard/sam_merge.xml" /> + <tool file="samtools/sam_merge.xml" /><tool file="samtools/sam_pileup.xml" /><tool file="samtools/pileup_parser.xml" /><tool file="samtools/pileup_interval.xml" /> --- a/tools/samtools/sam_merge.xml Thu Aug 04 10:57:03 2011 +1000 +++ b/tools/samtools/sam_merge.xml Thu Aug 04 12:43:30 2011 +1000 @@ -1,18 +1,21 @@ -<tool id="sam_merge" name="Merge BAM Files" version="1.1.1"> +<tool id="sam_merge2" name="Merge BAM Files" version="1.1.2"><description>merges BAM files together</description><requirements> - <requirement type="package">samtools</requirement> + <requirement type="package">picard</requirement></requirements> - <command interpreter="python"> - sam_merge.py - $input1 - $output1 - $input2 + <command> + java -jar ${GALAXY_DATA_INDEX_DIR}/shared/jars/MergeSamFiles.jar MERGE_SEQUENCE_DICTIONARIES=$mergeSD OUTPUT=$output1 INPUT=$input1 INPUT=$input2 #for $i in $inputs - ${i.input} - #end for + INPUT=${i.input} + #end for + 2> $outlog </command><inputs> + <param name="title" label="Name for the output merged bam file" type="text" default="Merged.bam" + help="This name will appear in your history so use it to remember what the new file in your history contains" /> + <param name="mergeSD" value="true" type="boolean" label="Merge all component bam file headers into the merged bam file" + truevalue="true" falsevalue="false" checked="yes" + help="Control the MERGE_SEQUENCE_DICTIONARIES flag for Picard MergeSamFiles. Default (true) correctly propagates read groups and other important metadata" /><param name="input1" label="First file" type="data" format="bam" /><param name="input2" label="with file" type="data" format="bam" help="Need to add more files? Use controls below." /><repeat name="inputs" title="Input Files"> @@ -20,57 +23,39 @@ </repeat></inputs><outputs> - <data format="bam" name="output1" label="${tool.name} on ${on_string}: merged BAM" /> + <data format="bam" name="output1" label="${title}.bam" /> + <data format="txt" name="outlog" label="${title}_${tool.name}.log" /></outputs><tests><!-- TODO: add ability to test framework to test without at least one repeat element value + --><test> - --> - <!-- - Bam merge command: - samtools merge test-data/sam_merge_out1.bam test-data/sam_merge_in1.bam test-data/sam_merge_in2.bam - --> - <!-- + <param name="title" value="test1" /> + <param name="mergeSD" value="true" /><param name="input1" value="sam_merge_in1.bam" ftype="bam" /><param name="input2" value="sam_merge_in2.bam" ftype="bam" /><output name="output1" file="sam_merge_out1.bam" ftype="bam" /> + <output name="outlog" file="sam_merge_out1.log" ftype="txt" lines_diff="8"/></test> - --><test> - <!-- - Bam merge command: - samtools merge sam_merge_out2.bam test-data/sam_merge_in1.bam test-data/sam_merge_in2.bam test-data/sam_merge_in3.bam - --> + <param name="title" value="test2" /> + <param name="mergeSD" value="true" /><param name="input1" value="sam_merge_in1.bam" ftype="bam" /><param name="input2" value="sam_merge_in2.bam" ftype="bam" /><param name="input" value="sam_merge_in3.bam" ftype="bam" /><output name="output1" file="sam_merge_out2.bam" ftype="bam" /> + <output name="outlog" file="sam_merge_out2.log" ftype="txt" lines_diff="8"/></test> - <!-- TODO: add ability to test code to be able to test with multiple - inputs (parameters with same value) - <test> - --> - <!-- - Bam merge command: - samtools merge test-data/sam_merge_out3.bam test-data/sam_merge_in1.bam test-data/sam_merge_in2.bam test-data/sam_merge_in3.bam test-data/sam_merge_in4.bam - --> - <!-- - <param name="input1" value="sam_merge_in1.bam" ftype="bam" /> - <param name="input2" value="sam_merge_in2.bam" ftype="bam" /> - <param name="input" value="sam_merge_in3.bam" ftype="bam" /> - <param name="input" value="sam_merge_in4.bam" ftype="bam" /> - <output name="output1" file="sam_merge_out3.bam" ftype="bam" /> - </test> - --></tests><help> **What it does** -This tool uses SAMTools_' merge command to merge any number of BAM files together into one BAM file. +This tool uses the Picard_ merge command to merge any number of BAM files together into one BAM file while preserving the BAM +metadata such as read groups -.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml +.. _Picard: http://picard.sourceforge.net/command-line-overview.shtml#MergeSamFiles </help></tool> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.