galaxy-dist commit 7cc9329fd895: Fixes to some rgenetics tests and test outputs
# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User fubar: ross Lazarus at gmail period com # Date 1283236053 14400 # Node ID 7cc9329fd895049827d1bf5436f6aa11bf9d30ed # Parent 6b2dffb999d8595d410dc299fb512b76c35be39e Fixes to some rgenetics tests and test outputs Some odd bugs in the new version of ggplot2 in R Fixes to the Rrun function in rgutils Binary file test-data/rgtestouts/rgManQQ/Allelep_manhattan.png has changed --- a/tools/rgenetics/rgManQQ.py +++ b/tools/rgenetics/rgManQQ.py @@ -75,25 +75,25 @@ manhattan = function(chrom=NULL,offset=N # only makes sense if genome wide is assumed - we could have a fine mapping region? if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ] - plot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR)) - plot=plot+scale_x_continuous(name="Chromosome", breaks=ticks, labels=(unique(d$CHR))) - plot=plot+scale_y_continuous(limits=c(0,maxy), breaks=1:maxy, labels=1:maxy) - plot=plot+scale_colour_manual(value=mycols) - if (annotate) plot=plot + geom_point(data=d.annotate, colour=I("green3")) - plot=plot + opts(legend.position = "none") - plot=plot + opts(title=title) - plot=plot+opts( + manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR)) + manplot=manplot+scale_x_continuous(name="Chromosome", breaks=ticks, labels=(unique(d$CHR))) + manplot=manplot+scale_y_continuous(limits=c(0,maxy), breaks=1:maxy, labels=1:maxy) + manplot=manplot+scale_colour_manual(value=mycols) + if (annotate) { manplot=manplot + geom_point(data=d.annotate, colour=I("green3")) } + manplot=manplot + opts(legend.position = "none") + manplot=manplot + opts(title=title) + manplot=manplot+opts( panel.background=theme_blank(), axis.text.x=theme_text(size=size.x.labels, colour="grey50"), axis.text.y=theme_text(size=size.y.labels, colour="grey50"), axis.ticks=theme_segment(colour=NA) ) - #plot = plot + opts(panel.grid.y.minor=theme_blank(),panel.grid.y.major=theme_blank()) - #plot = plot + opts(panel.grid.major=theme_blank()) + #manplot = manplot + opts(panel.grid.y.minor=theme_blank(),panel.grid.y.major=theme_blank()) + #manplot = manplot + opts(panel.grid.major=theme_blank()) - if (suggestiveline) plot=plot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3)) - if (genomewideline) plot=plot+geom_hline(yintercept=genomewideline,colour="red") - plot + if (suggestiveline) manplot=manplot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3)) + if (genomewideline) manplot=manplot+geom_hline(yintercept=genomewideline,colour="red") + manplot } else { stop("Make sure your data frame contains columns CHR, BP, and P") } @@ -110,12 +110,12 @@ qq = function(pvector, title=NULL, spart # ylab=expression(Observed~~-log[10](italic(p))), xlim=c(0,max(e)), ylim=c(0,max(e))) # lines(e,e,col="red") #You'll need ggplot2 installed to do the rest - plot=qplot(e,o, xlim=c(0,max(e)), ylim=c(0,max(o))) + stat_abline(intercept=0,slope=1, col="red") - plot=plot+opts(title=title) - plot=plot+scale_x_continuous(name=expression(Expected~~-log[10](italic(p)))) - plot=plot+scale_y_continuous(name=expression(Observed~~-log[10](italic(p)))) + qq=qplot(e,o, xlim=c(0,max(e)), ylim=c(0,max(o))) + stat_abline(intercept=0,slope=1, col="red") + qq=qq+opts(title=title) + qq=qq+scale_x_continuous(name=expression(Expected~~-log[10](italic(p)))) + qq=qq+scale_y_continuous(name=expression(Observed~~-log[10](italic(p)))) if (spartan) plot=plot+opts(panel.background=theme_rect(col="grey50"), panel.grid.minor=theme_blank()) - plot + qq } """ @@ -140,6 +140,7 @@ if (plen > 0) { mytitle = paste('p=',cname,', ',title,sep='') myfname = chartr(' ','_',cname) myqqplot = qq(rawd[,pvalscolumn],title=mytitle) + ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=8,height=11) print(paste('## qqplot on',cname,'done')) if ((chromcolumn > 0) & (offsetcolumn > 0)) { if (doreorder) { @@ -151,13 +152,12 @@ if (plen > 0) { print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn)) mymanplot= manhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey) print(paste('## manhattan plot on',cname,'done')) - ggsave(file=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=11,height=8,dpi=100) + ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=11,height=8) } else { print(paste('chrom column =',chromcolumn,'offset column = ',offsetcolumn, 'so no Manhattan plot - supply both chromosome and offset as numerics for Manhattan plots if required')) } - ggsave(file=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=8,height=11,dpi=100) } else { print(paste('pvalue column =',pvalscolumn,'Cannot parse it so no plots possible')) @@ -177,9 +177,10 @@ def doManQQ(input_fname,chrom_col,offset to make them do our bidding - and save the resulting code for posterity this can be called externally, I guess...for QC eg? """ - rcmd = '%s%s' % (rcode,rcode2 % (input_fname,chrom_col,offset_col,pval_cols,title,grey)) rlog,flist = RRun(rcmd=rcmd,title=ctitle,outdir=outdir) + rlog.append('## R script=') + rlog.append(rcmd) return rlog,flist --- a/test-data/rgtestouts/rgManQQ/rgManQQtest1.R +++ b/test-data/rgtestouts/rgManQQ/rgManQQtest1.R @@ -108,7 +108,7 @@ qq = function(pvector, title=NULL, spart if (spartan) plot=plot+opts(panel.background=theme_rect(col="grey50"), panel.grid.minor=theme_blank()) plot } -rgqqMan = function(infile="/share/shared/galaxy/test-data/smallwgaP.xls",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(6,8), +rgqqMan = function(infile="/opt/galaxy/test-data/smallwgaP.xls",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(8), title="rgManQQtest1",grey=0) { rawd = read.table(infile,head=T,sep=' ') dn = names(rawd) @@ -155,4 +155,4 @@ if (plen > 0) { rgqqMan() # execute with defaults as substituted -#R script autogenerated by rgenetics/rgutils.py on 09/05/2010 21:23:26 +#R script autogenerated by rgenetics/rgutils.py on 30/08/2010 04:09:48 --- a/tools/rgenetics/rgGRR.py +++ b/tools/rgenetics/rgGRR.py @@ -78,7 +78,7 @@ class NullDevice: pass tempstderr = sys.stderr # save -sys.stderr = NullDevice() +#sys.stderr = NullDevice() # need to avoid blather about deprecation and other strange stuff from scipy # the current galaxy job runner assumes that # the job is in error if anything appears on sys.stderr --- a/test-data/rgtestouts/rgManQQ/rgManQQtest1.html +++ b/test-data/rgtestouts/rgManQQ/rgManQQtest1.html @@ -15,30 +15,21 @@ <tr><td><a href="Allelep_manhattan.png"><img src="Allelep_manhattan.png" alt="Allelep_manhattan.png hspace="10" width="400"><br>(Click to download image Allelep_manhattan.png)</a></td></tr><tr><td><a href="Allelep_qqplot.png"><img src="Allelep_qqplot.png" alt="Allelep_qqplot.png hspace="10" width="400"><br>(Click to download image Allelep_qqplot.png)</a></td></tr> -<tr><td><a href="Armitagep_manhattan.png"><img src="Armitagep_manhattan.png" alt="Armitagep_manhattan.png hspace="10" width="400"><br>(Click to download image Armitagep_manhattan.png)</a></td></tr> -<tr><td><a href="Armitagep_qqplot.png"><img src="Armitagep_qqplot.png" alt="Armitagep_qqplot.png hspace="10" width="400"><br>(Click to download image Armitagep_qqplot.png)</a></td></tr><tr><td><a href="rgManQQtest1.R">rgManQQtest1.R</a></td></tr> +<tr><td><a href="rgManQQtest1.R.log">rgManQQtest1.R.log</a></td></tr></table><h3>R log follows below</h3><hr><pre> -Loading required package: proto - -Loading required package: grid - Loading required package: reshape Loading required package: plyr -Loading required package: digest +Loading required package: grid -[1] "### 101 values read from /share/shared/galaxy/test-data/smallwgaP.xls read - now running plots" +Loading required package: proto -[1] "## qqplot on Armitagep done" - -[1] "## manhattan on Armitagep starting 2 3 6" - -[1] "## manhattan plot on Armitagep done" +[1] "### 101 values read from /opt/galaxy/test-data/smallwgaP.xls read - now running plots" [1] "## qqplot on Allelep done" @@ -46,8 +37,166 @@ Loading required package: digest [1] "## manhattan plot on Allelep done" +## R script= + +# license not stated so I'm assuming LGPL is ok for my derived work? +# generalised so 3 core fields passed as parameters ross lazarus March 24 2010 for rgenetics +# Originally created as qqman with the following +# attribution: +#-------------- +# Stephen Turner +# http://StephenTurner.us/ +# http://GettingGeneticsDone.blogspot.com/ + +# Last updated: Tuesday, December 22, 2009 +# R code for making manhattan plots and QQ plots from plink output files. +# With GWAS data this can take a lot of memory. Recommended for use on +# 64bit machines only, for now. + +# + +library(ggplot2) + +coloursTouse = c('firebrick','darkblue','goldenrod','darkgreen') +# not too fugly but need a colour expert please... + + +manhattan = function(chrom=NULL,offset=NULL,pvals=NULL, title=NULL, max.y="max", + suggestiveline=0, genomewide=T, size.x.labels=9, size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) { + + if (annotate & is.null(SNPlist)) stop("You requested annotation but provided no SNPlist!") + genomewideline=NULL # was genomewideline=-log10(5e-8) + if (genomewide) { # use bonferroni since might be only a small region? + genomewideline = -log10(0.05/length(pvals)) } + d=data.frame(CHR=chrom,BP=offset,P=pvals) + + #limit to only chrs 1-23? + d=d[d$CHR %in% 1:23, ] + + if ("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d) ) { + d=na.omit(d) + d=d[d$P>0 & d$P<=1, ] + d$logp = -log10(d$P) + + d$pos=NA + ticks=NULL + lastbase=0 + chrlist = unique(d$CHR) + nchr = length(chrlist) # may be any number? + for (x in c(1:nchr)) { + i = chrlist[x] # need the chrom number - may not == index + if (x == 1) { # first time + d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP + } else { + lastchr = chrlist[x-1] # previous whatever the list + lastbase=lastbase+tail(subset(d,CHR==lastchr)$BP, 1) + d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP+lastbase + } + ticks=c(ticks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]) + } + ticklim=c(min(d$pos),max(d$pos)) + if (grey) {mycols=rep(c("gray10","gray60"),max(d$CHR)) + } else { + mycols=rep(coloursTouse,max(d$CHR)) + } + + if (max.y=="max") maxy=ceiling(max(d$logp)) else maxy=max.y + maxy = max(maxy,1.1*genomewideline) + # if (maxy<8) maxy=8 + # only makes sense if genome wide is assumed - we could have a fine mapping region? + if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ] + + manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR)) + manplot=manplot+scale_x_continuous(name="Chromosome", breaks=ticks, labels=(unique(d$CHR))) + manplot=manplot+scale_y_continuous(limits=c(0,maxy), breaks=1:maxy, labels=1:maxy) + manplot=manplot+scale_colour_manual(value=mycols) + if (annotate) { manplot=manplot + geom_point(data=d.annotate, colour=I("green3")) } + manplot=manplot + opts(legend.position = "none") + manplot=manplot + opts(title=title) + manplot=manplot+opts( + panel.background=theme_blank(), + axis.text.x=theme_text(size=size.x.labels, colour="grey50"), + axis.text.y=theme_text(size=size.y.labels, colour="grey50"), + axis.ticks=theme_segment(colour=NA) + ) + #manplot = manplot + opts(panel.grid.y.minor=theme_blank(),panel.grid.y.major=theme_blank()) + #manplot = manplot + opts(panel.grid.major=theme_blank()) + + if (suggestiveline) manplot=manplot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3)) + if (genomewideline) manplot=manplot+geom_hline(yintercept=genomewideline,colour="red") + manplot + } else { + stop("Make sure your data frame contains columns CHR, BP, and P") + } +} + + + +qq = function(pvector, title=NULL, spartan=F) { + # Thanks to Daniel Shriner at NHGRI for providing this code for creating expected and observed values + o = -log10(sort(pvector,decreasing=F)) + e = -log10( 1:length(o)/length(o) ) + # you could use base graphics + # plot(e,o,pch=19,cex=0.25, xlab=expression(Expected~~-log[10](italic(p))), + # ylab=expression(Observed~~-log[10](italic(p))), xlim=c(0,max(e)), ylim=c(0,max(e))) + # lines(e,e,col="red") + #You'll need ggplot2 installed to do the rest + qq=qplot(e,o, xlim=c(0,max(e)), ylim=c(0,max(o))) + stat_abline(intercept=0,slope=1, col="red") + qq=qq+opts(title=title) + qq=qq+scale_x_continuous(name=expression(Expected~~-log[10](italic(p)))) + qq=qq+scale_y_continuous(name=expression(Observed~~-log[10](italic(p)))) + if (spartan) plot=plot+opts(panel.background=theme_rect(col="grey50"), panel.grid.minor=theme_blank()) + qq +} +rgqqMan = function(infile="/opt/galaxy/test-data/smallwgaP.xls",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(8), +title="rgManQQtest1",grey=0) { +rawd = read.table(infile,head=T,sep=' ') +dn = names(rawd) +cc = dn[chromcolumn] +oc = dn[offsetcolumn] +nams = c(cc,oc) +plen = length(rawd[,1]) +doreorder=1 +print(paste('###',plen,'values read from',infile,'read - now running plots',sep=' ')) +if (plen > 0) { + for (pvalscolumn in pvalscolumns) { + if (pvalscolumn > 0) + { + cname = names(rawd)[pvalscolumn] + mytitle = paste('p=',cname,', ',title,sep='') + myfname = chartr(' ','_',cname) + myqqplot = qq(rawd[,pvalscolumn],title=mytitle) + print(paste('## qqplot on',cname,'done')) + if ((chromcolumn > 0) & (offsetcolumn > 0)) { + if (doreorder) { + rawd = rawd[do.call(order,rawd[nams]),] + # mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.... + # in case not yet ordered + doreorder = 0 + } + print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn)) + mymanplot= manhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey) + print(paste('## manhattan plot on',cname,'done')) + ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=11,height=8,dpi=100) + } + else { + print(paste('chrom column =',chromcolumn,'offset column = ',offsetcolumn, + 'so no Manhattan plot - supply both chromosome and offset as numerics for Manhattan plots if required')) + } + ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=8,height=11,dpi=100) + } + else { + print(paste('pvalue column =',pvalscolumn,'Cannot parse it so no plots possible')) + } + } # for pvalscolumn + } else { print('## Problem - no values available to plot - was there really a chromosome and offset column?') } +} + +rgqqMan() +# execute with defaults as substituted + </pre> -<h3><a href="http://rgenetics.org">Rgenetics</a> tool rgManQQ.py run at 09/05/2010 21:23:34</h3> +<h3><a href="http://rgenetics.org">Rgenetics</a> tool rgManQQ.py run at 30/08/2010 20:46:51</h3></div></body></html> --- a/tools/rgenetics/rgManQQ.xml +++ b/tools/rgenetics/rgManQQ.xml @@ -42,14 +42,10 @@ <param name='chrom_col' value='1' /><param name='offset_col' value='2' /><param name='grey' value='0' /> - <output name='out_html' file='rgtestouts/rgManQQ/rgManQQtest1.html' ftype='html' lines_diff='17'> - <extra_files type="file" name='Allelep_manhattan.png' value='rgtestouts/rgManQQ/Allelep_manhattan.png' compare="sim_size" - delta = "4000" /> - <extra_files type="file" name='Allelep_qqplot.png' value='rgtestouts/rgManQQ/Allelep_qqplot.png' compare="sim_size" - delta = "4000" /> - <extra_files type="file" name='Armitagep_manhattan.png' value='rgtestouts/rgManQQ/Armitagep_manhattan.png' compare="sim_size" + <output name='out_html' file='rgtestouts/rgManQQ/rgManQQtest1.html' ftype='html' lines_diff='30'> + <extra_files type="file" name='Allelep_manhattan.png' value='rgtestouts/rgManQQ/Allelep_manhattan.png' compare="sim_size" delta = "4000"/> - <extra_files type="file" name='Armitagep_qqplot.png' value='rgtestouts/rgManQQ/Armitagep_qqplot.png' compare="sim_size" + <extra_files type="file" name='Allelep_qqplot.png' value='rgtestouts/rgManQQ/Allelep_qqplot.png' compare="sim_size" delta = "4000" /><extra_files type="file" name='rgManQQtest1.R' value='rgtestouts/rgManQQ/rgManQQtest1.R' compare="diff" lines_diff="160"/></output> --- a/tools/rgenetics/rgutils.py +++ b/tools/rgenetics/rgutils.py @@ -122,32 +122,36 @@ def RRun(rcmd=[],outdir=None,title='myR' tempout=False rname = '%s.R' % title stoname = '%s.R.log' % title + cwd = os.getcwd() if outdir: # want a specific path try: os.makedirs(outdir) # might not be there yet... except: pass - else: - tempout = True - outdir = tempfile.mkdtemp(prefix=title) + os.chdir(outdir) if type(rcmd) == type([]): script = '\n'.join(rcmd) else: # string script = rcmd sto = file(stoname,'w') - rscriptname = os.path.join(outdir,rname) - rscript = file(rscriptname,'w') + rscript = file(rname,'w') rscript.write(script) rscript.write('\n#R script autogenerated by rgenetics/rgutils.py on %s\n' % timenow()) rscript.close() - vcl = '%s --slave --vanilla < %s' % (rexe,rscriptname) - x = subprocess.Popen(vcl,shell=True,stderr=sto,stdout=sto,cwd=outdir) + vcl = '%s --slave --vanilla < %s' % (rexe,rname) + if outdir: + x = subprocess.Popen(vcl,shell=True,stderr=sto,stdout=sto,cwd=outdir) + else: + x = subprocess.Popen(vcl,shell=True,stderr=sto,stdout=sto) retval = x.wait() sto.close() rlog = file(stoname,'r').readlines() if retval <> 0: rlog.insert(0,'Nonzero exit code = %d' % retval) # indicate failure - flist = os.listdir(outdir) + if outdir: + flist = os.listdir(outdir) + else: + flist = os.listdir(os.getcwd()) flist.sort flist = [(x,x) for x in flist] for i,x in enumerate(flist): @@ -155,16 +159,8 @@ def RRun(rcmd=[],outdir=None,title='myR' flist[i] = (x,'R script for %s' % title) elif x == stoname: flist[i] = (x,'R log for %s' % title) - if tidy and tempout: # we used a temp one - fix that - flist = os.listdir(outdir) - for f in flist: - if os.path.isdir(f): - flist2 = os.listdir(f) - for f2 in flist2: - os.unlink(os.path.abspath(f2)) - else: - os.unlink(os.path.abspath(f)) - os.rmdir(outdir) + if outdir: + os.chdir(cwd) return rlog,flist # for html layout def runPlink(bfn='bar',ofn='foo',logf=None,plinktasks=[],cd='./',vclbase = []): --- a/tools/rgenetics/rgtest.sh +++ b/tools/rgenetics/rgtest.sh @@ -41,7 +41,7 @@ CL="python $TOOLPATH/$TOOL.py "$INPATH/s #python /opt/galaxy/tools/rgenetics/rgManQQ.py /opt/galaxy/test-data/smallwgaP.xls rgManQQtest1 #/opt/galaxy/test-data/rgtestouts/rgManQQ/rgManQQtest1.html /opt/galaxy/test-data/rgtestouts/rgManQQ 1 2 5,7 echo "Testing $TOOL using $CL" -python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 5,7 0 +python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 7 0 TOOL="rgfakePhe" NPRE=${TOOL}test1 OUTPATH="$OROOT/$TOOL" Binary file test-data/rgtestouts/rgManQQ/Allelep_qqplot.png has changed
participants (1)
-
commits-noreply@bitbucket.org