commit/galaxy-central: fubar: Fixes to rgManQQ.py to handle chromosome oddities like 'chr1' and X.
1 new changeset in galaxy-central: http://bitbucket.org/galaxy/galaxy-central/changeset/2b55d3bd13d4/ changeset: 2b55d3bd13d4 user: fubar date: 2011-07-20 04:17:43 summary: Fixes to rgManQQ.py to handle chromosome oddities like 'chr1' and X. Also cleaned up redundant file recreation - the code passes column numbers to R so the raw data can be read as supplied by Galaxy Thanks to Alison Harrill for exposing some more input edge cases... affected #: 3 files (1.3 KB) --- a/test-data/rgtestouts/rgManQQ/rgManQQtest1.html Tue Jul 19 21:22:07 2011 -0400 +++ b/test-data/rgtestouts/rgManQQ/rgManQQtest1.html Wed Jul 20 12:17:43 2011 +1000 @@ -13,8 +13,8 @@ <h1>rgManQQtest1</h1><table> -<tr><td><a href="Allelep_manhattan.png"><img src="Allelep_manhattan.png" alt="Allelep_manhattan.png hspace="10" width="400"><br>(Click to download image Allelep_manhattan.png)</a></td></tr> -<tr><td><a href="Allelep_qqplot.png"><img src="Allelep_qqplot.png" alt="Allelep_qqplot.png hspace="10" width="400"><br>(Click to download image Allelep_qqplot.png)</a></td></tr> +<tr><td><a href="Allelep_manhattan.png"><img src="Allelep_manhattan.png" title="Allelep_manhattan.png hspace="10" width="400"><br>(Click to download image Allelep_manhattan.png)</a></td></tr> +<tr><td><a href="Allelep_qqplot.png"><img src="Allelep_qqplot.png" title="Allelep_qqplot.png hspace="10" width="400"><br>(Click to download image Allelep_qqplot.png)</a></td></tr><tr><td><a href="rgManQQtest1.R">rgManQQtest1.R</a></td></tr><tr><td><a href="rgManQQtest1.R.log">rgManQQtest1.R.log</a></td></tr></table> @@ -35,7 +35,7 @@ - round_any + rename, round_any @@ -43,11 +43,11 @@ Loading required package: proto -[1] "### 101 values read from /tmp/rgManQQtemplYC5wa read - now running plots" +[1] "### 101 values read from /data/tmp/tmpTPXdE1/database/files/000/dataset_1.dat read - now running plots" [1] "## qqplot on Allelep done" -[1] "## manhattan on Allelep starting 1 2 3" +[1] "## manhattan on Allelep starting 2 3 8" [1] "## manhattan plot on Allelep done" @@ -72,26 +72,30 @@ library(ggplot2) coloursTouse = c('firebrick','darkblue','goldenrod','darkgreen') -# not too fugly but need a colour expert please... +# not too ugly but need a colour expert please... -manhattan = function(chrom=NULL,offset=NULL,pvals=NULL, title=NULL, max.y="max", - suggestiveline=0, genomewide=T, size.x.labels=9, size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) { - +DrawManhattan = function(pvals=Null,chrom=Null,offset=Null,title=NULL, max.y="max",suggestiveline=0, genomewide=T, size.x.labels=9, + size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) { if (annotate & is.null(SNPlist)) stop("You requested annotation but provided no SNPlist!") genomewideline=NULL # was genomewideline=-log10(5e-8) if (genomewide) { # use bonferroni since might be only a small region? genomewideline = -log10(0.05/length(pvals)) } - d=data.frame(CHR=chrom,BP=offset,P=pvals) - - #limit to only chrs 1-23? - d=d[d$CHR %in% 1:23, ] - + chro = sub('chr','',chrom, ignore.case = T) # just in case + chro = sub(':','',chro, ignore.case = T) # ugh + chro = sub('X',23,chro, ignore.case = T) + chro = sub('Y',24,chro, ignore.case = T) + chro = sub('Mt',25,chro, ignore.case = T) + offset = as.integer(offset) + pvals = as.double(pvals) + chro = as.integer(chro) + d=data.frame(CHR=chro,BP=offset,P=pvals) + #limit to only chrs 1-22, x=23,y=24,Mt=25? + d=d[d$CHR %in% 1:25, ] if ("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d) ) { - d=na.omit(d) + #d=na.omit(d) d=d[d$P>0 & d$P<=1, ] - d$logp = -log10(d$P) - + d$logp = as.double(-log10(d$P)) d$pos=NA ticks=NULL lastbase=0 @@ -107,7 +111,11 @@ lastchr = chrlist[x-1] # previous whatever the list lastbase=lastbase+tail(subset(d,CHR==lastchr)$BP, 1) d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP+lastbase + if (sum(is.na(lastchr),is.na(lastbase),is.na(d[d$CHR==i, ]$pos))) { + cat(paste('manhattan: For',title,'chrlistx=',i,'lastchr=',lastchr,'lastbase=',lastbase,'pos=',d[d$CHR==i,]$pos)) + } tks=c(tks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]) + } ticklim=c(min(d$pos),max(d$pos)) xlabs = chrlist @@ -129,8 +137,6 @@ if (max.y=="max") maxy=ceiling(max(d$logp)) else maxy=max.y maxy = max(maxy,1.1*genomewideline) - # if (maxy<8) maxy=8 - # only makes sense if genome wide is assumed - we could have a fine mapping region? if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ] if (nchr >= 2) { manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR)) @@ -149,9 +155,6 @@ axis.text.y=theme_text(size=size.y.labels, colour="grey50"), axis.ticks=theme_segment(colour=NA) ) - #manplot = manplot + opts(panel.grid.y.minor=theme_blank(),panel.grid.y.major=theme_blank()) - #manplot = manplot + opts(panel.grid.major=theme_blank()) - if (suggestiveline) manplot=manplot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3)) if (genomewideline) manplot=manplot+geom_hline(yintercept=genomewideline,colour="red") manplot @@ -178,7 +181,8 @@ if (spartan) plot=plot+opts(panel.background=theme_rect(col="grey50"), panel.grid.minor=theme_blank()) qq } -rgqqMan = function(infile="/tmp/rgManQQtemplYC5wa",chromcolumn=1, offsetcolumn=2, pvalscolumns=c(3), + +rgqqMan = function(infile="/data/tmp/tmpTPXdE1/database/files/000/dataset_1.dat",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(8), title="rgManQQtest1",grey=0) { rawd = read.table(infile,head=T,sep='\t') dn = names(rawd) @@ -206,7 +210,7 @@ doreorder = 0 } print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn)) - mymanplot= manhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey) + mymanplot= DrawManhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey) print(paste('## manhattan plot on',cname,'done')) ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=6,height=4,dpi=100) } @@ -227,6 +231,6 @@ </pre> -<h3><a href="http://rgenetics.org">Rgenetics</a> tool rgManQQ.py run at 07/11/2010 20:04:20</h3> +<b><a href="http://rgenetics.org">Galaxy Rgenetics</a> tool output rgManQQ.py run at 20/07/2011 12:08:46</b><br/></div></body></html> --- a/tools/rgenetics/rgManQQ.py Tue Jul 19 21:22:07 2011 -0400 +++ b/tools/rgenetics/rgManQQ.py Wed Jul 20 12:17:43 2011 +1000 @@ -1,5 +1,7 @@ #!/usr/local/bin/python - +# rgmanqq updated july 19 to deal with x,y and mt +# lots of fixes +# ross lazarus import sys,math,shutil,subprocess,os,time,tempfile,string from os.path import abspath from rgutils import timenow, RRun, galhtmlprefix, galhtmlpostfix, galhtmlattr @@ -18,7 +20,7 @@ # http://StephenTurner.us/ # http://GettingGeneticsDone.blogspot.com/ -# Last updated: Tuesday, December 22, 2009 +# Last updated: 19 July 2011 by Ross Lazarus # R code for making manhattan plots and QQ plots from plink output files. # With GWAS data this can take a lot of memory. Recommended for use on # 64bit machines only, for now. @@ -28,26 +30,30 @@ library(ggplot2) coloursTouse = c('firebrick','darkblue','goldenrod','darkgreen') -# not too fugly but need a colour expert please... +# not too ugly but need a colour expert please... -manhattan = function(chrom=NULL,offset=NULL,pvals=NULL, title=NULL, max.y="max", - suggestiveline=0, genomewide=T, size.x.labels=9, size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) { - +DrawManhattan = function(pvals=Null,chrom=Null,offset=Null,title=NULL, max.y="max",suggestiveline=0, genomewide=T, size.x.labels=9, + size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) { if (annotate & is.null(SNPlist)) stop("You requested annotation but provided no SNPlist!") genomewideline=NULL # was genomewideline=-log10(5e-8) if (genomewide) { # use bonferroni since might be only a small region? genomewideline = -log10(0.05/length(pvals)) } - d=data.frame(CHR=chrom,BP=offset,P=pvals) - - #limit to only chrs 1-23? - d=d[d$CHR %in% 1:23, ] - + chro = sub('chr','',chrom, ignore.case = T) # just in case + chro = sub(':','',chro, ignore.case = T) # ugh + chro = sub('X',23,chro, ignore.case = T) + chro = sub('Y',24,chro, ignore.case = T) + chro = sub('Mt',25,chro, ignore.case = T) + offset = as.integer(offset) + pvals = as.double(pvals) + chro = as.integer(chro) + d=data.frame(CHR=chro,BP=offset,P=pvals) + #limit to only chrs 1-22, x=23,y=24,Mt=25? + d=d[d$CHR %in% 1:25, ] if ("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d) ) { - d=na.omit(d) + #d=na.omit(d) d=d[d$P>0 & d$P<=1, ] - d$logp = -log10(d$P) - + d$logp = as.double(-log10(d$P)) d$pos=NA ticks=NULL lastbase=0 @@ -63,7 +69,11 @@ lastchr = chrlist[x-1] # previous whatever the list lastbase=lastbase+tail(subset(d,CHR==lastchr)$BP, 1) d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP+lastbase + if (sum(is.na(lastchr),is.na(lastbase),is.na(d[d$CHR==i, ]$pos))) { + cat(paste('manhattan: For',title,'chrlistx=',i,'lastchr=',lastchr,'lastbase=',lastbase,'pos=',d[d$CHR==i,]$pos)) + } tks=c(tks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]) + } ticklim=c(min(d$pos),max(d$pos)) xlabs = chrlist @@ -85,8 +95,6 @@ if (max.y=="max") maxy=ceiling(max(d$logp)) else maxy=max.y maxy = max(maxy,1.1*genomewideline) - # if (maxy<8) maxy=8 - # only makes sense if genome wide is assumed - we could have a fine mapping region? if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ] if (nchr >= 2) { manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR)) @@ -105,9 +113,6 @@ axis.text.y=theme_text(size=size.y.labels, colour="grey50"), axis.ticks=theme_segment(colour=NA) ) - #manplot = manplot + opts(panel.grid.y.minor=theme_blank(),panel.grid.y.major=theme_blank()) - #manplot = manplot + opts(panel.grid.major=theme_blank()) - if (suggestiveline) manplot=manplot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3)) if (genomewideline) manplot=manplot+geom_hline(yintercept=genomewideline,colour="red") manplot @@ -134,12 +139,13 @@ if (spartan) plot=plot+opts(panel.background=theme_rect(col="grey50"), panel.grid.minor=theme_blank()) qq } + """ # we need another string to avoid confusion over string substitutions with %in% # instantiate rcode2 string with infile,chromcol,offsetcol,pvalscols,title before saving and running -rcode2 = """rgqqMan = function(infile="%s",chromcolumn=%s, offsetcolumn=%s, pvalscolumns=%s, +rcode2 = """rgqqMan = function(infile="%s",chromcolumn=%d, offsetcolumn=%d, pvalscolumns=c(%s), title="%s",grey=%d) { rawd = read.table(infile,head=T,sep='\\t') dn = names(rawd) @@ -167,7 +173,7 @@ doreorder = 0 } print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn)) - mymanplot= manhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey) + mymanplot= DrawManhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey) print(paste('## manhattan plot on',cname,'done')) ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=6,height=4,dpi=100) } @@ -198,50 +204,13 @@ this can be called externally, I guess...for QC eg? """ if debug: - print 'doManQQ',input_fname,chrom_col,offset_col,pval_cols,title,grey,ctitle,outdir - ffd,filtered_fname = tempfile.mkstemp(prefix='rgManQQtemp') - f = open(filtered_fname,'w') - inf = open(input_fname,'r') - ohead = inf.readline().strip().split('\t') # see if we have a header - inf.seek(0) # rewind - newhead = ['pval%d' % (x+1) for x in pval_cols] - newhead.insert(0,'Offset') - newhead.insert(0,'Chrom') - havehead = 0 - wewant = [chrom_col,offset_col] - wewant += pval_cols - try: - allnums = ['%d' % x for x in ohead] # this should barf if non numerics == header row? - f.write('\t'.join(newhead)) # for R to read - f.write('\n') - except: - havehead = 1 - newhead = [ohead[chrom_col],ohead[offset_col]] - newhead += [ohead[x] for x in pval_cols] - f.write('\t'.join(newhead)) # use the original head - f.write('\n') - for i,row in enumerate(inf): - if i == 0 and havehead: - continue # ignore header - sr = row.strip().split('\t') - if len(sr) > 1: - if sr[chrom_col].lower().find('chr') <> -1: - sr[chrom_col] = sr[chrom_col][3:] - newr = [sr[x] for x in wewant] # grab cols we need - s = '\t'.join(newr) - f.write(s) - f.write('\n') - f.close() - pvc = [x+3 for x in range(len(pval_cols))] # 2 for offset and chrom, 1 for r offset start - pvc = 'c(%s)' % (','.join(map(str,pvc))) - rcmd = '%s%s' % (rcode,rcode2 % (filtered_fname,'1','2',pvc,title,grey)) + print 'doManQQ',input_fname,chrom_col,offset_col,pval_cols,title,grey,ctitle,outdir + rcmd = '%s%s' % (rcode,rcode2 % (input_fname,chrom_col,offset_col,pval_cols,title,grey)) if debug: - print 'running\n%s\n' % rcmd + print 'running\n%s\n' % rcmd rlog,flist = RRun(rcmd=rcmd,title=ctitle,outdir=outdir) rlog.append('## R script=') rlog.append(rcmd) - if beTidy: - os.unlink(filtered_fname) return rlog,flist @@ -272,19 +241,20 @@ offset_col = -1 p = sys.argv[7].strip().split(',') try: - p = [int(x) for x in p] + q = [int(x) for x in p] except: - p = [-1] + p = -1 if chrom_col == -1 or offset_col == -1: # was passed as zero - do not do manhattan plots chrom_col = -1 offset_col = -1 grey = 0 if (sys.argv[8].lower() in ['1','true']): grey = 1 - if p == [-1]: + if p == -1: print >> sys.stderr,'## Cannot run rgManQQ - missing pval column' sys.exit(1) - rlog,flist = doManQQ(input_fname,chrom_col,offset_col,p,title,grey,ctitle,outdir) + p = ['%d' % (int(x) + 1) for x in p] + rlog,flist = doManQQ(input_fname,chrom_col+1,offset_col+1,','.join(p),title,grey,ctitle,outdir) flist.sort() html = [galhtmlprefix % progname,] html.append('<h1>%s</h1>' % title) @@ -294,7 +264,7 @@ fname,expl = row # RRun returns pairs of filenames fiddled for the log and R script e = os.path.splitext(fname)[-1] if e in ['.png','.jpg']: - s= '<tr><td><a href="%s"><img src="%s" alt="%s hspace="10" width="400"><br>(Click to download image %s)</a></td></tr>' \ + s= '<tr><td><a href="%s"><img src="%s" title="%s hspace="10" width="400"><br>(Click to download image %s)</a></td></tr>' \ % (fname,fname,expl,expl ) html.append(s) else: @@ -317,3 +287,4 @@ if __name__ == "__main__": main() + --- a/tools/rgenetics/rgManQQ.xml Tue Jul 19 21:22:07 2011 -0400 +++ b/tools/rgenetics/rgManQQ.xml Wed Jul 20 12:17:43 2011 +1000 @@ -1,4 +1,4 @@ -<tool id="rgManQQ1" name="Manhattan/QQ:" version="1.0.1"> +<tool id="rgManQQ1" name="Manhattan/QQ:" version="1.0.2"><code file="rgManQQ_code.py"/><description>Plots for WGA P values</description> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket