commit/galaxy-central: fubar: Minor tweak to the x axis sorting of Manhattan plots to fix the fact that in R, unique() returns an alphabetically sorted list - need to use sort on the result to get correct lexicographic (10 comes after 9 eg) on the X axis...
1 new changeset in galaxy-central: http://bitbucket.org/galaxy/galaxy-central/changeset/dacf97a6c663/ changeset: dacf97a6c663 user: fubar date: 2011-07-20 05:34:27 summary: Minor tweak to the x axis sorting of Manhattan plots to fix the fact that in R, unique() returns an alphabetically sorted list - need to use sort on the result to get correct lexicographic (10 comes after 9 eg) on the X axis... affected #: 2 files (181 bytes) --- a/test-data/rgtestouts/rgManQQ/rgManQQtest1.html Tue Jul 19 22:18:40 2011 -0400 +++ b/test-data/rgtestouts/rgManQQ/rgManQQtest1.html Wed Jul 20 13:34:27 2011 +1000 @@ -43,7 +43,7 @@ Loading required package: proto -[1] "### 101 values read from /data/tmp/tmpTPXdE1/database/files/000/dataset_1.dat read - now running plots" +[1] "### 101 values read from /data/tmp/tmpM8NZ50/database/files/000/dataset_1.dat read - now running plots" [1] "## qqplot on Allelep done" @@ -62,7 +62,7 @@ # http://StephenTurner.us/ # http://GettingGeneticsDone.blogspot.com/ -# Last updated: Tuesday, December 22, 2009 +# Last updated: 19 July 2011 by Ross Lazarus # R code for making manhattan plots and QQ plots from plink output files. # With GWAS data this can take a lot of memory. Recommended for use on # 64bit machines only, for now. @@ -81,14 +81,9 @@ genomewideline=NULL # was genomewideline=-log10(5e-8) if (genomewide) { # use bonferroni since might be only a small region? genomewideline = -log10(0.05/length(pvals)) } - chro = sub('chr','',chrom, ignore.case = T) # just in case - chro = sub(':','',chro, ignore.case = T) # ugh - chro = sub('X',23,chro, ignore.case = T) - chro = sub('Y',24,chro, ignore.case = T) - chro = sub('Mt',25,chro, ignore.case = T) offset = as.integer(offset) pvals = as.double(pvals) - chro = as.integer(chro) + chro = as.integer(chrom) # already dealt with X and friends? d=data.frame(CHR=chro,BP=offset,P=pvals) #limit to only chrs 1-22, x=23,y=24,Mt=25? d=d[d$CHR %in% 1:25, ] @@ -100,6 +95,7 @@ ticks=NULL lastbase=0 chrlist = unique(d$CHR) + chrlist = sort(chrlist) # returns lexical ordering nchr = length(chrlist) # may be any number? if (nchr >= 2) { for (x in c(1:nchr)) { @@ -182,16 +178,23 @@ qq } -rgqqMan = function(infile="/data/tmp/tmpTPXdE1/database/files/000/dataset_1.dat",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(8), +rgqqMan = function(infile="/data/tmp/tmpM8NZ50/database/files/000/dataset_1.dat",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(8), title="rgManQQtest1",grey=0) { rawd = read.table(infile,head=T,sep='\t') dn = names(rawd) cc = dn[chromcolumn] oc = dn[offsetcolumn] -nams = c(cc,oc) +rawd[,cc] = sub('chr','',rawd[,cc],ignore.case = T) # just in case +rawd[,cc] = sub(':','',rawd[,cc],ignore.case = T) # ugh +rawd[,cc] = sub('X',23,rawd[,cc],ignore.case = T) +rawd[,cc] = sub('Y',24,rawd[,cc],ignore.case = T) +rawd[,cc] = sub('Mt',25,rawd[,cc], ignore.case = T) +nams = c(cc,oc) # for sorting plen = length(rawd[,1]) -doreorder=1 print(paste('###',plen,'values read from',infile,'read - now running plots',sep=' ')) +rawd = rawd[do.call(order,rawd[nams]),] +# mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.... +# in case not yet ordered if (plen > 0) { for (pvalscolumn in pvalscolumns) { if (pvalscolumn > 0) @@ -203,12 +206,6 @@ ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=6,height=4,dpi=100) print(paste('## qqplot on',cname,'done')) if ((chromcolumn > 0) & (offsetcolumn > 0)) { - if (doreorder) { - rawd = rawd[do.call(order,rawd[nams]),] - # mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.... - # in case not yet ordered - doreorder = 0 - } print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn)) mymanplot= DrawManhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey) print(paste('## manhattan plot on',cname,'done')) @@ -231,6 +228,6 @@ </pre> -<b><a href="http://rgenetics.org">Galaxy Rgenetics</a> tool output rgManQQ.py run at 20/07/2011 12:08:46</b><br/> +<b><a href="http://rgenetics.org">Galaxy Rgenetics</a> tool output rgManQQ.py run at 20/07/2011 13:29:43</b><br/></div></body></html> --- a/tools/rgenetics/rgManQQ.py Tue Jul 19 22:18:40 2011 -0400 +++ b/tools/rgenetics/rgManQQ.py Wed Jul 20 13:34:27 2011 +1000 @@ -1,4 +1,6 @@ #!/usr/local/bin/python +# updated july 20 to fix sort order - R unique() sorts into strict collating order +# so need to sort after unique to revert to lexicographic order for x axis on Manhattan # rgmanqq updated july 19 to deal with x,y and mt # lots of fixes # ross lazarus @@ -39,14 +41,9 @@ genomewideline=NULL # was genomewideline=-log10(5e-8) if (genomewide) { # use bonferroni since might be only a small region? genomewideline = -log10(0.05/length(pvals)) } - chro = sub('chr','',chrom, ignore.case = T) # just in case - chro = sub(':','',chro, ignore.case = T) # ugh - chro = sub('X',23,chro, ignore.case = T) - chro = sub('Y',24,chro, ignore.case = T) - chro = sub('Mt',25,chro, ignore.case = T) offset = as.integer(offset) pvals = as.double(pvals) - chro = as.integer(chro) + chro = as.integer(chrom) # already dealt with X and friends? d=data.frame(CHR=chro,BP=offset,P=pvals) #limit to only chrs 1-22, x=23,y=24,Mt=25? d=d[d$CHR %in% 1:25, ] @@ -58,6 +55,7 @@ ticks=NULL lastbase=0 chrlist = unique(d$CHR) + chrlist = sort(chrlist) # returns lexical ordering nchr = length(chrlist) # may be any number? if (nchr >= 2) { for (x in c(1:nchr)) { @@ -151,10 +149,17 @@ dn = names(rawd) cc = dn[chromcolumn] oc = dn[offsetcolumn] -nams = c(cc,oc) +rawd[,cc] = sub('chr','',rawd[,cc],ignore.case = T) # just in case +rawd[,cc] = sub(':','',rawd[,cc],ignore.case = T) # ugh +rawd[,cc] = sub('X',23,rawd[,cc],ignore.case = T) +rawd[,cc] = sub('Y',24,rawd[,cc],ignore.case = T) +rawd[,cc] = sub('Mt',25,rawd[,cc], ignore.case = T) +nams = c(cc,oc) # for sorting plen = length(rawd[,1]) -doreorder=1 print(paste('###',plen,'values read from',infile,'read - now running plots',sep=' ')) +rawd = rawd[do.call(order,rawd[nams]),] +# mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.... +# in case not yet ordered if (plen > 0) { for (pvalscolumn in pvalscolumns) { if (pvalscolumn > 0) @@ -166,12 +171,6 @@ ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=6,height=4,dpi=100) print(paste('## qqplot on',cname,'done')) if ((chromcolumn > 0) & (offsetcolumn > 0)) { - if (doreorder) { - rawd = rawd[do.call(order,rawd[nams]),] - # mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.... - # in case not yet ordered - doreorder = 0 - } print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn)) mymanplot= DrawManhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey) print(paste('## manhattan plot on',cname,'done')) Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket