details: http://www.bx.psu.edu/hg/galaxy/rev/e1b63ac74b35 changeset: 3593:e1b63ac74b35 user: fubar: ross Lazarus at gmail period com date: Thu Apr 01 14:25:27 2010 -0400 description: Add rgtest.sh to generate test outputs Minor changes to rgManQQ tool documentation Minor fixes to rgQQ diffstat: tools/rgenetics/rgManQQ.py | 41 ++++++--- tools/rgenetics/rgManQQ.xml | 3 + tools/rgenetics/rgQC.py | 14 +++ tools/rgenetics/rgtest.sh | 182 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 227 insertions(+), 13 deletions(-) diffs (343 lines): diff -r 40e8c99829e0 -r e1b63ac74b35 tools/rgenetics/rgManQQ.py --- a/tools/rgenetics/rgManQQ.py Thu Apr 01 13:08:48 2010 -0400 +++ b/tools/rgenetics/rgManQQ.py Thu Apr 01 14:25:27 2010 -0400 @@ -70,8 +70,9 @@ } if (max.y=="max") maxy=ceiling(max(d$logp)) else maxy=max.y - if (maxy<8) maxy=8 - + maxy = max(maxy,1.1*genomewideline) + # if (maxy<8) maxy=8 + # only makes sense if genome wide is assumed - we could have a fine mapping region? if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ] plot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR)) @@ -83,11 +84,13 @@ plot=plot + opts(title=title) plot=plot+opts( panel.background=theme_blank(), - panel.grid.minor=theme_blank(), axis.text.x=theme_text(size=size.x.labels, colour="grey50"), axis.text.y=theme_text(size=size.y.labels, colour="grey50"), axis.ticks=theme_segment(colour=NA) ) + #plot = plot + opts(panel.grid.y.minor=theme_blank(),panel.grid.y.major=theme_blank()) + #plot = plot + opts(panel.grid.major=theme_blank()) + if (suggestiveline) plot=plot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3)) if (genomewideline) plot=plot+geom_hline(yintercept=genomewideline,colour="red") plot @@ -120,11 +123,11 @@ # instantiate rcode2 string with infile,chromcol,offsetcol,pvalscols,title before saving and running rcode2 = """rgqqMan = function(infile="%s",chromcolumn=%d, offsetcolumn=%d, pvalscolumns=%s, - title="%s",grey=%d) { - d = read.table(infile,head=T,sep='\t') - print(paste('###',length(d[,1]),'values read from',infile,'read - now running plots',sep=' ')) - for (pvalscolumn in pvalscolumns) { - if (pvalscolumn > 0) +title="%s",grey=%d) { +d = read.table(infile,head=T,sep='\t') +print(paste('###',length(d[,1]),'values read from',infile,'read - now running plots',sep=' ')) +for (pvalscolumn in pvalscolumns) { +if (pvalscolumn > 0) { cname = names(d)[pvalscolumn] mytitle = paste('p=',cname,', ',title,sep='') @@ -154,12 +157,24 @@ """ +def doManQQ(input_fname,chrom_col,offset_col,pval_cols,title,grey,ctitle,outdir): + """ draw a qq for pvals and a manhattan plot if chrom/offset <> 0 + contains some R scripts as text strings - we substitute defaults into the calls + to make them do our bidding - and save the resulting code for posterity + this can be called externally, I guess...for QC eg? + """ + + rcmd = '%s%s' % (rcode,rcode2 % (input_fname,chrom_col,offset_col,pval_cols,title,grey)) + rlog,flist = RRun(rcmd=rcmd,title=ctitle,outdir=outdir) + return rlog,flist + + def main(): u = """<command interpreter="python"> rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col' '$pval_col' </command> """ - print >> sys.stdout,'## rgManQQ.py. cl=',sys.argv + print >> sys.stdout,'## rgManQQ.py. cl= \n%s' % ' '.join(['"%s"' % x for x in sys.argv]) npar = 8 if len(sys.argv) < npar: print >> sys.stdout, '## error - too few command line parameters - wanting %d' % npar @@ -177,13 +192,13 @@ except: chrom_col = 0 try: - offset_col = int(sys.argv[6]) + 1 + offset_col = int(sys.argv[6]) + 1 except: offset_col = 0 p = sys.argv[7].strip().split(',') try: p = [int(x)+1 for x in p] - pval_cols = 'c(%s)' % ','.join(map(str,p)) + pval_cols = 'c(%s)' % ','.join(map(str,p)) except: pval_cols = 'c(0)' if chrom_col == 1 or offset_col == 1: # was passed as zero - do not do manhattan plots @@ -192,8 +207,7 @@ grey = 0 if (sys.argv[8].lower() in ['1','true']): grey = 1 - rcmd = '%s%s' % (rcode,rcode2 % (input_fname,chrom_col,offset_col,pval_cols,title,grey)) - rlog,flist = RRun(rcmd=rcmd,title=ctitle,outdir=outdir) + rlog,flist = doManQQ(input_fname,chrom_col,offset_col,pval_cols,title,grey,ctitle,outdir) flist.sort() html = [galhtmlprefix % progname,] html.append('<h1>%s</h1>' % title) @@ -221,6 +235,7 @@ htmlf.write('\n') htmlf.close() + if __name__ == "__main__": main() diff -r 40e8c99829e0 -r e1b63ac74b35 tools/rgenetics/rgManQQ.xml --- a/tools/rgenetics/rgManQQ.xml Thu Apr 01 13:08:48 2010 -0400 +++ b/tools/rgenetics/rgManQQ.xml Thu Apr 01 14:25:27 2010 -0400 @@ -66,6 +66,9 @@ - **Offset Column** contains the offset within the chromosome - **P Value Column** contains the (untransformed) p values at that locus - choose multiple columns if needed +NOTE - plotting millions of p values may take tens of minutes depending on +how busy the server is - be patient please. + ----- .. class:: infomark diff -r 40e8c99829e0 -r e1b63ac74b35 tools/rgenetics/rgQC.py --- a/tools/rgenetics/rgQC.py Thu Apr 01 13:08:48 2010 -0400 +++ b/tools/rgenetics/rgQC.py Thu Apr 01 14:25:27 2010 -0400 @@ -37,12 +37,25 @@ # note no ped file passed so had to remove the -l option # for plinkParse.py that makes a heterozygosity report from the ped # file - needs fixing... +# new: importing manhattan/qqplot plotter +# def doManQQ(input_fname,chrom_col,offset_col,pval_cols,title,grey,ctitle,outdir): +# """ draw a qq for pvals and a manhattan plot if chrom/offset <> 0 +# contains some R scripts as text strings - we substitute defaults into the calls +# to make them do our bidding - and save the resulting code for posterity +# this can be called externally, I guess...for QC eg? +# """ +# +# rcmd = '%s%s' % (rcode,rcode2 % (input_fname,chrom_col,offset_col,pval_cols,title,grey)) +# rlog,flist = RRun(rcmd=rcmd,title=ctitle,outdir=outdir) +# return rlog,flist + from optparse import OptionParser import sys,os,shutil, glob, math, subprocess, time, operator, random, tempfile, copy, string from os.path import abspath from rgutils import galhtmlprefix, galhtmlpostfix, RRun, timenow, plinke, rexe, runPlink, pruneLD +import rgManQQ prog = os.path.split(sys.argv[0])[1] vers = '0.4 april 2009 rml' @@ -57,6 +70,7 @@ mogresize = "x300" # this controls the width for jpeg thumbnails + def makePlots(markers=[],subjects=[],newfpath='.',basename='test',nbreaks='20',nup=3,height=10,width=8,rgbin=''): """ diff -r 40e8c99829e0 -r e1b63ac74b35 tools/rgenetics/rgtest.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/rgenetics/rgtest.sh Thu Apr 01 14:25:27 2010 -0400 @@ -0,0 +1,182 @@ +#!/bin/sh +# script to generate all functional test outputs for each rgenetics tool +# could be run at installation to ensure all dependencies are in place? +GALAXYROOT=`pwd` +echo "using $GALAXYROOT" +# change this as needed for your local install +INPATH="${GALAXYROOT}/test-data" +BINPATH="${GALAXYROOT}/tool-data/rg/bin" +TOOLPATH="${GALAXYROOT}/tools/rgenetics" +OROOT="${GALAXYROOT}/test-data/rgtestouts" +NORMALOROOT="${GALAXYROOT}/test-data" +mkdir -p $OROOT +rm -rf $OROOT/* +# needed for testing - but tool versions should be bumped if this is rerun? +TOOL="rgManQQ" +NPRE=${TOOL}test1 +OUTPATH="$OROOT/$TOOL" +mkdir $OUTPATH +CL="python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 5,7 0" +# rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col' +# '$pval_col' +#python /opt/galaxy/tools/rgenetics/rgManQQ.py /opt/galaxy/test-data/smallwgaP.xls rgManQQtest1 +#/opt/galaxy/test-data/rgtestouts/rgManQQ/rgManQQtest1.html /opt/galaxy/test-data/rgtestouts/rgManQQ 1 2 5,7 +echo "Testing $TOOL using $CL" +python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 5,7 0 +TOOL="rgfakePhe" +NPRE=${TOOL}test1 +OUTPATH="$OROOT/$TOOL" +mkdir $OUTPATH +PSSCRIPT="$OUTPATH/script_file" +echo "{'pN':'normtest','pT':'rnorm','pP':\"{'Mean':'100', 'SD':'10'}\"}" > $PSSCRIPT +echo "{'pN':'cattest','pT':'cat','pP':\"{'values':'red,green,blue'}\"}" >> $PSSCRIPT +echo "{'pN':'uniftest','pT':'$f.series.phetype','pP':\"{'low':'1','hi':'100'}\"}" >> $PSSCRIPT +echo "{'pN':'gammatest','pT':'rgamma','pP':\"{'Alpha':'1', 'Beta':'0.1'}\"}" >> $PSSCRIPT +echo "{'pN':'poissontest','pT':'poisson','pP':\"{'lamb':'1.0',}\"}" >> $PSSCRIPT +echo "{'pN':'exptest','pT':'exponential','pP':\"{'Mean':'100.0',}\"}" >> $PSSCRIPT +echo "{'pN':'weibtest','pT':'weibull','pP':\"{'Alpha':'1.0', 'Beta':'0.1'}\"}" >> $PSSCRIPT +echo "now doing $TOOL" +python $TOOLPATH/$TOOL.py ${INPATH}/tinywga $NPRE $NPRE.pphe $OUTPATH $PSSCRIPT +# <command interpreter="python">rgfakePhe.py '$infile1.extra_files_path/$infile1.metadata.base_name' +# "$title1" '$ppheout' '$ppheout.files_path' '$script_file' +# +# +TOOL="rgQC" +NPRE=${TOOL}test1 +echo "now doing $TOOL" +OUTPATH="$OROOT/$TOOL" +mkdir $OUTPATH +python $TOOLPATH/$TOOL.py -i "$INPATH/tinywga" -o $NPRE -s ${OUTPATH}/${NPRE}.html -p $OUTPATH +# rgQC.py -i '$input_file.extra_files_path/$input_file.metadata.base_name' -o "$out_prefix" +# -s '$html_file' -p '$html_file.files_path' +# +TOOL="rgGRR" +NPRE=${TOOL}test1 +echo "now doing $TOOL" +OUTPATH="$OROOT/$TOOL" +mkdir $OUTPATH +python $TOOLPATH/$TOOL.py "$INPATH/tinywga" "tinywga" $OUTPATH/${NPRE}.html $OUTPATH "$NPRE" '100' '6' +# rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name" +#'$out_file1' '$out_file1.files_path' "$title" '$n' '$Z' +# +TOOL="rgLDIndep" +NPRE=${TOOL}test1 +OUTPATH="$OROOT/$TOOL" +mkdir $OUTPATH +python $TOOLPATH/$TOOL.py "$INPATH" "tinywga" "$NPRE" 1 1 0 0 1 1 $OUTPATH/${NPRE}.pbed $OUTPATH 10000 5000 0.1 +#rgLDIndep.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind' +# '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1' +#'$out_file1.files_path' '$window' '$step' '$r2' +TOOL="rgPedSub" +NPRE=${TOOL}test1 +OUTPATH="$OROOT/$TOOL" +mkdir $OUTPATH +PSSCRIPT="$OUTPATH/pedsub.script" +echo "title~~~~$NPRE" > $PSSCRIPT +echo "output1~~~~${OUTPATH}/${NPRE}.lped" >> $PSSCRIPT +echo "outformat~~~~lped" >> $PSSCRIPT +echo "basename~~~~tinywga" >> $PSSCRIPT +echo "inped~~~~$INPATH/tinywga" >> $PSSCRIPT +echo "outdir~~~~$OUTPATH" >> $PSSCRIPT +echo "region~~~~" >> $PSSCRIPT +echo "relfilter~~~~all" >> $PSSCRIPT +echo "rslist~~~~rs2283802Xrs2267000Xrs16997606Xrs4820537Xrs3788347Xrs756632Xrs4820539Xrs2283804Xrs2267006Xrs4822363X" >> $PSSCRIPT +echo "now doing $TOOL" +python $TOOLPATH/$TOOL.py $PSSCRIPT +rm -rf $PSSCRIPT +# +TOOL="rgfakePhe" +NPRE=${TOOL}test1 +OUTPATH="$OROOT/$TOOL" +mkdir $OUTPATH +PSSCRIPT="$OUTPATH/script_file" +echo "{'pN':'normtest','pT':'rnorm','pP':\"{'Mean':'100', 'SD':'10'}\"}" > $PSSCRIPT +echo "{'pN':'cattest','pT':'cat','pP':\"{'values':'red,green,blue'}\"}" >> $PSSCRIPT +echo "{'pN':'uniftest','pT':'$f.series.phetype','pP':\"{'low':'1','hi':'100'}\"}" >> $PSSCRIPT +echo "{'pN':'gammatest','pT':'rgamma','pP':\"{'Alpha':'1', 'Beta':'0.1'}\"}" >> $PSSCRIPT +echo "{'pN':'poissontest','pT':'poisson','pP':\"{'lamb':'1.0',}\"}" >> $PSSCRIPT +echo "{'pN':'exptest','pT':'exponential','pP':\"{'Mean':'100.0',}\"}" >> $PSSCRIPT +echo "{'pN':'weibtest','pT':'weibull','pP':\"{'Alpha':'1.0', 'Beta':'0.1'}\"}" >> $PSSCRIPT +echo "now doing $TOOL" +python $TOOLPATH/$TOOL.py $PSSCRIPT +# +echo "Now doing rgclean" +TOOL="rgClean" +NPRE=${TOOL}test1 +OUTPATH="$OROOT/$TOOL" +mkdir $OUTPATH +python $TOOLPATH/$TOOL.py $INPATH "tinywga" "$NPRE" 1 1 0 0 1 1 $OUTPATH/${NPRE}.pbed $OUTPATH 0 0 0 0 +# rgClean.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind' +# '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1' '$out_file1.files_path' +# '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink' '$relfilter' '$afffilter' '$sexfilter' '$fixaff' +# +echo "Now doing rgEigPCA" +TOOL="rgEigPCA" +NPRE=${TOOL}test1 +OUTPATH="$OROOT/$TOOL" +mkdir $OUTPATH +python $TOOLPATH/$TOOL.py "$INPATH/tinywga" "$NPRE" ${OUTPATH}/${NPRE}.html $OUTPATH 4 2 2 2 $OUTPATH/rgEigPCAtest1.txt +# rgEigPCA.py "$i.extra_files_path/$i.metadata.base_name" "$title" "$out_file1" +# "$out_file1.files_path" "$k" "$m" "$t" "$s" "$pca" +# +TOOL="rgfakePed" +NPRE=${TOOL}test1 +OUTPATH="$OROOT/$TOOL" +mkdir $OUTPATH +echo "now doing $TOOL" +python $TOOLPATH/$TOOL.py --title "$NPRE" -o $OUTPATH/${NPRE}.lped -p $OUTPATH -c "20" -n "40" -s "10" -w "0" -v "0" -l "pbed" -d "T" -m "0" -M "0" +#rgfakePed.py --title '$title1' +# -o '$out_file1' -p '$out_file1.extra_files_path' -c '$ncases' -n '$ntotal' +# -s '$nsnp' -w '$lowmaf' -v '$missingValue' -l '$outFormat' +# -d '$mafdist' -m '$missingRate' -M '$mendelRate' +# +TOOL="rgHaploView" +NPRE=${TOOL}test1 +OUTPATH="$OROOT/$TOOL" +mkdir $OUTPATH +echo "now doing $TOOL" +python $TOOLPATH/$TOOL.py "" "rs2283802Xrs2267000Xrs16997606Xrs4820537Xrs3788347Xrs756632Xrs4820539Xrs2283804Xrs2267006Xrs4822363X" \ +"$NPRE" $OUTPATH/${NPRE}.html "$INPATH" "tinywga" 0.0 200000 "RSQ" "lo" "2048" "$OUTPATH" "noinfo" "0.8" \ +"YRI" $BINPATH/haploview.jar +# rgHaploView.py "$ucsc_region" "$rslist" "$title" "$output1" +# "$lhistIn.extra_files_path" "$lhistIn.metadata.base_name" +# "$minmaf" "$maxdist" "$ldtype" "$hires" "$memsize" "$output1.files_path" +# "$infoTrack" "$tagr2" "$hmpanel" ${GALAXY_DATA_INDEX_DIR}/rg/bin/haploview.jar +# note these statistical tools do NOT generate composite outputs +TOOL="rgGLM" +NPRE=${TOOL}test1 +OUTPATH=$NORMALOROOT +echo "now doing $TOOL" +python $TOOLPATH/$TOOL.py "$INPATH/tinywga" $INPATH/tinywga "$NPRE" "c1" "" $OUTPATH/${NPRE}_GLM.xls \ +$OUTPATH/${NPRE}_GLM_log.txt "tinywga" "" "" "" 1 1 0 0 $OUTPATH/${NPRE}_GLM_topTable.gff +## rgGLM.py '$i.extra_files_path/$i.metadata.base_name' '$phef.extra_files_path/$phef.metadata.base_name' +## "$title1" '$predvar' '$covar' '$out_file1' '$logf' '$dbkey' '$i.metadata.base_name' +## '$inter' '$cond' '$gender' '$mind' '$geno' '$maf' '$logistic' '$gffout' +# +TOOL="rgTDT" +NPRE=${TOOL}test1 +OUTPATH=$NORMALOROOT +echo "now doing $TOOL" +python $TOOLPATH/$TOOL.py -i "$INPATH/tinywga" -o "$NPRE" -r $OUTPATH/${NPRE}_TDT.xls \ +-l $OUTPATH/${NPRE}_TDT_log.txt -g $OUTPATH/${NPRE}_TDT_topTable.gff +## rgTDT.py -i '$infile.extra_files_path/$infile.metadata.base_name' -o '$title' +## -r '$out_file1' -l '$logf' -x '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink' +## -g '$gffout' +# +TOOL="rgCaCo" +NPRE=${TOOL}test1 +OUTPATH=$NORMALOROOT +echo "now doing $TOOL" +python $TOOLPATH/rgCaCo.py $INPATH/tinywga "$NPRE" $OUTPATH/${NPRE}_CaCo.xls $OUTPATH/${NPRE}_CaCo_log.txt $OUTPATH $OUTPATH/${NPRE}_CaCo_topTable.gff +# echo tp=$TOOLPATH t=$TOOL op=$OUTPATH b=$BINPATH +# rgCaCo.py '$i.extra_files_path/$i.metadata.base_name' "$name" '$out_file1' '$logf' '$logf.files_path' '$gffout' +# +TOOL="rgQQ" +echo "now doing $TOOL" +NPRE=${TOOL}test1 +OUTPATH=$NORMALOROOT +CL="python $TOOLPATH/$TOOL.py "$INPATH/tinywga.pphe" "$NPRE" 1 3 $OUTPATH/$NPRE.pdf 8 10 "false" 1 $OUTPATH" +echo "running $TOOL using $CL" +python $TOOLPATH/$TOOL.py "$INPATH/tinywga.pphe" "$NPRE" 1 3 $OUTPATH/$NPRE.pdf 8 10 "false" 1 $OUTPATH +# rgQQ.py "$input1" "$name" $sample "$cols" $allqq $height $width $log $allqq.id $__new_file_path__ +#