details: http://www.bx.psu.edu/hg/galaxy/rev/0b682d3dd01b changeset: 3633:0b682d3dd01b user: fubar: ross Lazarus at gmail period com date: Tue Apr 13 11:15:35 2010 -0400 description: Add the raw uncompressed .js files needed for rgGRR svg display - the packed ones were already there Fix rgManQQ so the manhattan plot is not called for data without chromosome and offset Use LD reduced data for IBD/GRR for faster performance and better resolution of close relationships Add an LD pruning and thinning step to rgGRR - the target composite pbed or lped has the thinned files permanently added for reuse. This is skipped for small numbers of markers since the plink --thin 0.1 option applied to tinywga.pbed will return an empty file which is not really ideal. diffstat: static/scripts/checkbox_and_radiobutton.js | 347 ++++ static/scripts/helper_functions.js | 817 ++++++++++ static/scripts/timer.js | 74 + tools/rgenetics/rgGRR.py | 2241 ++++++++++++++------------- tools/rgenetics/rgManQQ.py | 19 +- 5 files changed, 2396 insertions(+), 1102 deletions(-) diffs (truncated from 3540 to 3000 lines): diff -r 0dc1fc63c945 -r 0b682d3dd01b static/scripts/checkbox_and_radiobutton.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/static/scripts/checkbox_and_radiobutton.js Tue Apr 13 11:15:35 2010 -0400 @@ -0,0 +1,347 @@ +/* +Scripts to create interactive checkboxes and radio buttons in SVG using ECMA script +Copyright (C) <2007> <Andreas Neumann> +Version 1.1.3, 2007-08-09 +neumann@karto.baug.ethz.ch +http://www.carto.net/ +http://www.carto.net/neumann/ + +Credits: +* Guy Morton for providing a fix to let users toggle checkboxes by clicking on text labels +* Bruce Rindahl for providing the bugfix described in version 1.1.2 +* Simon Shutter for providing a fix for the ASV in IE crash when reloading the SVG file after calling the .remove() method on a checkbox + +---- + +Documentation: http://www.carto.net/papers/svg/gui/checkbox_and_radiobutton/ + +---- + +current version: 1.1.3 + +version history: +1.0 (2006-03-13) +initial version + +1.1 (2006-07-11) +text labels are now clickable (thanks to Guy Morton) +added method .moveTo() to move checkbox to a different location +introduced new constructor parameter labelYOffset to allow more flexible placement of the text label + +1.1.1 (2007-02-06) +added cursor pointer to the text label and use element representing the checkBox + +1.1.2 (2007-04-19) +bug fix: this.selectedIndex was not correctly initialized in method addCheckBox of the radioButtonGroup object + +1.1.3 (2007-08-09) +bug fix: the method .remove() was slightly modified (using removeEventListener) for avoiding a crash related to the method after reloading the SVG file + +------- + + +This ECMA script library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library (lesser_gpl.txt); if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +---- + +original document site: http://www.carto.net/papers/svg/gui/checkbox_and_radiobutton/ +Please contact the author in case you want to use code or ideas commercially. +If you use this code, please include this copyright header, the included full +LGPL 2.1 text and read the terms provided in the LGPL 2.1 license +(http://www.gnu.org/copyleft/lesser.txt) + +------------------------------- + +Please report bugs and send improvements to neumann@karto.baug.ethz.ch +If you use this control, please link to the original (http://www.carto.net/papers/svg/gui/checkbox_and_radiobutton/) +somewhere in the source-code-comment or the "about" of your project and give credits, thanks! + +*/ + +function checkBox(id,parentNode,x,y,checkboxId,checkcrossId,checkedStatus,labelText,textStyles,labelDistance,labelYOffset,radioButtonGroup,functionToCall) { + var nrArguments = 13; + var createCheckbox= true; + if (arguments.length == nrArguments) { + this.id = id; //an internal id, this id is not used in the SVG Dom tree + this.parentNode = parentNode; //the parentNode, string or nodeReference + this.x = x; //the center of the checkBox + this.y = y; //the center of the checkBox + this.checkboxId = checkboxId; //the id of the checkbox symbol (background) + this.checkcrossId = checkcrossId; //the id of the checkbox symbol (foreground), pointer-events should be set to "none" + this.checkedStatus = checkedStatus; //a status variable (true|false), indicates if checkbox is on or off + this.labelText = labelText; //the text of the checkbox label to be displayed, use undefined or empty string if you don't need a label text + this.textStyles = textStyles; //an array of literals containing the text settings + if (!this.textStyles["font-size"]) { + this.textStyles["font-size"] = 12; + } + this.labelDistance = labelDistance; //a distance defined from the center of the checkbox to the left of the text of the label + this.labelYOffset = labelYOffset; //a y offset value for the text label in relation to the checkbox symbol center + this.radioButtonGroup = radioButtonGroup; //a reference to a radio button group, if this is a standalone checkBox, just use the parameter undefined + this.functionToCall = functionToCall; //the function to call after triggering checkBox + this.exists = true; //status that indicates if checkbox exists or not, is set to false after method .remove() was called + this.label = undefined; //later a reference to the label text node + } + else { + createCheckbox = false; + alert("Error in checkbox ("+id+"): wrong nr of arguments! You have to pass over "+nrArguments+" parameters."); + } + if (createCheckbox) { + //timer stuff + this.timer = new Timer(this); //a Timer instance for calling the functionToCall + if (this.radioButtonGroup) { + this.timerMs = 0; + } + else { + this.timerMs = 200; //a constant of this object that is used in conjunction with the timer - functionToCall is called after 200 ms + } + //create checkbox + this.createCheckBox(); + } + else { + alert("Could not create checkbox with id '"+id+"' due to errors in the constructor parameters"); + } +} + +//this method creates all necessary checkbox geometry +checkBox.prototype.createCheckBox = function() { + if (typeof(this.parentNode) == "string") { + this.parentNode = document.getElementById(this.parentNode); + } + //create checkbox + this.checkBox = document.createElementNS(svgNS,"use"); + this.checkBox.setAttributeNS(null,"x",this.x); + this.checkBox.setAttributeNS(null,"y",this.y); + this.checkBox.setAttributeNS(xlinkNS,"href","#"+this.checkboxId); + this.checkBox.addEventListener("click",this,false); + this.checkBox.setAttributeNS(null,"cursor","pointer"); + this.parentNode.appendChild(this.checkBox); + //create checkcross + this.checkCross = document.createElementNS(svgNS,"use"); + this.checkCross.setAttributeNS(null,"x",this.x); + this.checkCross.setAttributeNS(null,"y",this.y); + this.checkCross.setAttributeNS(xlinkNS,"href","#"+this.checkcrossId); + this.parentNode.appendChild(this.checkCross); + if (this.checkedStatus == false) { + this.checkCross.setAttributeNS(null,"display","none"); + } + //create label, if any + if (this.labelText) { + if (this.labelText.length > 0) { + this.label = document.createElementNS(svgNS,"text"); + for (var attrib in this.textStyles) { + var value = this.textStyles[attrib]; + if (attrib == "font-size") { + value += "px"; + } + this.label.setAttributeNS(null,attrib,value); + } + this.label.setAttributeNS(null,"x",(this.x + this.labelDistance)); + this.label.setAttributeNS(null,"y",(this.y + this.labelYOffset)); + this.label.setAttributeNS(null,"cursor","pointer"); + var labelTextNode = document.createTextNode(this.labelText); + this.label.appendChild(labelTextNode); + this.label.setAttributeNS(null,"pointer-events","all"); + this.label.addEventListener("click",this,false); + this.parentNode.appendChild(this.label); + } + } + if (this.radioButtonGroup) { + this.radioButtonGroup.addCheckBox(this); + } +} + +checkBox.prototype.handleEvent = function(evt) { + if (evt.type == "click") { + if (this.checkedStatus == true) { + this.checkCross.setAttributeNS(null,"display","none"); + this.checkedStatus = false; + } + else { + this.checkCross.setAttributeNS(null,"display","inline"); + this.checkedStatus = true; + } + } + this.timer.setTimeout("fireFunction",this.timerMs); +} + +checkBox.prototype.fireFunction = function() { + if (this.radioButtonGroup) { + this.radioButtonGroup.selectById(this.id,true); + } + else { + if (typeof(this.functionToCall) == "function") { + this.functionToCall(this.id,this.checkedStatus,this.labelText); + } + if (typeof(this.functionToCall) == "object") { + this.functionToCall.checkBoxChanged(this.id,this.checkedStatus,this.labelText); + } + if (typeof(this.functionToCall) == undefined) { + return; + } + } +} + +checkBox.prototype.check = function(FireFunction) { + this.checkCross.setAttributeNS(null,"display","inherit"); + this.checkedStatus = true; + if (FireFunction) { + this.timer.setTimeout("fireFunction",this.timerMs); + } +} + +checkBox.prototype.uncheck = function(FireFunction) { + this.checkCross.setAttributeNS(null,"display","none"); + this.checkedStatus = false; + if (FireFunction) { + this.timer.setTimeout("fireFunction",this.timerMs); + } +} + +//move checkbox to a different position +checkBox.prototype.moveTo = function(moveX,moveY) { + this.x = moveX; + this.y = moveY; + //move checkbox + this.checkBox.setAttributeNS(null,"x",this.x); + this.checkBox.setAttributeNS(null,"y",this.y); + //move checkcross + this.checkCross.setAttributeNS(null,"x",this.x); + this.checkCross.setAttributeNS(null,"y",this.y); + //move text label + if (this.labelText) { + this.label.setAttributeNS(null,"x",(this.x + this.labelDistance)); + this.label.setAttributeNS(null,"y",(this.y + this.labelYOffset)); + } +} + +checkBox.prototype.remove = function(FireFunction) { + this.checkBox.removeEventListener("click",this,false); + this.parentNode.removeChild(this.checkBox); + this.parentNode.removeChild(this.checkCross); + if (this.label) { + this.parentNode.removeChild(this.label); + } + this.exists = false; +} + +checkBox.prototype.setLabelText = function(labelText) { + this.labelText = labelText + if (this.label) { + this.label.firstChild.nodeValue = labelText; + } + else { + if (this.labelText.length > 0) { + this.label = document.createElementNS(svgNS,"text"); + for (var attrib in this.textStyles) { + value = this.textStyles[attrib]; + if (attrib == "font-size") { + value += "px"; + } + this.label.setAttributeNS(null,attrib,value); + } + this.label.setAttributeNS(null,"x",(this.x + this.labelDistance)); + this.label.setAttributeNS(null,"y",(this.y + this.textStyles["font-size"] * 0.3)); + var labelTextNode = document.createTextNode(this.labelText); + this.label.appendChild(labelTextNode); + this.parentNode.appendChild(this.label); + } + } +} + +/* start of the radioButtonGroup object */ + +function radioButtonGroup(id,functionToCall) { + var nrArguments = 2; + if (arguments.length == nrArguments) { + this.id = id; + if (typeof(functionToCall) == "function" || typeof(functionToCall) == "object" || typeof(functionToCall) == undefined) { + this.functionToCall = functionToCall; + } + else { + alert("Error in radiobutton with ("+id+"): argument functionToCall is not of type 'function', 'object' or undefined!"); + } + this.checkBoxes = new Array(); //this array will hold checkbox objects + this.selectedId = undefined; //holds the id of the active radio button + this.selectedIndex = undefined; //holds the index of the active radio button + //timer stuff + this.timer = new Timer(this); //a Timer instance for calling the functionToCall + this.timerMs = 200; //a constant of this object that is used in conjunction with the timer - functionToCall is called after 200 ms + } + else { + alert("Error in radiobutton with ("+id+"): wrong nr of arguments! You have to pass over "+nrArguments+" parameters."); + } +} + +radioButtonGroup.prototype.addCheckBox = function(checkBoxObj) { + this.checkBoxes.push(checkBoxObj); + if (checkBoxObj.checkedStatus) { + this.selectedId = checkBoxObj.id; + this.selectedIndex = this.checkBoxes.length - 1; + } +} + +//change radio button selection by id +radioButtonGroup.prototype.selectById = function(cbId,fireFunction) { + var found = false; + for (var i=0;i<this.checkBoxes.length;i++) { + if (this.checkBoxes[i].id == cbId) { + this.selectedId = cbId; + this.selectedIndex = i; + if (this.checkBoxes[i].checkedStatus == false) { + this.checkBoxes[i].check(false); + } + found = true; + } + else { + this.checkBoxes[i].uncheck(false); + } + } + if (found) { + if (fireFunction) { + this.timer.setTimeout("fireFunction",this.timerMs); + } + } + else { + alert("Error in radiobutton with ("+this.id+"): could not find checkbox with id '"+cbId+"'"); + } +} + +//change radio button selection by label name +radioButtonGroup.prototype.selectByLabelname = function(labelName,fireFunction) { + var id = -1; + for (var i=0;i<this.checkBoxes.length;i++) { + if (this.checkBoxes[i].labelText == labelName) { + id = this.checkBoxes[i].id; + } + } + if (id == -1) { + alert("Error in radiobutton with ("+this.id+"): could not find checkbox with label '"+labelName+"'"); + } + else { + this.selectById(id,fireFunction); + } +} + +radioButtonGroup.prototype.fireFunction = function() { + if (typeof(this.functionToCall) == "function") { + this.functionToCall(this.id,this.selectedId,this.checkBoxes[this.selectedIndex].labelText); + } + if (typeof(this.functionToCall) == "object") { + this.functionToCall.radioButtonChanged(this.id,this.selectedId,this.checkBoxes[this.selectedIndex].labelText); + } + if (typeof(this.functionToCall) == undefined) { + return; + } +} \ No newline at end of file diff -r 0dc1fc63c945 -r 0b682d3dd01b static/scripts/helper_functions.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/static/scripts/helper_functions.js Tue Apr 13 11:15:35 2010 -0400 @@ -0,0 +1,817 @@ +/** + * @fileoverview + * + * ECMAScript <a href="http://www.carto.net/papers/svg/resources/helper_functions.html">helper functions</a>, main purpose is to serve in SVG mapping or other SVG based web applications + * + * This ECMA script library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library (http://www.carto.net/papers/svg/resources/lesser_gpl.txt); if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Please report bugs and send improvements to neumann@karto.baug.ethz.ch + * If you use these scripts, please link to the original (http://www.carto.net/papers/svg/resources/helper_functions.html) + * somewhere in the source-code-comment or the "about" of your project and give credits, thanks! + * + * See <a href="js_docs_out/overview-summary-helper_functions.js.html">documentation</a>. + * + * @author Andreas Neumann a.neumann@carto.net + * @copyright LGPL 2.1 <a href="http://www.gnu.org/copyleft/lesser.txt">Gnu LGPL 2.1</a> + * @credits Bruce Rindahl, numerous people on svgdevelopers@yahoogroups.com + */ + +//global variables necessary to create elements in these namespaces, do not delete them!!!! + +/** + * This variable is a shortcut to the full URL of the SVG namespace + * @final + * @type String + */ +var svgNS = "http://www.w3.org/2000/svg"; + +/** + * This variable is a shortcut to the full URL of the XLink namespace + * @final + * @type String + */ +var xlinkNS = "http://www.w3.org/1999/xlink"; + +/** + * This variable is a shortcut to the full URL of the attrib namespace + * @final + * @type String + */ +var cartoNS = "http://www.carto.net/attrib"; + +/** + * This variable is a alias to the full URL of the attrib namespace + * @final + * @type String + */ +var attribNS = "http://www.carto.net/attrib"; + +/** + * This variable is a alias to the full URL of the Batik extension namespace + * @final + * @type String + */ +var batikNS = "http://xml.apache.org/batik/ext"; + +/** + * Returns the polar direction from a given vector + * @param {Number} xdiff the x-part of the vector + * @param {Number} ydiff the y-part of the vector + * @return direction the direction in radians + * @type Number + * @version 1.0 (2007-04-30) + * @see #toPolarDist + * @see #toRectX + * @see #toRectY + */ +function toPolarDir(xdiff,ydiff) { + var direction = (Math.atan2(ydiff,xdiff)); + return(direction); +} + +/** + * Returns the polar distance from a given vector + * @param {Number} xdiff the x-part of the vector + * @param {Number} ydiff the y-part of the vector + * @return distance the distance + * @type Number + * @version 1.0 (2007-04-30) + * @see #toPolarDir + * @see #toRectX + * @see #toRectY + */ +function toPolarDist(xdiff,ydiff) { + var distance = Math.sqrt(xdiff * xdiff + ydiff * ydiff); + return(distance); +} + +/** + * Returns the x-part of a vector from a given direction and distance + * @param {Number} direction the direction (in radians) + * @param {Number} distance the distance + * @return x the x-part of the vector + * @type Number + * @version 1.0 (2007-04-30) + * @see #toPolarDist + * @see #toPolarDir + * @see #toRectY + */ +function toRectX(direction,distance) { + var x = distance * Math.cos(direction); + return(x); +} + +/** + * Returns the y-part of the vector from a given direction and distance + * @param {Number} direction the direction (in radians) + * @param {Number} distance the distance + * @return y the y-part of the vector + * @type Number + * @version 1.0 (2007-04-30) + * @see #toPolarDist + * @see #toPolarDir + * @see #toRectX + */ +function toRectY(direction,distance) { + y = distance * Math.sin(direction); + return(y); +} + +/** + * Converts degrees to radians + * @param {Number} deg the degree value + * @return rad the radians value + * @type Number + * @version 1.0 (2007-04-30) + * @see #RadToDeg + */ +function DegToRad(deg) { + return (deg / 180.0 * Math.PI); +} + +/** + * Converts radians to degrees + * @param {Number} rad the radians value + * @return deg the degree value + * @type Number + * @version 1.0 (2007-04-30) + * @see #DegToRad + */ +function RadToDeg(rad) { + return (rad / Math.PI * 180.0); +} + +/** + * Converts decimal degrees to degrees, minutes, seconds + * @param {Number} dd the decimal degree value + * @return degrees the degree values in the following notation: {deg:degrees,min:minutes,sec:seconds} + * @type literal + * @version 1.0 (2007-04-30) + * @see #dms2dd + */ +function dd2dms(dd) { + var minutes = (Math.abs(dd) - Math.floor(Math.abs(dd))) * 60; + var seconds = (minutes - Math.floor(minutes)) * 60; + var minutes = Math.floor(minutes); + if (dd >= 0) { + var degrees = Math.floor(dd); + } + else { + var degrees = Math.ceil(dd); + } + return {deg:degrees,min:minutes,sec:seconds}; +} + +/** + * Converts degrees, minutes and seconds to decimal degrees + * @param {Number} deg the degree value + * @param {Number} min the minute value + * @param {Number} sec the second value + * @return deg the decimal degree values + * @type Number + * @version 1.0 (2007-04-30) + * @see #dd2dms + */ +function dms2dd(deg,min,sec) { + if (deg < 0) { + return deg - (min / 60) - (sec / 3600); + } + else { + return deg + (min / 60) + (sec / 3600); + } +} + +/** + * log function, missing in the standard Math object + * @param {Number} x the value where the log function should be applied to + * @param {Number} b the base value for the log function + * @return logResult the result of the log function + * @type Number + * @version 1.0 (2007-04-30) + */ +function log(x,b) { + if(b==null) b=Math.E; + return Math.log(x)/Math.log(b); +} + +/** + * interpolates a value (e.g. elevation) bilinearly based on the position within a cell with 4 corner values + * @param {Number} za the value at the upper left corner of the cell + * @param {Number} zb the value at the upper right corner of the cell + * @param {Number} zc the value at the lower right corner of the cell + * @param {Number} zd the value at the lower left corner of the cell + * @param {Number} xpos the x position of the point where a new value should be interpolated + * @param {Number} ypos the y position of the point where a new value should be interpolated + * @param {Number} ax the x position of the lower left corner of the cell + * @param {Number} ay the y position of the lower left corner of the cell + * @param {Number} cellsize the size of the cell + * @return interpol_value the result of the bilinear interpolation function + * @type Number + * @version 1.0 (2007-04-30) + */ +function intBilinear(za,zb,zc,zd,xpos,ypos,ax,ay,cellsize) { //bilinear interpolation function + var e = (xpos - ax) / cellsize; + var f = (ypos - ay) / cellsize; + + //calculation of weights + var wa = (1 - e) * (1 - f); + var wb = e * (1 - f); + var wc = e * f; + var wd = f * (1 - e); + + var interpol_value = wa * zc + wb * zd + wc * za + wd * zb; + return interpol_value; +} + +/** + * tests if a given point is left or right of a given line + * @param {Number} pointx the x position of the given point + * @param {Number} pointy the y position of the given point + * @param {Number} linex1 the x position of line's start point + * @param {Number} liney1 the y position of line's start point + * @param {Number} linex2 the x position of line's end point + * @param {Number} liney2 the y position of line's end point + * @return leftof the result of the leftOfTest, 1 means leftOf, 0 means rightOf + * @type Number (integer, 0|1) + * @version 1.0 (2007-04-30) + */ +function leftOfTest(pointx,pointy,linex1,liney1,linex2,liney2) { + var result = (liney1 - pointy) * (linex2 - linex1) - (linex1 - pointx) * (liney2 - liney1); + if (result < 0) { + var leftof = 1; //case left of + } + else { + var leftof = 0; //case left of + } + return leftof; +} + +/** + * calculates the distance between a given point and a given line + * @param {Number} pointx the x position of the given point + * @param {Number} pointy the y position of the given point + * @param {Number} linex1 the x position of line's start point + * @param {Number} liney1 the y position of line's start point + * @param {Number} linex2 the x position of line's end point + * @param {Number} liney2 the y position of line's end point + * @return distance the result of the leftOfTest, 1 means leftOf, 0 means rightOf + * @type Number + * @version 1.0 (2007-04-30) + */ +function distFromLine(xpoint,ypoint,linex1,liney1,linex2,liney2) { + var dx = linex2 - linex1; + var dy = liney2 - liney1; + var distance = (dy * (xpoint - linex1) - dx * (ypoint - liney1)) / Math.sqrt(Math.pow(dx,2) + Math.pow(dy,2)); + return distance; +} + +/** + * calculates the angle between two vectors (lines) + * @param {Number} ax the x part of vector a + * @param {Number} ay the y part of vector a + * @param {Number} bx the x part of vector b + * @param {Number} by the y part of vector b + * @return angle the angle in radians + * @type Number + * @version 1.0 (2007-04-30) + * @credits <a href="http://www.mathe-online.at/mathint/vect2/i.html#Winkel">Mathe Online (Winkel)</a> + */ +function angleBetwTwoLines(ax,ay,bx,by) { + var angle = Math.acos((ax * bx + ay * by) / (Math.sqrt(Math.pow(ax,2) + Math.pow(ay,2)) * Math.sqrt(Math.pow(bx,2) + Math.pow(by,2)))); + return angle; +} + +/** + * calculates the bisector vector for two given vectors + * @param {Number} ax the x part of vector a + * @param {Number} ay the y part of vector a + * @param {Number} bx the x part of vector b + * @param {Number} by the y part of vector b + * @return c the resulting vector as an Array, c[0] is the x part of the vector, c[1] is the y part + * @type Array + * @version 1.0 (2007-04-30) + * @credits <a href="http://www.mathe-online.at/mathint/vect1/i.html#Winkelsymmetrale">Mathe Online (Winkelsymmetrale)</a> + * see #calcBisectorAngle + * */ +function calcBisectorVector(ax,ay,bx,by) { + var betraga = Math.sqrt(Math.pow(ax,2) + Math.pow(ay,2)); + var betragb = Math.sqrt(Math.pow(bx,2) + Math.pow(by,2)); + var c = new Array(); + c[0] = ax / betraga + bx / betragb; + c[1] = ay / betraga + by / betragb; + return c; +} + +/** + * calculates the bisector angle for two given vectors + * @param {Number} ax the x part of vector a + * @param {Number} ay the y part of vector a + * @param {Number} bx the x part of vector b + * @param {Number} by the y part of vector b + * @return angle the bisector angle in radians + * @type Number + * @version 1.0 (2007-04-30) + * @credits <a href="http://www.mathe-online.at/mathint/vect1/i.html#Winkelsymmetrale">Mathe Online (Winkelsymmetrale)</a> + * see #calcBisectorVector + * */ +function calcBisectorAngle(ax,ay,bx,by) { + var betraga = Math.sqrt(Math.pow(ax,2) + Math.pow(ay,2)); + var betragb = Math.sqrt(Math.pow(bx,2) + Math.pow(by,2)); + var c1 = ax / betraga + bx / betragb; + var c2 = ay / betraga + by / betragb; + var angle = toPolarDir(c1,c2); + return angle; +} + +/** + * calculates the intersection point of two given lines + * @param {Number} line1x1 the x the start point of line 1 + * @param {Number} line1y1 the y the start point of line 1 + * @param {Number} line1x2 the x the end point of line 1 + * @param {Number} line1y2 the y the end point of line 1 + * @return interSectPoint the intersection point, interSectPoint.x contains x-part, interSectPoint.y the y-part of the resulting coordinate + * @type Object + * @version 1.0 (2007-04-30) + * @credits <a href="http://astronomy.swin.edu.au/~pbourke/geometry/lineline2d/">P. Bourke</a> + */ +function intersect2lines(line1x1,line1y1,line1x2,line1y2,line2x1,line2y1,line2x2,line2y2) { + var interSectPoint = new Object(); + var denominator = (line2y2 - line2y1)*(line1x2 - line1x1) - (line2x2 - line2x1)*(line1y2 - line1y1); + if (denominator == 0) { + alert("lines are parallel"); + } + else { + var ua = ((line2x2 - line2x1)*(line1y1 - line2y1) - (line2y2 - line2y1)*(line1x1 - line2x1)) / denominator; + var ub = ((line1x2 - line1x1)*(line1y1 - line2y1) - (line1y2 - line1y1)*(line1x1 - line2x1)) / denominator; + } + interSectPoint["x"] = line1x1 + ua * (line1x2 - line1x1); + interSectPoint["y"] = line1y1 + ua * (line1y2 - line1y1); + return interSectPoint; +} + +/** + * reformats a given number to a string by adding separators at every third digit + * @param {String|Number} inputNumber the input number, can be of type number or string + * @param {String} separator the separator, e.g. ' or , + * @return newString the intersection point, interSectPoint.x contains x-part, interSectPoint.y the y-part of the resulting coordinate + * @type String + * @version 1.0 (2007-04-30) + */ +function formatNumberString(inputNumber,separator) { + //check if of type string, if number, convert it to string + if (typeof(inputNumber) == "Number") { + var myTempString = inputNumber.toString(); + } + else { + var myTempString = inputNumber; + } + var newString=""; + //if it contains a comma, it will be split + var splitResults = myTempString.split("."); + var myCounter = splitResults[0].length; + if (myCounter > 3) { + while(myCounter > 0) { + if (myCounter > 3) { + newString = separator + splitResults[0].substr(myCounter - 3,3) + newString; + } + else { + newString = splitResults[0].substr(0,myCounter) + newString; + } + myCounter -= 3; + } + } + else { + newString = splitResults[0]; + } + //concatenate if it contains a comma + if (splitResults[1]) { + newString = newString + "." + splitResults[1]; + } + return newString; +} + +/** + * writes a status text message out to a SVG text element's first child + * @param {String} statusText the text message to be displayed + * @version 1.0 (2007-04-30) + */ + function statusChange(statusText) { + document.getElementById("statusText").firstChild.nodeValue = "Statusbar: " + statusText; +} + +/** + * scales an SVG element, requires that the element has an x and y attribute (e.g. circle, ellipse, use element, etc.) + * @param {dom::Event} evt the evt object that triggered the scaling + * @param {Number} factor the scaling factor + * @version 1.0 (2007-04-30) + */ +function scaleObject(evt,factor) { + //reference to the currently selected object + var element = evt.currentTarget; + var myX = element.getAttributeNS(null,"x"); + var myY = element.getAttributeNS(null,"y"); + var newtransform = "scale(" + factor + ") translate(" + (myX * 1 / factor - myX) + " " + (myY * 1 / factor - myY) +")"; + element.setAttributeNS(null,'transform', newtransform); +} + +/** + * returns the transformation matrix (ctm) for the given node up to the root element + * the basic use case is to provide a wrapper function for the missing SVGLocatable.getTransformToElement method (missing in ASV3) + * @param {svg::SVGTransformable} node the node reference for the SVGElement the ctm is queried + * @return CTM the current transformation matrix from the given node to the root element + * @type svg::SVGMatrix + * @version 1.0 (2007-05-01) + * @credits <a href="http://www.kevlindev.com/tutorials/basics/transformations/toUserSpace/index.htm">Kevin Lindsey (toUserSpace)</a> + * @see #getTransformToElement + */ +function getTransformToRootElement(node) { + try { + //this part is for fully conformant players (like Opera, Batik, Firefox, Safari ...) + var CTM = node.getTransformToElement(document.documentElement); + } + catch (ex) { + //this part is for ASV3 or other non-conformant players + // Initialize our CTM the node's Current Transformation Matrix + var CTM = node.getCTM(); + // Work our way through the ancestor nodes stopping at the SVG Document + while ( ( node = node.parentNode ) != document ) { + // Multiply the new CTM to the one with what we have accumulated so far + CTM = node.getCTM().multiply(CTM); + } + } + return CTM; +} + +/** + * returns the transformation matrix (ctm) for the given dom::Node up to a different dom::Node + * the basic use case is to provide a wrapper function for the missing SVGLocatable.getTransformToElement method (missing in ASV3) + * @param {svg::SVGTransformable} node the node reference for the element the where the ctm should be calculated from + * @param {svg::SVGTransformable} targetNode the target node reference for the element the ctm should be calculated to + * @return CTM the current transformation matrix from the given node to the target element + * @type svg::SVGMatrix + * @version 1.0 (2007-05-01) + * @credits <a href="http://www.kevlindev.com/tutorials/basics/transformations/toUserSpace/index.htm">Kevin Lindsey (toUserSpace)</a> + * @see #getTransformToRootElement + */ +function getTransformToElement(node,targetNode) { + try { + //this part is for fully conformant players + var CTM = node.getTransformToElement(targetNode); + } + catch (ex) { + //this part is for ASV3 or other non-conformant players + // Initialize our CTM the node's Current Transformation Matrix + var CTM = node.getCTM(); + // Work our way through the ancestor nodes stopping at the SVG Document + while ( ( node = node.parentNode ) != targetNode ) { + // Multiply the new CTM to the one with what we have accumulated so far + CTM = node.getCTM().multiply(CTM); + } + } + return CTM; +} + +/** + * converts HSV to RGB values + * @param {Number} hue the hue value (between 0 and 360) + * @param {Number} sat the saturation value (between 0 and 1) + * @param {Number} val the value value (between 0 and 1) + * @return rgbArr the rgb values (associative array or object, the keys are: red,green,blue), all values are scaled between 0 and 255 + * @type Object + * @version 1.0 (2007-05-01) + * @see #rgb2hsv + */ +function hsv2rgb(hue,sat,val) { + var rgbArr = new Object(); + if ( sat == 0) { + rgbArr["red"] = Math.round(val * 255); + rgbArr["green"] = Math.round(val * 255); + rgbArr["blue"] = Math.round(val * 255); + } + else { + var h = hue / 60; + var i = Math.floor(h); + var f = h - i; + if (i % 2 == 0) { + f = 1 - f; + } + var m = val * (1 - sat); + var n = val * (1 - sat * f); + switch(i) { + case 0: + rgbArr["red"] = val; + rgbArr["green"] = n; + rgbArr["blue"] = m; + break; + case 1: + rgbArr["red"] = n; + rgbArr["green"] = val; + rgbArr["blue"] = m; + break; + case 2: + rgbArr["red"] = m; + rgbArr["green"] = val; + rgbArr["blue"] = n; + break; + case 3: + rgbArr["red"] = m; + rgbArr["green"] = n; + rgbArr["blue"] = val; + break; + case 4: + rgbArr["red"] = n; + rgbArr["green"] = m; + rgbArr["blue"] = val; + break; + case 5: + rgbArr["red"] = val; + rgbArr["green"] = m; + rgbArr["blue"] = n; + break; + case 6: + rgbArr["red"] = val; + rgbArr["green"] = n; + rgbArr["blue"] = m; + break; + } + rgbArr["red"] = Math.round(rgbArr["red"] * 255); + rgbArr["green"] = Math.round(rgbArr["green"] * 255); + rgbArr["blue"] = Math.round(rgbArr["blue"] * 255); + } + return rgbArr; +} + +/** + * converts RGB to HSV values + * @param {Number} red the hue value (between 0 and 255) + * @param {Number} green the saturation value (between 0 and 255) + * @param {Number} blue the value value (between 0 and 255) + * @return hsvArr the hsv values (associative array or object, the keys are: hue (0-360),sat (0-1),val (0-1)) + * @type Object + * @version 1.0 (2007-05-01) + * @see #hsv2rgb + */ +function rgb2hsv(red,green,blue) { + var hsvArr = new Object(); + red = red / 255; + green = green / 255; + blue = blue / 255; + myMax = Math.max(red, Math.max(green,blue)); + myMin = Math.min(red, Math.min(green,blue)); + v = myMax; + if (myMax > 0) { + s = (myMax - myMin) / myMax; + } + else { + s = 0; + } + if (s > 0) { + myDiff = myMax - myMin; + rc = (myMax - red) / myDiff; + gc = (myMax - green) / myDiff; + bc = (myMax - blue) / myDiff; + if (red == myMax) { + h = (bc - gc) / 6; + } + if (green == myMax) { + h = (2 + rc - bc) / 6; + } + if (blue == myMax) { + h = (4 + gc - rc) / 6; + } + } + else { + h = 0; + } + if (h < 0) { + h += 1; + } + hsvArr["hue"] = Math.round(h * 360); + hsvArr["sat"] = s; + hsvArr["val"] = v; + return hsvArr; +} + +/** + * populates an array such that it can be addressed by both a key or an index nr, + * note that both Arrays need to be of the same length + * @param {Array} arrayKeys the array containing the keys + * @param {Array} arrayValues the array containing the values + * @return returnArray the resulting array containing both associative values and also a regular indexed array + * @type Array + * @version 1.0 (2007-05-01) + */ +function arrayPopulate(arrayKeys,arrayValues) { + var returnArray = new Array(); + if (arrayKeys.length != arrayValues.length) { + alert("error: arrays do not have the same length!"); + } + else { + for (i=0;i<arrayKeys.length;i++) { + returnArray[arrayKeys[i]] = arrayValues[i]; + } + } + return returnArray; +} + +/** + * Wrapper object for network requests, uses getURL or XMLHttpRequest depending on availability + * The callBackFunction receives a XML or text node representing the rootElement + * of the fragment received or the return text, depending on the returnFormat. + * See also the following <a href="http://www.carto.net/papers/svg/network_requests/">documentation</a>. + * @class this is a wrapper object to provide network request functionality (get|post) + * @param {String} url the URL/IRI of the network resource to be called + * @param {Function|Object} callBackFunction the callBack function or object that is called after the data was received, in case of an object, the method 'receiveData' is called; both the function and the object's 'receiveData' method get 2 return parameters: 'node.firstChild'|text (the root element of the XML or text resource), this.additionalParams (if defined) + * @param {String} returnFormat the return format, either 'xml' or 'json' (or text) + * @param {String} method the method of the network request, either 'get' or 'post' + * @param {String|Undefined} postText the String containing the post text (optional) or Undefined (if not a 'post' request) + * @param {Object|Array|String|Number|Undefined} additionalParams additional parameters that will be passed to the callBackFunction or object (optional) or Undefined + * @return a new getData instance + * @type getData + * @constructor + * @version 1.0 (2007-02-23) + */ +function getData(url,callBackFunction,returnFormat,method,postText,additionalParams) { + this.url = url; + this.callBackFunction = callBackFunction; + this.returnFormat = returnFormat; + this.method = method; + this.additionalParams = additionalParams; + if (method != "get" && method != "post") { + alert("Error in network request: parameter 'method' must be 'get' or 'post'"); + } + this.postText = postText; + this.xmlRequest = null; //@private reference to the XMLHttpRequest object +} + +/** + * triggers the network request defined in the constructor + */ +getData.prototype.getData = function() { + //call getURL() if available + if (window.getURL) { + if (this.method == "get") { + getURL(this.url,this); + } + if (this.method == "post") { + postURL(this.url,this.postText,this); + } + } + //or call XMLHttpRequest() if available + else if (window.XMLHttpRequest) { + var _this = this; + this.xmlRequest = new XMLHttpRequest(); + if (this.method == "get") { + if (this.returnFormat == "xml") { + this.xmlRequest.overrideMimeType("text/xml"); + } + this.xmlRequest.open("GET",this.url,true); + } + if (this.method == "post") { + this.xmlRequest.open("POST",this.url,true); + } + this.xmlRequest.onreadystatechange = function() {_this.handleEvent()}; + if (this.method == "get") { + this.xmlRequest.send(null); + } + if (this.method == "post") { + //test if postText exists and is of type string + var reallyPost = true; + if (!this.postText) { + reallyPost = false; + alert("Error in network post request: missing parameter 'postText'!"); + } + if (typeof(this.postText) != "string") { + reallyPost = false; + alert("Error in network post request: parameter 'postText' has to be of type 'string')"); + } + if (reallyPost) { + this.xmlRequest.send(this.postText); + } + } + } + //write an error message if neither method is available + else { + alert("your browser/svg viewer neither supports window.getURL nor window.XMLHttpRequest!"); + } +} + +/** + * this is the callback method for the getURL() or postURL() case + * @private + */ +getData.prototype.operationComplete = function(data) { + //check if data has a success property + if (data.success) { + //parse content of the XML format to the variable "node" + if (this.returnFormat == "xml") { + //convert the text information to an XML node and get the first child + var node = parseXML(data.content,document); + //distinguish between a callback function and an object + if (typeof(this.callBackFunction) == "function") { + this.callBackFunction(node.firstChild,this.additionalParams); + } + if (typeof(this.callBackFunction) == "object") { + this.callBackFunction.receiveData(node.firstChild,this.additionalParams); + } + } + if (this.returnFormat == "json") { + if (typeof(this.callBackFunction) == "function") { + this.callBackFunction(data.content,this.additionalParams); + } + if (typeof(this.callBackFunction) == "object") { + this.callBackFunction.receiveData(data.content,this.additionalParams); + } + } + } + else { + alert("something went wrong with dynamic loading of geometry!"); + } +} + +/** + * this is the callback method for the XMLHttpRequest case + * @private + */ +getData.prototype.handleEvent = function() { + if (this.xmlRequest.readyState == 4) { + if (this.returnFormat == "xml") { + //we need to import the XML node first + var importedNode = document.importNode(this.xmlRequest.responseXML.documentElement,true); + if (typeof(this.callBackFunction) == "function") { + this.callBackFunction(importedNode,this.additionalParams); + } + if (typeof(this.callBackFunction) == "object") { + this.callBackFunction.receiveData(importedNode,this.additionalParams); + } + } + if (this.returnFormat == "json") { + if (typeof(this.callBackFunction) == "function") { + this.callBackFunction(this.xmlRequest.responseText,this.additionalParams); + } + if (typeof(this.callBackFunction) == "object") { + this.callBackFunction.receiveData(this.xmlRequest.responseText,this.additionalParams); + } + } + } +} + +/** + * Serializes an XML node and returns a string representation. Wrapper function to hide implementation differences. + * This can be used for debugging purposes or to post data to a server or network resource. + * @param {dom::Node} node the DOM node reference + * @return textRepresentation the String representation of the XML node + * @type String + * @version 1.0 (2007-05-01) + * @see getData + */ +function serializeNode(node) { + if (typeof XMLSerializer != 'undefined') { + return new XMLSerializer().serializeToString(node); + } + else if (typeof node.xml != 'undefined') { + return node.xml; + } + else if (typeof printNode != 'undefined') { + return printNode(node); + } + else if (typeof Packages != 'undefined') { + try { + var stringWriter = new java.io.StringWriter(); + Packages.org.apache.batik.dom.util.DOMUtilities.writeNode(node,stringWriter); + return stringWriter.toString(); + } + catch (e) { + alert("Sorry, your SVG viewer does not support the printNode/serialize function."); + return ''; + } + } + else { + alert("Sorry, your SVG viewer does not support the printNode/serialize function."); + return ''; + } +} + +/** + * Starts a SMIL animation element with the given id by triggering the '.beginElement()' method. + * This is a convenience (shortcut) function. + * @param {String} id a valid id of a valid SMIL animation element + * @version 1.0 (2007-05-01) + */ +//starts an animtion with the given id +//this function is useful in combination with window.setTimeout() +function startAnimation(id) { + document.getElementById(id).beginElement(); +} diff -r 0dc1fc63c945 -r 0b682d3dd01b static/scripts/timer.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/static/scripts/timer.js Tue Apr 13 11:15:35 2010 -0400 @@ -0,0 +1,74 @@ +// source/credits: "Algorithm": http://www.codingforums.com/showthread.php?s=&threadid=10531 +// The constructor should be called with +// the parent object (optional, defaults to window). + +function Timer(){ + this.obj = (arguments.length)?arguments[0]:window; + return this; +} + +// The set functions should be called with: +// - The name of the object method (as a string) (required) +// - The millisecond delay (required) +// - Any number of extra arguments, which will all be +// passed to the method when it is evaluated. + +Timer.prototype.setInterval = function(func, msec){ + var i = Timer.getNew(); + var t = Timer.buildCall(this.obj, i, arguments); + Timer.set[i].timer = window.setInterval(t,msec); + return i; +} +Timer.prototype.setTimeout = function(func, msec){ + var i = Timer.getNew(); + Timer.buildCall(this.obj, i, arguments); + Timer.set[i].timer = window.setTimeout("Timer.callOnce("+i+");",msec); + return i; +} + +// The clear functions should be called with +// the return value from the equivalent set function. + +Timer.prototype.clearInterval = function(i){ + if(!Timer.set[i]) return; + window.clearInterval(Timer.set[i].timer); + Timer.set[i] = null; +} +Timer.prototype.clearTimeout = function(i){ + if(!Timer.set[i]) return; + window.clearTimeout(Timer.set[i].timer); + Timer.set[i] = null; +} + +// Private data + +Timer.set = new Array(); +Timer.buildCall = function(obj, i, args){ + var t = ""; + Timer.set[i] = new Array(); + if(obj != window){ + Timer.set[i].obj = obj; + t = "Timer.set["+i+"].obj."; + } + t += args[0]+"("; + if(args.length > 2){ + Timer.set[i][0] = args[2]; + t += "Timer.set["+i+"][0]"; + for(var j=1; (j+2)<args.length; j++){ + Timer.set[i][j] = args[j+2]; + t += ", Timer.set["+i+"]["+j+"]"; + }} + t += ");"; + Timer.set[i].call = t; + return t; +} +Timer.callOnce = function(i){ + if(!Timer.set[i]) return; + eval(Timer.set[i].call); + Timer.set[i] = null; +} +Timer.getNew = function(){ + var i = 0; + while(Timer.set[i]) i++; + return i; +} \ No newline at end of file diff -r 0dc1fc63c945 -r 0b682d3dd01b tools/rgenetics/rgGRR.py --- a/tools/rgenetics/rgGRR.py Tue Apr 13 10:22:27 2010 -0400 +++ b/tools/rgenetics/rgGRR.py Tue Apr 13 11:15:35 2010 -0400 @@ -1,1096 +1,1145 @@ -""" -# july 2009: Need to see outliers so need to draw them last? -# could use clustering on the zscores to guess real relationships for unrelateds -# but definitely need to draw last -# added MAX_SHOW_ROWS to limit the length of the main report page -# Changes for Galaxy integration -# added more robust knuth method for one pass mean and sd -# no difference really - let's use scipy.mean() and scipy.std() instead... -# fixed labels and changed to .xls for outlier reports so can open in excel -# interesting - with a few hundred subjects, 5k gives good resolution -# and 100k gives better but not by much -# TODO remove non autosomal markers -# TODO it would be best if label had the zmean and zsd as these are what matter for -# outliers rather than the group mean/sd -# mods to rgGRR.py from channing CVS which John Ziniti has rewritten to produce SVG plots -# to make a Galaxy tool - we need the table of mean and SD for interesting pairs, the SVG and the log -# so the result should be an HTML file - -# rgIBS.py -# use a random subset of markers for a quick ibs -# to identify sample dups and closely related subjects -# try snpMatrix and plink and see which one works best for us? -# abecasis grr plots mean*sd for every subject to show clusters -# mods june 23 rml to avoid non-autosomal markers -# we seem to be distinguishing parent-child by gender - 2 clouds! - - -snpMatrix from David Clayton has: -ibs.stats function to calculate the identity-by-state stats of a group of samples -Description -Given a snp.matrix-class or a X.snp.matrix-class object with N samples, calculates some statistics -about the relatedness of every pair of samples within. - -Usage -ibs.stats(x) -8 ibs.stats -Arguments -x a snp.matrix-class or a X.snp.matrix-class object containing N samples -Details -No-calls are excluded from consideration here. -Value -A data.frame containing N(N - 1)/2 rows, where the row names are the sample name pairs separated -by a comma, and the columns are: -Count count of identical calls, exclusing no-calls -Fraction fraction of identical calls comparied to actual calls being made in both samples -Warning -In some applications, it may be preferable to subset a (random) selection of SNPs first - the -calculation -time increases as N(N - 1)M/2 . Typically for N = 800 samples and M = 3000 SNPs, the -calculation time is about 1 minute. A full GWA scan could take hours, and quite unnecessary for -simple applications such as checking for duplicate or related samples. -Note -This is mostly written to find mislabelled and/or duplicate samples. -Illumina indexes their SNPs in alphabetical order so the mitochondria SNPs comes first - for most -purpose it is undesirable to use these SNPs for IBS purposes. -TODO: Worst-case S4 subsetting seems to make 2 copies of a large object, so one might want to -subset before rbind(), etc; a future version of this routine may contain a built-in subsetting facility -""" -import sys,os,time,random,string,copy,optparse - -try: - set -except NameError: - from Sets import Set as set - -from rgutils import timenow -import plinkbinJZ - - -opts = None -verbose = False - -showPolygons = False - -class NullDevice: - def write(self, s): - pass - -tempstderr = sys.stderr # save -sys.stderr = NullDevice() -# need to avoid blather about deprecation and other strange stuff from scipy -# the current galaxy job runner assumes that -# the job is in error if anything appears on sys.stderr -# grrrrr. James wants to keep it that way instead of using the -# status flag for some strange reason. Presumably he doesn't use R or (in this case, scipy) -import numpy -import scipy -from scipy import weave - - -sys.stderr=tempstderr - - -PROGNAME = os.path.split(sys.argv[0])[-1] -X_AXIS_LABEL = 'Mean Alleles Shared' -Y_AXIS_LABEL = 'SD Alleles Shared' -LEGEND_ALIGN = 'topleft' -LEGEND_TITLE = 'Relationship' -DEFAULT_SYMBOL_SIZE = 1.0 # default symbol size -DEFAULT_SYMBOL_SIZE = 0.5 # default symbol size - -### Some colors for R/rpy -R_BLACK = 1 -R_RED = 2 -R_GREEN = 3 -R_BLUE = 4 -R_CYAN = 5 -R_PURPLE = 6 -R_YELLOW = 7 -R_GRAY = 8 - -### ... and some point-styles - -### -PLOT_HEIGHT = 600 -PLOT_WIDTH = 1150 - - -#SVG_COLORS = ('black', 'darkblue', 'blue', 'deepskyblue', 'firebrick','maroon','crimson') -#SVG_COLORS = ('cyan','dodgerblue','mediumpurple', 'fuchsia', 'red','gold','gray') -SVG_COLORS = ('cyan','dodgerblue','mediumpurple','forestgreen', 'lightgreen','gold','gray') -# dupe,parentchild,sibpair,halfsib,parents,unrel,unkn -#('orange', 'red', 'green', 'chartreuse', 'blue', 'purple', 'gray') - -OUTLIERS_HEADER = 'Mean\tSdev\tZ(mean)\tZ(sdev)\tFID1\tIID1\tFID2\tIID2\tMean(Rel_Mean)\tSdev(Rel_Mean)\tMean(Rel_Sdev)\tSdev(Rel_Sdev)\n' -OUTLIERS_HEADER_list = ['Mean','Sdev','ZMean','ZSdev','FID1','IID1','FID2','IID2', -'RGMean_M','RGMean_SD','RGSD_M','RGSD_SD'] -TABLE_HEADER='fid1 iid1\tfid2 iid2\tmean\tsdev\tzmean\tzsdev\tgeno\trelcode\n' - - -### Relationship codes, text, and lookups/mappings -N_RELATIONSHIP_TYPES = 7 -REL_DUPE, REL_PARENTCHILD, REL_SIBS, REL_HALFSIBS, REL_RELATED, REL_UNRELATED, REL_UNKNOWN = range(N_RELATIONSHIP_TYPES) -REL_LOOKUP = { - REL_DUPE: ('dupe', R_BLUE, 1), - REL_PARENTCHILD: ('parentchild', R_YELLOW, 1), - REL_SIBS: ('sibpairs', R_RED, 1), - REL_HALFSIBS: ('halfsibs', R_GREEN, 1), - REL_RELATED: ('parents', R_PURPLE, 1), - REL_UNRELATED: ('unrelated', R_CYAN, 1), - REL_UNKNOWN: ('unknown', R_GRAY, 1), - } -OUTLIER_STDEVS = { - REL_DUPE: 2, - REL_PARENTCHILD: 2, - REL_SIBS: 2, - REL_HALFSIBS: 2, - REL_RELATED: 2, - REL_UNRELATED: 3, - REL_UNKNOWN: 2, - } -# note now Z can be passed in - -REL_STATES = [REL_LOOKUP[r][0] for r in range(N_RELATIONSHIP_TYPES)] -REL_COLORS = SVG_COLORS -REL_POINTS = [REL_LOOKUP[r][2] for r in range(N_RELATIONSHIP_TYPES)] - -DEFAULT_MAX_SAMPLE_SIZE = 10000 - -REF_COUNT_HOM1 = 3 -REF_COUNT_HET = 2 -REF_COUNT_HOM2 = 1 -MISSING = 0 -MAX_SHOW_ROWS = 100 # framingham has millions - delays showing output page - so truncate and explain -MARKER_PAIRS_PER_SECOND_SLOW = 15000000.0 -MARKER_PAIRS_PER_SECOND_FAST = 70000000.0 - - -galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> -<head> -<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> -<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> -<title></title> -<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> -</head> -<body> -<div class="document"> -""" - - -SVG_HEADER = '''<?xml version="1.0" standalone="no"?> -<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.2//EN" "http://www.w3.org/Graphics/SVG/1.2/DTD/svg12.dtd"> - -<svg width="1280" height="800" - xmlns="http://www.w3.org/2000/svg" version="1.2" - xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 1280 800" onload="init()"> - - <script type="text/ecmascript" xlink:href="/static/scripts/tools/rgenetics/checkbox_and_radiobutton.js"/> - <script type="text/ecmascript" xlink:href="/static/scripts/tools/rgenetics/helper_functions.js"/> - <script type="text/ecmascript" xlink:href="/static/scripts/tools/rgenetics/timer.js"/> - <script type="text/ecmascript"> - <![CDATA[ - var checkBoxes = new Array(); - var radioGroupBandwidth; - var colours = ['%s','%s','%s','%s','%s','%s','%s']; - function init() { - var style = {"font-family":"Arial,Helvetica", "fill":"black", "font-size":12}; - var dist = 12; - var yOffset = 4; - - //A checkBox for each relationship type dupe,parentchild,sibpair,halfsib,parents,unrel,unkn - checkBoxes["dupe"] = new checkBox("dupe","checkboxes",20,40,"cbRect","cbCross",true,"Duplicate",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["parentchild"] = new checkBox("parentchild","checkboxes",20,60,"cbRect","cbCross",true,"Parent-Child",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["sibpairs"] = new checkBox("sibpairs","checkboxes",20,80,"cbRect","cbCross",true,"Sib-pairs",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["halfsibs"] = new checkBox("halfsibs","checkboxes",20,100,"cbRect","cbCross",true,"Half-sibs",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["parents"] = new checkBox("parents","checkboxes",20,120,"cbRect","cbCross",true,"Parents",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["unrelated"] = new checkBox("unrelated","checkboxes",20,140,"cbRect","cbCross",true,"Unrelated",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["unknown"] = new checkBox("unknown","checkboxes",20,160,"cbRect","cbCross",true,"Unknown",style,dist,yOffset,undefined,hideShowLayer); - - } - - function hideShowLayer(id, status, label) { - var vis = "hidden"; - if (status) { - vis = "visible"; - } - document.getElementById(id).setAttributeNS(null, 'visibility', vis); - } - - function showBTT(evt, rel, mm, dm, md, dd, n, mg, dg, lg, hg) { - var x = parseInt(evt.pageX)-250; - var y = parseInt(evt.pageY)-110; - switch(rel) { - case 0: - fill = colours[rel]; - relt = "dupe"; - break; - case 1: - fill = colours[rel]; - relt = "parentchild"; - break; - case 2: - fill = colours[rel]; - relt = "sibpairs"; - break; - case 3: - fill = colours[rel]; - relt = "halfsibs"; - break; - case 4: - fill = colours[rel]; - relt = "parents"; - break; - case 5: - fill = colours[rel]; - relt = "unrelated"; - break; - case 6: - fill = colours[rel]; - relt = "unknown"; - break; - default: - fill = "cyan"; - relt = "ERROR_CODE: "+rel; - } - - document.getElementById("btRel").textContent = "GROUP: "+relt; - document.getElementById("btMean").textContent = "mean="+mm+" +/- "+dm; - document.getElementById("btSdev").textContent = "sdev="+dm+" +/- "+dd; - document.getElementById("btPair").textContent = "npairs="+n; - document.getElementById("btGeno").textContent = "ngenos="+mg+" +/- "+dg+" (min="+lg+", max="+hg+")"; - document.getElementById("btHead").setAttribute('fill', fill); - - var tt = document.getElementById("btTip"); - tt.setAttribute("transform", "translate("+x+","+y+")"); - tt.setAttribute('visibility', 'visible'); - } - - function showOTT(evt, rel, s1, s2, mean, sdev, ngeno, rmean, rsdev) { - var x = parseInt(evt.pageX)-150; - var y = parseInt(evt.pageY)-180; - - switch(rel) { - case 0: - fill = colours[rel]; - relt = "dupe"; - break; - case 1: - fill = colours[rel]; - relt = "parentchild"; - break; - case 2: - fill = colours[rel]; - relt = "sibpairs"; - break; - case 3: - fill = colours[rel]; - relt = "halfsibs"; - break; - case 4: - fill = colours[rel]; - relt = "parents"; - break; - case 5: - fill = colours[rel]; - relt = "unrelated"; - break; - case 6: - fill = colours[rel]; - relt = "unknown"; - break; - default: - fill = "cyan"; - relt = "ERROR_CODE: "+rel; - } - - document.getElementById("otRel").textContent = "PAIR: "+relt; - document.getElementById("otS1").textContent = "s1="+s1; - document.getElementById("otS2").textContent = "s2="+s2; - document.getElementById("otMean").textContent = "mean="+mean; - document.getElementById("otSdev").textContent = "sdev="+sdev; - document.getElementById("otGeno").textContent = "ngenos="+ngeno; - document.getElementById("otRmean").textContent = "relmean="+rmean; - document.getElementById("otRsdev").textContent = "relsdev="+rsdev; - document.getElementById("otHead").setAttribute('fill', fill); - - var tt = document.getElementById("otTip"); - tt.setAttribute("transform", "translate("+x+","+y+")"); - tt.setAttribute('visibility', 'visible'); - } - - function hideBTT(evt) { - document.getElementById("btTip").setAttributeNS(null, 'visibility', 'hidden'); - } - - function hideOTT(evt) { - document.getElementById("otTip").setAttributeNS(null, 'visibility', 'hidden'); - } - - ]]> - </script> - <defs> - <!-- symbols for check boxes --> - <symbol id="cbRect" overflow="visible"> - <rect x="-5" y="-5" width="10" height="10" fill="white" stroke="dimgray" stroke-width="1" cursor="pointer"/> - </symbol> - <symbol id="cbCross" overflow="visible"> - <g pointer-events="none" stroke="black" stroke-width="1"> - <line x1="-3" y1="-3" x2="3" y2="3"/> - <line x1="3" y1="-3" x2="-3" y2="3"/> - </g> - </symbol> - </defs> - -<desc>Developer Works Dynamic Scatter Graph Scaling Example</desc> - -<!-- Now Draw the main X and Y axis --> -<g style="stroke-width:1.0; stroke:black; shape-rendering:crispEdges"> - <!-- X Axis top and bottom --> - <path d="M 100 100 L 1250 100 Z"/> - <path d="M 100 700 L 1250 700 Z"/> - - <!-- Y Axis left and right --> - <path d="M 100 100 L 100 700 Z"/> - <path d="M 1250 100 L 1250 700 Z"/> -</g> - -<g transform="translate(100,100)"> - - <!-- Grid Lines --> - <g style="fill:none; stroke:#dddddd; stroke-width:1; stroke-dasharray:2,2; text-anchor:end; shape-rendering:crispEdges"> - - <!-- Vertical grid lines --> - <line x1="125" y1="0" x2="115" y2="600" /> - <line x1="230" y1="0" x2="230" y2="600" /> - <line x1="345" y1="0" x2="345" y2="600" /> - <line x1="460" y1="0" x2="460" y2="600" /> - <line x1="575" y1="0" x2="575" y2="600" style="stroke-dasharray:none;" /> - <line x1="690" y1="0" x2="690" y2="600" /> - <line x1="805" y1="0" x2="805" y2="600" /> - <line x1="920" y1="0" x2="920" y2="600" /> - <line x1="1035" y1="0" x2="1035" y2="600" /> - - <!-- Horizontal grid lines --> - <line x1="0" y1="60" x2="1150" y2="60" /> - <line x1="0" y1="120" x2="1150" y2="120" /> - <line x1="0" y1="180" x2="1150" y2="180" /> - <line x1="0" y1="240" x2="1150" y2="240" /> - <line x1="0" y1="300" x2="1150" y2="300" style="stroke-dasharray:none;" /> - <line x1="0" y1="360" x2="1150" y2="360" /> - <line x1="0" y1="420" x2="1150" y2="420" /> - <line x1="0" y1="480" x2="1150" y2="480" /> - <line x1="0" y1="540" x2="1150" y2="540" /> - </g> - - <!-- Legend --> - <g style="fill:black; stroke:none" font-size="12" font-family="Arial" transform="translate(25,25)"> - <rect width="160" height="270" style="fill:none; stroke:black; shape-rendering:crispEdges" /> - <text x="5" y="20" style="fill:black; stroke:none;" font-size="13" font-weight="bold">Given Pair Relationship</text> - <rect x="120" y="35" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="55" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="75" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="95" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="115" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="135" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="155" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <text x="15" y="195" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore gt 15</text> - <circle cx="125" cy="192" r="6" style="stroke:red; fill:gold; fill-opacity:1.0; stroke-width:1;"/> - <text x="15" y="215" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore 4 to 15</text> - <circle cx="125" cy="212" r="3" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/> - <text x="15" y="235" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore lt 4</text> - <circle cx="125" cy="232" r="2" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/> - <g id="checkboxes"> - </g> - </g> - - - <g style='fill:black; stroke:none' font-size="17" font-family="Arial"> - <!-- X Axis Labels --> - <text x="480" y="660">Mean Alleles Shared</text> - <text x="0" y="630" >1.0</text> - <text x="277" y="630" >1.25</text> - <text x="564" y="630" >1.5</text> - <text x="842" y="630" >1.75</text> - <text x="1140" y="630" >2.0</text> - </g> - - <g transform="rotate(270)" style="fill:black; stroke:none" font-size="17" font-family="Arial"> - <!-- Y Axis Labels --> - <text x="-350" y="-40">SD Alleles Shared</text> - <text x="-20" y="-10" >1.0</text> - <text x="-165" y="-10" >0.75</text> - <text x="-310" y="-10" >0.5</text> - <text x="-455" y="-10" >0.25</text> - <text x="-600" y="-10" >0.0</text> - </g> - -<!-- Plot Title --> -<g style="fill:black; stroke:none" font-size="18" font-family="Arial"> - <text x="425" y="-30">%s</text> -</g> - -<!-- One group/layer of points for each relationship type --> -''' - -SVG_FOOTER = ''' -<!-- End of Data --> -</g> -<g id="btTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial"> - <rect width="250" height="110" style="fill:silver" rx="2" ry="2"/> - <rect id="btHead" width="250" height="20" rx="2" ry="2" /> - <text id="btRel" y="14" x="85">unrelated</text> - <text id="btMean" y="40" x="4">mean=1.5 +/- 0.04</text> - <text id="btSdev" y="60" x="4">sdev=0.7 +/- 0.03</text> - <text id="btPair" y="80" x="4">npairs=1152</text> - <text id="btGeno" y="100" x="4">ngenos=4783 +/- 24 (min=1000, max=5000)</text> -</g> - -<g id="otTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial"> - <rect width="150" height="180" style="fill:silver" rx="2" ry="2"/> - <rect id="otHead" width="150" height="20" rx="2" ry="2" /> - <text id="otRel" y="14" x="40">sibpairs</text> - <text id="otS1" y="40" x="4">s1=fid1,iid1</text> - <text id="otS2" y="60" x="4">s2=fid2,iid2</text> - <text id="otMean" y="80" x="4">mean=1.82</text> - <text id="otSdev" y="100" x="4">sdev=0.7</text> - <text id="otGeno" y="120" x="4">ngeno=4487</text> - <text id="otRmean" y="140" x="4">relmean=1.85</text> - <text id="otRsdev" y="160" x="4">relsdev=0.65</text> -</g> -</svg> -''' - -OUTLIERS_HEADER = 'Mean\tSdev\tZ(mean)\tZ(sdev)\tFID1\tIID1\tFID2\tIID2\tMean(Mean)\tSdev(Mean)\tMean(Sdev)\tSdev(Sdev)\n' - -DEFAULT_MAX_SAMPLE_SIZE = 5000 - -REF_COUNT_HOM1 = 3 -REF_COUNT_HET = 2 -REF_COUNT_HOM2 = 1 -MISSING = 0 - -MARKER_PAIRS_PER_SECOND_SLOW = 15000000 -MARKER_PAIRS_PER_SECOND_FAST = 70000000 - -POLYGONS = { - REL_UNRELATED: ((1.360, 0.655), (1.385, 0.730), (1.620, 0.575), (1.610, 0.505)), - REL_HALFSIBS: ((1.630, 0.500), (1.630, 0.550), (1.648, 0.540), (1.648, 0.490)), - REL_SIBS: ((1.660, 0.510), (1.665, 0.560), (1.820, 0.410), (1.820, 0.390)), - REL_PARENTCHILD: ((1.650, 0.470), (1.650, 0.490), (1.750, 0.440), (1.750, 0.420)), - REL_DUPE: ((1.970, 0.000), (1.970, 0.150), (2.000, 0.150), (2.000, 0.000)), - } - -def distance(point1, point2): - """ Calculate the distance between two points - """ - (x1,y1) = [float(d) for d in point1] - (x2,y2) = [float(d) for d in point2] - dx = abs(x1 - x2) - dy = abs(y1 - y2) - return math.sqrt(dx**2 + dy**2) - -def point_inside_polygon(x, y, poly): - """ Determine if a point (x,y) is inside a given polygon or not - poly is a list of (x,y) pairs. - - Taken from: http://www.ariel.com.au/a/python-point-int-poly.html - """ - - n = len(poly) - inside = False - - p1x,p1y = poly[0] - for i in range(n+1): - p2x,p2y = poly[i % n] - if y > min(p1y,p2y): - if y <= max(p1y,p2y): - if x <= max(p1x,p2x): - if p1y != p2y: - xinters = (y-p1y)*(p2x-p1x)/(p2y-p1y)+p1x - if p1x == p2x or x <= xinters: - inside = not inside - p1x,p1y = p2x,p2y - return inside - -def readMap(pedfile): - """ - """ - mapfile = pedfile.replace('.ped', '.map') - marker_list = [] - if os.path.exists(mapfile): - print 'readMap: %s' % (mapfile) - fh = file(mapfile, 'r') - for line in fh: - marker_list.append(line.strip().split()) - fh.close() - print 'readMap: %s markers' % (len(marker_list)) - return marker_list - -def calcMeanSD(useme): - """ - A numerically stable algorithm is given below. It also computes the mean. - This algorithm is due to Knuth,[1] who cites Welford.[2] - n = 0 - mean = 0 - M2 = 0 - - foreach x in data: - n = n + 1 - delta = x - mean - mean = mean + delta/n - M2 = M2 + delta*(x - mean) // This expression uses the new value of mean - end for - - variance_n = M2/n - variance = M2/(n - 1) - """ - mean = 0.0 - M2 = 0.0 - sd = 0.0 - n = len(useme) - if n > 1: - for i,x in enumerate(useme): - delta = x - mean - mean = mean + delta/(i+1) # knuth uses n+=1 at start - M2 = M2 + delta*(x - mean) # This expression uses the new value of mean - variance = M2/(n-1) # assume is sample so lose 1 DOF - sd = pow(variance,0.5) - return mean,sd - - -def doIBSpy(inpath='',basename='',outdir=None,logf=None, - nrsSamples=10000,title='title',pdftoo=0,Zcutoff=2.0): - #def doIBS(pedName, title, nrsSamples=None, pdftoo=False): - """ started with snpmatrix but GRR uses actual IBS counts and sd's - """ - repOut = [] # text strings to add to the html display - refallele = {} - tblf = '%s_table.xls' % (title) - tbl = file(os.path.join(outdir,tblf), 'w') - tbl.write(TABLE_HEADER) - svgf = '%s.svg' % (title) - svg = file(os.path.join(outdir,svgf), 'w') - - bedname = '%s.bed' % (inpath) - pedname = '%s.ped' % (inpath) - print 'pedname',pedname - if os.path.exists(bedname): - ped = plinkbinJZ.BPed(inpath) - ped.parse(quick=True) - elif os.path.exists(pedname): - ped = plinkbinJZ.LPed(inpath) - ped.parse() - else: - print >> sys.stdout, '## doIBSpy problem - cannot open %s or %s - cannot run' % (bedname,pedname) - nMarkers = len(ped._markers) - if nMarkers < 5: - print sys.stderr, '### ERROR - %d is too few markers for reliable estimation in %s - terminating' % (nMarkers,PROGNAME) - sys.exit(1) - nSubjects = len(ped._subjects) - nrsSamples = min(nMarkers, nrsSamples) - if opts and opts.use_mito: - markers = range(nMarkers) - nrsSamples = min(len(markers), nrsSamples) - sampleIndexes = sorted(random.sample(markers, nrsSamples)) - else: - autosomals = ped.autosomal_indices() - nrsSamples = min(len(autosomals), nrsSamples) - sampleIndexes = sorted(random.sample(autosomals, nrsSamples)) - - print '' - print 'Getting random.sample of %s from %s total' % (nrsSamples, nMarkers) - npairs = (nSubjects*(nSubjects-1))/2 # total rows in table - newfiles=[svgf,tblf] - explanations = ['rgGRR Plot (requires SVG)','Mean by SD alleles shared - %d rows' % npairs] - # these go with the output file links in the html file - s = 'Reading genotypes for %s subjects and %s markers\n' % (nSubjects, nrsSamples) - logf.write(s) - minUsegenos = nrsSamples/2 # must have half? - nGenotypes = nSubjects*nrsSamples - stime = time.time() - emptyRows = set() - genos = numpy.zeros((nSubjects, nrsSamples), dtype=int) - for s in xrange(nSubjects): - nValid = 0 - #getGenotypesByIndices(self, s, mlist, format) - genos[s] = ped.getGenotypesByIndices(s, sampleIndexes, format='ref') - nValid = sum([1 for g in genos[s] if g]) - if not nValid: - emptyRows.add(s) - sub = ped.getSubject(s) - print 'All missing for row %d (%s)' % (s, sub) - logf.write('All missing for row %d (%s)\n' % (s, sub)) - rtime = time.time() - stime - if verbose: - print '@@Read %s genotypes in %s seconds' % (nGenotypes, rtime) - - - ### Now the expensive part. For each pair of subjects, we get the mean number - ### and standard deviation of shared alleles over all of the markers where both - ### subjects have a known genotype. Identical subjects should have mean shared - ### alleles very close to 2.0 with a standard deviation very close to 0.0. - tot = nSubjects*(nSubjects-1)/2 - nprog = tot/10 - nMarkerpairs = tot * nrsSamples - estimatedTimeSlow = nMarkerpairs/MARKER_PAIRS_PER_SECOND_SLOW - estimatedTimeFast = nMarkerpairs/MARKER_PAIRS_PER_SECOND_FAST - - pairs = [] - pair_data = {} - means = [] ## Mean IBS for each pair - ngenoL = [] ## Count of comparable genotypes for each pair - sdevs = [] ## Standard dev for each pair - rels = [] ## A relationship code for each pair - zmeans = [0.0 for x in xrange(tot)] ## zmean score for each pair for the relgroup - zstds = [0.0 for x in xrange(tot)] ## zstd score for each pair for the relgrp - skip = set() - ndone = 0 ## How many have been done so far - - logf.write('Calculating %d pairs, updating every %d pairs...\n' % (tot, nprog)) - logf.write('Estimated time is %2.2f to %2.2f seconds ...\n' % (estimatedTimeFast, estimatedTimeSlow)) - - t1sum = 0 - t2sum = 0 - t3sum = 0 - now = time.time() - scache = {} - _founder_cache = {} - C_CODE = """ - #include "math.h" - int i; - int sumibs = 0; - int ssqibs = 0; - int ngeno = 0; - float mean = 0; - float M2 = 0; - float delta = 0; - float sdev=0; - float variance=0; - for (i=0; i<nrsSamples; i++) { - int a1 = g1[i]; - int a2 = g2[i]; - if (a1 != 0 && a2 != 0) { - ngeno += 1; - int shared = 2-abs(a1-a2); - delta = shared - mean; - mean = mean + delta/ngeno; - M2 += delta*(shared-mean); - // yes that second time, the updated mean is used see calcmeansd above; - //printf("%d %d %d %d %d %d\\n", i, a1, a2, ngeno, shared, squared); - } - } - if (ngeno > 1) { - variance = M2/(ngeno-1); - sdev = sqrt(variance); - //printf("OK: %d %3.2f %3.2f\\n", ngeno, mean, sdev); - } - //printf("%d %d %d %1.2f %1.2f\\n", ngeno, sumibs, ssqibs, mean, sdev); - result[0] = ngeno; - result[1] = mean; - result[2] = sdev; - return_val = ngeno; - """ - started = time.time() - for s1 in xrange(nSubjects): - if s1 in emptyRows: - continue - (fid1,iid1,did1,mid1,sex1,phe1,iid1,d_sid1,m_sid1) = scache.setdefault(s1, ped.getSubject(s1)) - - isFounder1 = _founder_cache.setdefault(s1, (did1==mid1)) - g1 = genos[s1] - - for s2 in xrange(s1+1, nSubjects): - if s2 in emptyRows: - continue - if nprog and ndone % nprog == 0 and ndone > 1: - dur = time.time() - started - pct = float(ndone)/tot*100.0 - logf.write('%f sec at pair %d of %d (%3.2f%%): %f marker*pairs/sec\n' % (dur, ndone, tot, pct, ndone/dur*nrsSamples)) - t1s = time.time() - - (fid2,iid2,did2,mid2,sex2,phe2,iid2,d_sid2,m_sid2) = scache.setdefault(s2, ped.getSubject(s2)) - - g2 = genos[s2] - isFounder2 = _founder_cache.setdefault(s2, (did2==mid2)) - - # Determine the relationship for this pair - relcode = REL_UNKNOWN - if (fid2 == fid1): - if iid1 == iid2: - relcode = REL_DUPE - elif (did2 == did1) and (mid2 == mid1) and did1 != mid1: - relcode = REL_SIBS - elif (iid1 == mid2) or (iid1 == did2) or (iid2 == mid1) or (iid2 == did1): - relcode = REL_PARENTCHILD - elif (str(did1) != '0' and (did2 == did1)) or (str(mid1) != '0' and (mid2 == mid1)): - relcode = REL_HALFSIBS - else: - # People in the same family should be marked as some other - # form of related. In general, these people will have a - # pretty random spread of similarity. This distinction is - # probably not very useful most of the time - relcode = REL_RELATED - else: - ### Different families - relcode = REL_UNRELATED - - t1e = time.time() - t1sum += t1e-t1s - - - ### Calculate sum(2-abs(a1-a2)) and sum((2-abs(a1-a2))**2) and count - ### the number of contributing genotypes. These values are not actually - ### calculated here, but instead are looked up in a table for speed. - ### FIXME: This is still too slow ... - result = [0.0, 0.0, 0.0] - ngeno = weave.inline(C_CODE, ['g1', 'g2', 'nrsSamples', 'result']) - if ngeno >= minUsegenos: - _, mean, sdev = result - means.append(mean) - sdevs.append(sdev) - ngenoL.append(ngeno) - pairs.append((s1, s2)) - rels.append(relcode) - else: - skip.add(ndone) # signal no comparable genotypes for this pair - ndone += 1 - t2e = time.time() - t2sum += t2e-t1e - t3e = time.time() - t3sum += t3e-t2e - - logme = [ 'T1: %s' % (t1sum), 'T2: %s' % (t2sum), 'T3: %s' % (t3sum),'TOT: %s' % (t3e-now), - '%s pairs with no (or not enough) comparable genotypes (%3.1f%%)' % (len(skip), - float(len(skip))/float(tot)*100)] - logf.write('%s\n' % '\t'.join(logme)) - ### Calculate mean and standard deviation of scores on a per relationship - ### type basis, allowing us to flag outliers for each particular relationship - ### type - relstats = {} - relCounts = {} - outlierFiles = {} - for relCode, relInfo in REL_LOOKUP.items(): - relName, relColor, relStyle = relInfo - useme = [means[x] for x in xrange(len(means)) if rels[x] == relCode] - relCounts[relCode] = len(useme) - mm = scipy.mean(useme) - ms = scipy.std(useme) - useme = [sdevs[x] for x in xrange(len(sdevs)) if rels[x] == relCode] - sm = scipy.mean(useme) - ss = scipy.std(useme) - relstats[relCode] = {'sd':(sm,ss), 'mean':(mm,ms)} - logf.write('Relstate %s: mean(mean)=%3.2f sdev(mean)=%3.2f, mean(sdev)=%3.2f sdev(sdev)=%3.2f\n' % (relName, mm, ms, sm, ss)) - - ### now fake z scores for each subject like abecasis recommends max(|zmu|,|zsd|) - ### within each group, for each pair, z=(groupmean-pairmean)/groupsd - available = len(means) - logf.write('%d pairs are available of %d\n' % (available, tot)) - ### s = '\nOutliers:\nrelationship\tzmean\tzsd\tped1\tped2\tmean\tsd\trmeanmean\trmeansd\trsdmean\trsdsd\n' - ### logf.write(s) - pairnum = 0 - offset = 0 - nOutliers = 0 - cexs = [] - outlierRecords = dict([(r, []) for r in range(N_RELATIONSHIP_TYPES)]) - zsdmax = 0 - for s1 in range(nSubjects): - if s1 in emptyRows: - continue - (fid1,iid1,did1,mid1,sex1,aff1,ok1,d_sid1,m_sid1) = scache[s1] - for s2 in range(s1+1, nSubjects): - if s2 in emptyRows: - continue - if pairnum not in skip: - ### Get group stats for this relationship - (fid2,iid2,did2,mid2,sex2,aff2,ok2,d_sid2,m_sid2) = scache[s2] - try: - r = rels[offset] - except IndexError: - logf.write('###OOPS offset %d available %d pairnum %d len(rels) %d', offset, available, pairnum, len(rels)) - rmm,rmd = relstats[r]['mean'] # group mean, group meansd alleles shared - rdm,rdd = relstats[r]['sd'] # group sdmean, group sdsd alleles shared - - try: - zsd = (sdevs[offset] - rdm)/rdd # distance from group mean in group sd units - except: - zsd = 1 - if abs(zsd) > zsdmax: - zsdmax = zsd # keep for sort scaling - try: - zmean = (means[offset] - rmm)/rmd # distance from group mean - except: - zmean = 1 - zmeans[offset] = zmean - zstds[offset] = zsd - pid=(s1,s2) - zrad = max(zsd,zmean) - if zrad < 4: - zrad = 2 - elif 4 < zrad < 15: - zrad = 3 # to 9 - else: # > 15 6=24+ - zrad=zrad/4 - zrad = min(zrad,6) # scale limit - zrad = max(2,max(zsd,zmean)) # as > 2, z grows - pair_data[pid] = (zmean,zsd,r,zrad) - if max(zsd,zmean) > Zcutoff: # is potentially interesting - mean = means[offset] - sdev = sdevs[offset] - outlierRecords[r].append((mean, sdev, zmean, zsd, fid1, iid1, fid2, iid2, rmm, rmd, rdm, rdd)) - nOutliers += 1 - tbl.write('%s_%s\t%s_%s\t%f\t%f\t%f\t%f\t%d\t%s\n' % \ - (fid1, iid1, fid2, iid2, mean, sdev, zmean,zsd, ngeno, relcode)) - offset += 1 - pairnum += 1 - logf.write( 'Outliers: %s\n' % (nOutliers)) - - ### Write outlier files for each relationship type - repOut.append('<h2>Outliers in tab delimited files linked above are also listed below</h2>') - lzsd = round(numpy.log10(zsdmax)) + 1 - scalefactor = 10**lzsd - for relCode, relInfo in REL_LOOKUP.items(): - relName, _, _ = relInfo - outliers = outlierRecords[relCode] - if not outliers: - continue - outliers = [(scalefactor*int(abs(x[3]))+ int(abs(x[2])),x) for x in outliers] # decorate - outliers.sort() - logf.write('### outliers after decorated sort=%s' % outliers) - outliers.reverse() # largest deviation first - logf.write('### outliers after decorated sort=%s' % outliers) - outliers = [x[1] for x in outliers] # undecorate - nrows = len(outliers) - truncated = 0 - if nrows > MAX_SHOW_ROWS: - s = '<h3>%s outlying pairs (top %d of %d) from %s</h3><table border="0" cellpadding="3">' % (relName, - MAX_SHOW_ROWS,nrows,title) - truncated = nrows - MAX_SHOW_ROWS - else: - s = '<h3>%s outlying pairs (n=%d) from %s</h3><table border="0" cellpadding="3">' % (relName,nrows,title) - repOut.append(s) - fhname = '%s_rgGRR_%s_outliers.xls' % (title, relName) - fhpath = os.path.join(outdir,fhname) - fh = open(fhpath, 'w') - newfiles.append(fhname) - explanations.append('%s Outlier Pairs %s, N=%d, Cutoff SD=%f' % (relName,title,len(outliers),Zcutoff)) - fh.write(OUTLIERS_HEADER) - s = ''.join(['<th>%s</th>' % x for x in OUTLIERS_HEADER_list]) - repOut.append('<tr align="center">%s</tr>' % s) - for n,rec in enumerate(outliers): - #(mean, sdev, zmean, zsd, fid1, iid1, fid2, iid2, rmm, rmd, rdm, rdd) = rec - fh.write('%f\t%f\t%f\t%f\t%s\t%s\t%s\t%s\t%f\t%f\t%f\t%f\n' % tuple(rec)) - # (mean, sdev, zmean, zsd, fid1, iid1, fid2, iid2, rmm, rmd, rdm, rdd)) - s = '''<td>%f</td><td>%f</td><td>%f</td><td>%f</td><td>%s</td><td>%s</td> - <td>%s</td><td>%s</td><td>%f</td><td>%f</td><td>%f</td><td>%f</td>''' % tuple(rec) - if n < MAX_SHOW_ROWS: - repOut.append('<tr align="center">%s</tr>' % s) - if truncated > 0: - repOut.append('<H2>WARNING: %d rows truncated - see outlier file for all %d rows</H2>' % (truncated, - nrows)) - fh.close() - repOut.append('</table><p>') - - ### Now, draw the plot in jpeg and svg formats, and optionally in the PDF format - ### if requested - logf.write('Plotting ...') - pointColors = [REL_COLORS[rel] for rel in rels] - pointStyles = [REL_POINTS[rel] for rel in rels] - - mainTitle = '%s (%s subjects, %d snp)' % (title, nSubjects, nrsSamples) - svg.write(SVG_HEADER % (SVG_COLORS[0],SVG_COLORS[1],SVG_COLORS[2],SVG_COLORS[3],SVG_COLORS[4], - SVG_COLORS[5],SVG_COLORS[6],SVG_COLORS[0],SVG_COLORS[0],SVG_COLORS[1],SVG_COLORS[1], - SVG_COLORS[2],SVG_COLORS[2],SVG_COLORS[3],SVG_COLORS[3],SVG_COLORS[4],SVG_COLORS[4], - SVG_COLORS[5],SVG_COLORS[5],SVG_COLORS[6],SVG_COLORS[6],mainTitle)) - #rpy.r.jpeg(filename='%s.jpg' % (title), width=1600, height=1200, pointsize=12, quality=100, bg='white') - #rpy.r.par(mai=(1,1,1,0.5)) - #rpy.r('par(xaxs="i",yaxs="i")') - #rpy.r.plot(means, sdevs, main=mainTitle, ylab=Y_AXIS_LABEL, xlab=X_AXIS_LABEL, cex=cexs, col=pointColors, pch=pointStyles, xlim=(0,2), ylim=(0,2)) - #rpy.r.legend(LEGEND_ALIGN, legend=REL_STATES, pch=REL_POINTS, col=REL_COLORS, title=LEGEND_TITLE) - #rpy.r.grid(nx=10, ny=10, col='lightgray', lty='dotted') - #rpy.r.dev_off() - - ### We will now go through each relationship type to partition plot points - ### into "bulk" and "outlier" groups. Bulk points will represent common - ### mean/sdev pairs and will cover the majority of the points in the plot -- - ### they will use generic tooltip informtion about all of the pairs - ### represented by that point. "Outlier" points will be uncommon pairs, - ### with very specific information in their tooltips. It would be nice to - ### keep hte total number of plotted points in the SVG representation to - ### ~10000 (certainly less than 100000?) - pointMap = {} - orderedRels = [y[1] for y in reversed(sorted([(relCounts.get(x, 0),x) for x in REL_LOOKUP.keys()]))] - # do we really want this? I want out of zone points last and big - for relCode in orderedRels: - svgColor = SVG_COLORS[relCode] - relName, relColor, relStyle = REL_LOOKUP[relCode] - svg.write('<g id="%s" style="stroke:%s; fill:%s; fill-opacity:1.0; stroke-width:1;" cursor="pointer">\n' % (relName, svgColor, svgColor)) - pMap = pointMap.setdefault(relCode, {}) - nPoints = 0 - rpairs=[] - rgenos=[] - rmeans=[] - rsdevs=[] - rz = [] - for x,rel in enumerate(rels): # all pairs - if rel == relCode: - s1,s2 = pairs[x] - pid=(s1,s2) - zmean,zsd,r,zrad = pair_data[pid][:4] - rpairs.append(pairs[x]) - rgenos.append(ngenoL[x]) - rmeans.append(means[x]) - rsdevs.append(sdevs[x]) - rz.append(zrad) - ### Now add the svg point group for this relationship to the svg file - for x in range(len(rmeans)): - svgX = '%d' % ((rmeans[x] - 1.0) * PLOT_WIDTH) # changed so mean scale is 1-2 - svgY = '%d' % (PLOT_HEIGHT - (rsdevs[x] * PLOT_HEIGHT)) # changed so sd scale is 0-1 - s1, s2 = rpairs[x] - (fid1,uid1,did1,mid1,sex1,phe1,iid1,d_sid1,m_sid1) = scache[s1] - (fid2,uid2,did2,mid2,sex2,phe2,iid2,d_sid2,m_sid2) = scache[s2] - ngenos = rgenos[x] - nPoints += 1 - point = pMap.setdefault((svgX, svgY), []) - point.append((rmeans[x], rsdevs[x], fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, ngenos,rz[x])) - for (svgX, svgY) in pMap: - points = pMap[(svgX, svgY)] - svgX = int(svgX) - svgY = int(svgY) - if len(points) > 1: - mmean,dmean = calcMeanSD([p[0] for p in points]) - msdev,dsdev = calcMeanSD([p[1] for p in points]) - mgeno,dgeno = calcMeanSD([p[-1] for p in points]) - mingeno = min([p[-1] for p in points]) - maxgeno = max([p[-1] for p in points]) - svg.write("""<circle cx="%d" cy="%d" r="2" - onmouseover="showBTT(evt, %d, %1.2f, %1.2f, %1.2f, %1.2f, %d, %d, %d, %d, %d)" - onmouseout="hideBTT(evt)" />\n""" % (svgX, svgY, relCode, mmean, dmean, msdev, dsdev, len(points), mgeno, dgeno, mingeno, maxgeno)) - else: - mean, sdev, fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, ngenos, zrad = points[0][:12] - rmean = float(relstats[relCode]['mean'][0]) - rsdev = float(relstats[relCode]['sd'][0]) - if zrad < 4: - zrad = 2 - elif 4 < zrad < 9: - zrad = 3 # to 9 - else: # > 9 5=15+ - zrad=zrad/3 - zrad = min(zrad,5) # scale limit - if zrad <= 3: - svg.write('<circle cx="%d" cy="%d" r="%s" onmouseover="showOTT(evt, %d, \'%s,%s,%s,%s\', \'%s,%s,%s,%s\', %1.2f, %1.2f, %s, %1.2f, %1.2f)" onmouseout="hideOTT(evt)" />\n' % (svgX, svgY, zrad, relCode, fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, mean, sdev, ngenos, rmean, rsdev)) - else: # highlight pairs a long way from expectation by outlining circle in red - svg.write("""<circle cx="%d" cy="%d" r="%s" style="stroke:red; fill:%s; fill-opacity:1.0; stroke-width:1;" - onmouseover="showOTT(evt, %d, \'%s,%s,%s,%s\', \'%s,%s,%s,%s\', %1.2f, %1.2f, %s, %1.2f, %1.2f)" - onmouseout="hideOTT(evt)" />\n""" % \ - (svgX, svgY, zrad, svgColor, relCode, fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, mean, sdev, ngenos, rmean, rsdev)) - svg.write('</g>\n') - - ### Create a pdf as well if indicated on the command line - ### WARNING! for framingham share, with about 50M pairs, this is a 5.5GB pdf! -## if pdftoo: -## pdfname = '%s.pdf' % (title) -## rpy.r.pdf(pdfname, 6, 6) -## rpy.r.par(mai=(1,1,1,0.5)) -## rpy.r('par(xaxs="i",yaxs="i")') -## rpy.r.plot(means, sdevs, main='%s, %d snp' % (title, nSamples), ylab=Y_AXIS_LABEL, xlab=X_AXIS_LABEL, cex=cexs, col=pointColors, pch=pointStyles, xlim=(0,2), ylim=(0,2)) -## rpy.r.legend(LEGEND_ALIGN, legend=REL_STATES, pch=REL_POINTS, col=REL_COLORS, title=LEGEND_TITLE) -## rpy.r.grid(nx=10, ny=10, col='lightgray', lty='dotted') -## rpy.r.dev_off() - - ### Draw polygons - if showPolygons: - svg.write('<g id="polygons" cursor="pointer">\n') - for rel, poly in POLYGONS.items(): - points = ' '.join(['%s,%s' % ((p[0]-1.0)*float(PLOT_WIDTH), (PLOT_HEIGHT - p[1]*PLOT_HEIGHT)) for p in poly]) - svg.write('<polygon points="%s" fill="transparent" style="stroke:%s; stroke-width:1"/>\n' % (points, SVG_COLORS[rel])) - svg.write('</g>\n') - - - svg.write(SVG_FOOTER) - svg.close() - return newfiles,explanations,repOut - -def doIBS(n=100): - """parse parameters from galaxy - expect 'input pbed path' 'basename' 'outpath' 'title' 'logpath' 'n' - <command interpreter="python"> - rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name" - '$out_file1' '$out_file1.files_path' "$title" '$n' '$Z' - </command> - - """ - u="""<command interpreter="python"> - rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name" - '$out_file1' '$out_file1.files_path' "$title" '$n' '$Z' - </command>""" - - if len(sys.argv) < 8: - print >> sys.stdout, 'Need pbed inpath, basename, out_htmlname, outpath, title, logpath, nSNP, Zcutoff on command line please' - print >> sys.stdout, u - sys.exit(1) - ts = '%s%s' % (string.punctuation,string.whitespace) - ptran = string.maketrans(ts,'_'*len(ts)) - inpath = sys.argv[1] - basename = sys.argv[2] - outhtml = sys.argv[3] - newfilepath = sys.argv[4] - try: - os.makedirs(newfilepath) - except: - pass - title = sys.argv[5].translate(ptran) - logfname = 'Log_%s.txt' % title - logpath = os.path.join(newfilepath,logfname) # log was a child - make part of html extra_files_path zoo - n = int(sys.argv[6]) - try: - Zcutoff = float(sys.argv[7]) - except: - Zcutoff = 2.0 - try: - os.makedirs(newfilepath) - except: - pass - logf = file(logpath,'w') - newfiles,explanations,repOut = doIBSpy(inpath=inpath,basename=basename,outdir=newfilepath, - logf=logf,nrsSamples=n,title=title,pdftoo=0,Zcutoff=Zcutoff) - logf.close() - logfs = file(logpath,'r').readlines() - lf = file(outhtml,'w') - lf.write(galhtmlprefix % PROGNAME) - # this is a mess. todo clean up - should each datatype have it's own directory? Yes - # probably. Then titles are universal - but userId libraries are separate. - s = '<div>Output from %s run at %s<br>\n' % (PROGNAME,timenow()) - lf.write('<h4>%s</h4>\n' % s) - fixed = ["'%s'" % x for x in sys.argv] # add quotes just in case - s = 'If you need to rerun this analysis, the command line was\n<pre>%s</pre>\n</div>' % (' '.join(fixed)) - lf.write(s) - #s = """<object data="%s" type="image/svg+xml" width="%d" height="%d"> - # <embed src="%s" type="image/svg+xml" width="%d" height="%d" /> - # </object>""" % (newfiles[0],PLOT_WIDTH,PLOT_HEIGHT,newfiles[0],PLOT_WIDTH,PLOT_HEIGHT) - s = """ <embed src="%s" type="image/svg+xml" width="%d" height="%d" />""" % (newfiles[0],PLOT_WIDTH,PLOT_HEIGHT) - #s = """ <iframe src="%s" type="image/svg+xml" width="%d" height="%d" />""" % (newfiles[0],PLOT_WIDTH,PLOT_HEIGHT) - lf.write(s) - lf.write('<div><h4>Click the links below to save output files and plots</h4><br><ol>\n') - for i in range(len(newfiles)): - if i == 0: - lf.write('<li><a href="%s" type="image/svg+xml" >%s</a></li>\n' % (newfiles[i],explanations[i])) - else: - lf.write('<li><a href="%s">%s</a></li>\n' % (newfiles[i],explanations[i])) - flist = os.listdir(newfilepath) - for fname in flist: - if not fname in newfiles: - lf.write('<li><a href="%s">%s</a></li>\n' % (fname,fname)) - lf.write('</ol></div>') - lf.write('<div>%s</div>' % ('\n'.join(repOut))) # repOut is a list of tables - lf.write('<div><hr><h3>Log from this job (also stored in %s)</h3><pre>%s</pre><hr></div>' % (logfname,'\n'.join(logfs))) - lf.write('</body></html>\n') - lf.close() - logf.close() - -if __name__ == '__main__': - doIBS() - - +""" +# july 2009: Need to see outliers so need to draw them last? +# could use clustering on the zscores to guess real relationships for unrelateds +# but definitely need to draw last +# added MAX_SHOW_ROWS to limit the length of the main report page +# Changes for Galaxy integration +# added more robust knuth method for one pass mean and sd +# no difference really - let's use scipy.mean() and scipy.std() instead... +# fixed labels and changed to .xls for outlier reports so can open in excel +# interesting - with a few hundred subjects, 5k gives good resolution +# and 100k gives better but not by much +# TODO remove non autosomal markers +# TODO it would be best if label had the zmean and zsd as these are what matter for +# outliers rather than the group mean/sd +# mods to rgGRR.py from channing CVS which John Ziniti has rewritten to produce SVG plots +# to make a Galaxy tool - we need the table of mean and SD for interesting pairs, the SVG and the log +# so the result should be an HTML file + +# rgIBS.py +# use a random subset of markers for a quick ibs +# to identify sample dups and closely related subjects +# try snpMatrix and plink and see which one works best for us? +# abecasis grr plots mean*sd for every subject to show clusters +# mods june 23 rml to avoid non-autosomal markers +# we seem to be distinguishing parent-child by gender - 2 clouds! + + +snpMatrix from David Clayton has: +ibs.stats function to calculate the identity-by-state stats of a group of samples +Description +Given a snp.matrix-class or a X.snp.matrix-class object with N samples, calculates some statistics +about the relatedness of every pair of samples within. + +Usage +ibs.stats(x) +8 ibs.stats +Arguments +x a snp.matrix-class or a X.snp.matrix-class object containing N samples +Details +No-calls are excluded from consideration here. +Value +A data.frame containing N(N - 1)/2 rows, where the row names are the sample name pairs separated +by a comma, and the columns are: +Count count of identical calls, exclusing no-calls +Fraction fraction of identical calls comparied to actual calls being made in both samples +Warning +In some applications, it may be preferable to subset a (random) selection of SNPs first - the +calculation +time increases as N(N - 1)M/2 . Typically for N = 800 samples and M = 3000 SNPs, the +calculation time is about 1 minute. A full GWA scan could take hours, and quite unnecessary for +simple applications such as checking for duplicate or related samples. +Note +This is mostly written to find mislabelled and/or duplicate samples. +Illumina indexes their SNPs in alphabetical order so the mitochondria SNPs comes first - for most +purpose it is undesirable to use these SNPs for IBS purposes. +TODO: Worst-case S4 subsetting seems to make 2 copies of a large object, so one might want to +subset before rbind(), etc; a future version of this routine may contain a built-in subsetting facility +""" +import sys,os,time,random,string,copy,optparse + +try: + set +except NameError: + from Sets import Set as set + +from rgutils import timenow,pruneLD,plinke +import plinkbinJZ + + +opts = None +verbose = False + +showPolygons = False + +class NullDevice: + def write(self, s): + pass + +tempstderr = sys.stderr # save +sys.stderr = NullDevice() +# need to avoid blather about deprecation and other strange stuff from scipy +# the current galaxy job runner assumes that +# the job is in error if anything appears on sys.stderr +# grrrrr. James wants to keep it that way instead of using the +# status flag for some strange reason. Presumably he doesn't use R or (in this case, scipy) +import numpy +import scipy +from scipy import weave + + +sys.stderr=tempstderr + + +PROGNAME = os.path.split(sys.argv[0])[-1] +X_AXIS_LABEL = 'Mean Alleles Shared' +Y_AXIS_LABEL = 'SD Alleles Shared' +LEGEND_ALIGN = 'topleft' +LEGEND_TITLE = 'Relationship' +DEFAULT_SYMBOL_SIZE = 1.0 # default symbol size +DEFAULT_SYMBOL_SIZE = 0.5 # default symbol size + +### Some colors for R/rpy +R_BLACK = 1 +R_RED = 2 +R_GREEN = 3 +R_BLUE = 4 +R_CYAN = 5 +R_PURPLE = 6 +R_YELLOW = 7 +R_GRAY = 8 + +### ... and some point-styles + +### +PLOT_HEIGHT = 600 +PLOT_WIDTH = 1150 + + +#SVG_COLORS = ('black', 'darkblue', 'blue', 'deepskyblue', 'firebrick','maroon','crimson') +#SVG_COLORS = ('cyan','dodgerblue','mediumpurple', 'fuchsia', 'red','gold','gray') +SVG_COLORS = ('cyan','dodgerblue','mediumpurple','forestgreen', 'lightgreen','gold','gray') +# dupe,parentchild,sibpair,halfsib,parents,unrel,unkn +#('orange', 'red', 'green', 'chartreuse', 'blue', 'purple', 'gray') + +OUTLIERS_HEADER = 'Mean\tSdev\tZ(mean)\tZ(sdev)\tFID1\tIID1\tFID2\tIID2\tMean(Rel_Mean)\tSdev(Rel_Mean)\tMean(Rel_Sdev)\tSdev(Rel_Sdev)\n' +OUTLIERS_HEADER_list = ['Mean','Sdev','ZMean','ZSdev','FID1','IID1','FID2','IID2', +'RGMean_M','RGMean_SD','RGSD_M','RGSD_SD'] +TABLE_HEADER='fid1 iid1\tfid2 iid2\tmean\tsdev\tzmean\tzsdev\tgeno\trelcode\n' + + +### Relationship codes, text, and lookups/mappings +N_RELATIONSHIP_TYPES = 7 +REL_DUPE, REL_PARENTCHILD, REL_SIBS, REL_HALFSIBS, REL_RELATED, REL_UNRELATED, REL_UNKNOWN = range(N_RELATIONSHIP_TYPES) +REL_LOOKUP = { + REL_DUPE: ('dupe', R_BLUE, 1), + REL_PARENTCHILD: ('parentchild', R_YELLOW, 1), + REL_SIBS: ('sibpairs', R_RED, 1), + REL_HALFSIBS: ('halfsibs', R_GREEN, 1), + REL_RELATED: ('parents', R_PURPLE, 1), + REL_UNRELATED: ('unrelated', R_CYAN, 1), + REL_UNKNOWN: ('unknown', R_GRAY, 1), + } +OUTLIER_STDEVS = { + REL_DUPE: 2, + REL_PARENTCHILD: 2, + REL_SIBS: 2, + REL_HALFSIBS: 2, + REL_RELATED: 2, + REL_UNRELATED: 3, + REL_UNKNOWN: 2, + } +# note now Z can be passed in + +REL_STATES = [REL_LOOKUP[r][0] for r in range(N_RELATIONSHIP_TYPES)] +REL_COLORS = SVG_COLORS +REL_POINTS = [REL_LOOKUP[r][2] for r in range(N_RELATIONSHIP_TYPES)] + +DEFAULT_MAX_SAMPLE_SIZE = 10000 + +REF_COUNT_HOM1 = 3 +REF_COUNT_HET = 2 +REF_COUNT_HOM2 = 1 +MISSING = 0 +MAX_SHOW_ROWS = 100 # framingham has millions - delays showing output page - so truncate and explain +MARKER_PAIRS_PER_SECOND_SLOW = 15000000.0 +MARKER_PAIRS_PER_SECOND_FAST = 70000000.0 + + +galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +""" + + +SVG_HEADER = '''<?xml version="1.0" standalone="no"?> +<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.2//EN" "http://www.w3.org/Graphics/SVG/1.2/DTD/svg12.dtd"> + +<svg width="1280" height="800" + xmlns="http://www.w3.org/2000/svg" version="1.2" + xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 1280 800" onload="init()"> + + <script type="text/ecmascript" xlink:href="/static/scripts/checkbox_and_radiobutton.js"/> + <script type="text/ecmascript" xlink:href="/static/scripts/helper_functions.js"/> + <script type="text/ecmascript" xlink:href="/static/scripts/timer.js"/> + <script type="text/ecmascript"> + <![CDATA[ + var checkBoxes = new Array(); + var radioGroupBandwidth; + var colours = ['%s','%s','%s','%s','%s','%s','%s']; + function init() { + var style = {"font-family":"Arial,Helvetica", "fill":"black", "font-size":12}; + var dist = 12; + var yOffset = 4; + + //A checkBox for each relationship type dupe,parentchild,sibpair,halfsib,parents,unrel,unkn + checkBoxes["dupe"] = new checkBox("dupe","checkboxes",20,40,"cbRect","cbCross",true,"Duplicate",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["parentchild"] = new checkBox("parentchild","checkboxes",20,60,"cbRect","cbCross",true,"Parent-Child",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["sibpairs"] = new checkBox("sibpairs","checkboxes",20,80,"cbRect","cbCross",true,"Sib-pairs",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["halfsibs"] = new checkBox("halfsibs","checkboxes",20,100,"cbRect","cbCross",true,"Half-sibs",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["parents"] = new checkBox("parents","checkboxes",20,120,"cbRect","cbCross",true,"Parents",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["unrelated"] = new checkBox("unrelated","checkboxes",20,140,"cbRect","cbCross",true,"Unrelated",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["unknown"] = new checkBox("unknown","checkboxes",20,160,"cbRect","cbCross",true,"Unknown",style,dist,yOffset,undefined,hideShowLayer); + + } + + function hideShowLayer(id, status, label) { + var vis = "hidden"; + if (status) { + vis = "visible"; + } + document.getElementById(id).setAttributeNS(null, 'visibility', vis); + } + + function showBTT(evt, rel, mm, dm, md, dd, n, mg, dg, lg, hg) { + var x = parseInt(evt.pageX)-250; + var y = parseInt(evt.pageY)-110; + switch(rel) { + case 0: + fill = colours[rel]; + relt = "dupe"; + break; + case 1: + fill = colours[rel]; + relt = "parentchild"; + break; + case 2: + fill = colours[rel]; + relt = "sibpairs"; + break; + case 3: + fill = colours[rel]; + relt = "halfsibs"; + break; + case 4: + fill = colours[rel]; + relt = "parents"; + break; + case 5: + fill = colours[rel]; + relt = "unrelated"; + break; + case 6: + fill = colours[rel]; + relt = "unknown"; + break; + default: + fill = "cyan"; + relt = "ERROR_CODE: "+rel; + } + + document.getElementById("btRel").textContent = "GROUP: "+relt; + document.getElementById("btMean").textContent = "mean="+mm+" +/- "+dm; + document.getElementById("btSdev").textContent = "sdev="+dm+" +/- "+dd; + document.getElementById("btPair").textContent = "npairs="+n; + document.getElementById("btGeno").textContent = "ngenos="+mg+" +/- "+dg+" (min="+lg+", max="+hg+")"; + document.getElementById("btHead").setAttribute('fill', fill); + + var tt = document.getElementById("btTip"); + tt.setAttribute("transform", "translate("+x+","+y+")"); + tt.setAttribute('visibility', 'visible'); + } + + function showOTT(evt, rel, s1, s2, mean, sdev, ngeno, rmean, rsdev) { + var x = parseInt(evt.pageX)-150; + var y = parseInt(evt.pageY)-180; + + switch(rel) { + case 0: + fill = colours[rel]; + relt = "dupe"; + break; + case 1: + fill = colours[rel]; + relt = "parentchild"; + break; + case 2: + fill = colours[rel]; + relt = "sibpairs"; + break; + case 3: + fill = colours[rel]; + relt = "halfsibs"; + break; + case 4: + fill = colours[rel]; + relt = "parents"; + break; + case 5: + fill = colours[rel]; + relt = "unrelated"; + break; + case 6: + fill = colours[rel]; + relt = "unknown"; + break; + default: + fill = "cyan"; + relt = "ERROR_CODE: "+rel; + } + + document.getElementById("otRel").textContent = "PAIR: "+relt; + document.getElementById("otS1").textContent = "s1="+s1; + document.getElementById("otS2").textContent = "s2="+s2; + document.getElementById("otMean").textContent = "mean="+mean; + document.getElementById("otSdev").textContent = "sdev="+sdev; + document.getElementById("otGeno").textContent = "ngenos="+ngeno; + document.getElementById("otRmean").textContent = "relmean="+rmean; + document.getElementById("otRsdev").textContent = "relsdev="+rsdev; + document.getElementById("otHead").setAttribute('fill', fill); + + var tt = document.getElementById("otTip"); + tt.setAttribute("transform", "translate("+x+","+y+")"); + tt.setAttribute('visibility', 'visible'); + } + + function hideBTT(evt) { + document.getElementById("btTip").setAttributeNS(null, 'visibility', 'hidden'); + } + + function hideOTT(evt) { + document.getElementById("otTip").setAttributeNS(null, 'visibility', 'hidden'); + } + + ]]> + </script> + <defs> + <!-- symbols for check boxes --> + <symbol id="cbRect" overflow="visible"> + <rect x="-5" y="-5" width="10" height="10" fill="white" stroke="dimgray" stroke-width="1" cursor="pointer"/> + </symbol> + <symbol id="cbCross" overflow="visible"> + <g pointer-events="none" stroke="black" stroke-width="1"> + <line x1="-3" y1="-3" x2="3" y2="3"/> + <line x1="3" y1="-3" x2="-3" y2="3"/> + </g> + </symbol> + </defs> + +<desc>Developer Works Dynamic Scatter Graph Scaling Example</desc> + +<!-- Now Draw the main X and Y axis --> +<g style="stroke-width:1.0; stroke:black; shape-rendering:crispEdges"> + <!-- X Axis top and bottom --> + <path d="M 100 100 L 1250 100 Z"/> + <path d="M 100 700 L 1250 700 Z"/> + + <!-- Y Axis left and right --> + <path d="M 100 100 L 100 700 Z"/> + <path d="M 1250 100 L 1250 700 Z"/> +</g> + +<g transform="translate(100,100)"> + + <!-- Grid Lines --> + <g style="fill:none; stroke:#dddddd; stroke-width:1; stroke-dasharray:2,2; text-anchor:end; shape-rendering:crispEdges"> + + <!-- Vertical grid lines --> + <line x1="125" y1="0" x2="115" y2="600" /> + <line x1="230" y1="0" x2="230" y2="600" /> + <line x1="345" y1="0" x2="345" y2="600" /> + <line x1="460" y1="0" x2="460" y2="600" /> + <line x1="575" y1="0" x2="575" y2="600" style="stroke-dasharray:none;" /> + <line x1="690" y1="0" x2="690" y2="600" /> + <line x1="805" y1="0" x2="805" y2="600" /> + <line x1="920" y1="0" x2="920" y2="600" /> + <line x1="1035" y1="0" x2="1035" y2="600" /> + + <!-- Horizontal grid lines --> + <line x1="0" y1="60" x2="1150" y2="60" /> + <line x1="0" y1="120" x2="1150" y2="120" /> + <line x1="0" y1="180" x2="1150" y2="180" /> + <line x1="0" y1="240" x2="1150" y2="240" /> + <line x1="0" y1="300" x2="1150" y2="300" style="stroke-dasharray:none;" /> + <line x1="0" y1="360" x2="1150" y2="360" /> + <line x1="0" y1="420" x2="1150" y2="420" /> + <line x1="0" y1="480" x2="1150" y2="480" /> + <line x1="0" y1="540" x2="1150" y2="540" /> + </g> + + <!-- Legend --> + <g style="fill:black; stroke:none" font-size="12" font-family="Arial" transform="translate(25,25)"> + <rect width="160" height="270" style="fill:none; stroke:black; shape-rendering:crispEdges" /> + <text x="5" y="20" style="fill:black; stroke:none;" font-size="13" font-weight="bold">Given Pair Relationship</text> + <rect x="120" y="35" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="55" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="75" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="95" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="115" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="135" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="155" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <text x="15" y="195" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore gt 15</text> + <circle cx="125" cy="192" r="6" style="stroke:red; fill:gold; fill-opacity:1.0; stroke-width:1;"/> + <text x="15" y="215" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore 4 to 15</text> + <circle cx="125" cy="212" r="3" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/> + <text x="15" y="235" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore lt 4</text> + <circle cx="125" cy="232" r="2" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/> + <g id="checkboxes"> + </g> + </g> + + + <g style='fill:black; stroke:none' font-size="17" font-family="Arial"> + <!-- X Axis Labels --> + <text x="480" y="660">Mean Alleles Shared</text> + <text x="0" y="630" >1.0</text> + <text x="277" y="630" >1.25</text> + <text x="564" y="630" >1.5</text> + <text x="842" y="630" >1.75</text> + <text x="1140" y="630" >2.0</text> + </g> + + <g transform="rotate(270)" style="fill:black; stroke:none" font-size="17" font-family="Arial"> + <!-- Y Axis Labels --> + <text x="-350" y="-40">SD Alleles Shared</text> + <text x="-20" y="-10" >1.0</text> + <text x="-165" y="-10" >0.75</text> + <text x="-310" y="-10" >0.5</text> + <text x="-455" y="-10" >0.25</text> + <text x="-600" y="-10" >0.0</text> + </g> + +<!-- Plot Title --> +<g style="fill:black; stroke:none" font-size="18" font-family="Arial"> + <text x="425" y="-30">%s</text> +</g> + +<!-- One group/layer of points for each relationship type --> +''' + +SVG_FOOTER = ''' +<!-- End of Data --> +</g> +<g id="btTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial"> + <rect width="250" height="110" style="fill:silver" rx="2" ry="2"/> + <rect id="btHead" width="250" height="20" rx="2" ry="2" /> + <text id="btRel" y="14" x="85">unrelated</text> + <text id="btMean" y="40" x="4">mean=1.5 +/- 0.04</text> + <text id="btSdev" y="60" x="4">sdev=0.7 +/- 0.03</text> + <text id="btPair" y="80" x="4">npairs=1152</text> + <text id="btGeno" y="100" x="4">ngenos=4783 +/- 24 (min=1000, max=5000)</text> +</g> + +<g id="otTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial"> + <rect width="150" height="180" style="fill:silver" rx="2" ry="2"/> + <rect id="otHead" width="150" height="20" rx="2" ry="2" /> + <text id="otRel" y="14" x="40">sibpairs</text> + <text id="otS1" y="40" x="4">s1=fid1,iid1</text> + <text id="otS2" y="60" x="4">s2=fid2,iid2</text> + <text id="otMean" y="80" x="4">mean=1.82</text> + <text id="otSdev" y="100" x="4">sdev=0.7</text> + <text id="otGeno" y="120" x="4">ngeno=4487</text> + <text id="otRmean" y="140" x="4">relmean=1.85</text> + <text id="otRsdev" y="160" x="4">relsdev=0.65</text> +</g> +</svg> +''' + +OUTLIERS_HEADER = 'Mean\tSdev\tZ(mean)\tZ(sdev)\tFID1\tIID1\tFID2\tIID2\tMean(Mean)\tSdev(Mean)\tMean(Sdev)\tSdev(Sdev)\n' + +DEFAULT_MAX_SAMPLE_SIZE = 5000 + +REF_COUNT_HOM1 = 3 +REF_COUNT_HET = 2 +REF_COUNT_HOM2 = 1 +MISSING = 0 + +MARKER_PAIRS_PER_SECOND_SLOW = 15000000 +MARKER_PAIRS_PER_SECOND_FAST = 70000000 + +POLYGONS = { + REL_UNRELATED: ((1.360, 0.655), (1.385, 0.730), (1.620, 0.575), (1.610, 0.505)), + REL_HALFSIBS: ((1.630, 0.500), (1.630, 0.550), (1.648, 0.540), (1.648, 0.490)), + REL_SIBS: ((1.660, 0.510), (1.665, 0.560), (1.820, 0.410), (1.820, 0.390)), + REL_PARENTCHILD: ((1.650, 0.470), (1.650, 0.490), (1.750, 0.440), (1.750, 0.420)), + REL_DUPE: ((1.970, 0.000), (1.970, 0.150), (2.000, 0.150), (2.000, 0.000)), + } + +def distance(point1, point2): + """ Calculate the distance between two points + """ + (x1,y1) = [float(d) for d in point1] + (x2,y2) = [float(d) for d in point2] + dx = abs(x1 - x2) + dy = abs(y1 - y2) + return math.sqrt(dx**2 + dy**2) + +def point_inside_polygon(x, y, poly): + """ Determine if a point (x,y) is inside a given polygon or not + poly is a list of (x,y) pairs. + + Taken from: http://www.ariel.com.au/a/python-point-int-poly.html + """ + + n = len(poly) + inside = False + + p1x,p1y = poly[0] + for i in range(n+1): + p2x,p2y = poly[i % n] + if y > min(p1y,p2y): + if y <= max(p1y,p2y): + if x <= max(p1x,p2x): + if p1y != p2y: + xinters = (y-p1y)*(p2x-p1x)/(p2y-p1y)+p1x + if p1x == p2x or x <= xinters: + inside = not inside + p1x,p1y = p2x,p2y + return inside + +def readMap(pedfile): + """ + """ + mapfile = pedfile.replace('.ped', '.map') + marker_list = [] + if os.path.exists(mapfile): + print 'readMap: %s' % (mapfile) + fh = file(mapfile, 'r') + for line in fh: + marker_list.append(line.strip().split()) + fh.close() + print 'readMap: %s markers' % (len(marker_list)) + return marker_list + +def calcMeanSD(useme): + """ + A numerically stable algorithm is given below. It also computes the mean. + This algorithm is due to Knuth,[1] who cites Welford.[2] + n = 0 + mean = 0 + M2 = 0 + + foreach x in data: + n = n + 1 + delta = x - mean + mean = mean + delta/n + M2 = M2 + delta*(x - mean) // This expression uses the new value of mean + end for + + variance_n = M2/n + variance = M2/(n - 1) + """ + mean = 0.0 + M2 = 0.0 + sd = 0.0 + n = len(useme) + if n > 1: + for i,x in enumerate(useme): + delta = x - mean + mean = mean + delta/(i+1) # knuth uses n+=1 at start + M2 = M2 + delta*(x - mean) # This expression uses the new value of mean + variance = M2/(n-1) # assume is sample so lose 1 DOF + sd = pow(variance,0.5) + return mean,sd + + +def doIBSpy(ped=None,basename='',outdir=None,logf=None, + nrsSamples=10000,title='title',pdftoo=0,Zcutoff=2.0): + #def doIBS(pedName, title, nrsSamples=None, pdftoo=False): + """ started with snpmatrix but GRR uses actual IBS counts and sd's + """ + repOut = [] # text strings to add to the html display + refallele = {} + tblf = '%s_table.xls' % (title) + tbl = file(os.path.join(outdir,tblf), 'w') + tbl.write(TABLE_HEADER) + svgf = '%s.svg' % (title) + svg = file(os.path.join(outdir,svgf), 'w') + + nMarkers = len(ped._markers) + if nMarkers < 5: + print sys.stderr, '### ERROR - %d is too few markers for reliable estimation in %s - terminating' % (nMarkers,PROGNAME) + sys.exit(1) + nSubjects = len(ped._subjects) + nrsSamples = min(nMarkers, nrsSamples) + if opts and opts.use_mito: + markers = range(nMarkers) + nrsSamples = min(len(markers), nrsSamples) + sampleIndexes = sorted(random.sample(markers, nrsSamples)) + else: + autosomals = ped.autosomal_indices() + nrsSamples = min(len(autosomals), nrsSamples) + sampleIndexes = sorted(random.sample(autosomals, nrsSamples)) + + print '' + print 'Getting random.sample of %s from %s total' % (nrsSamples, nMarkers) + npairs = (nSubjects*(nSubjects-1))/2 # total rows in table + newfiles=[svgf,tblf] + explanations = ['rgGRR Plot (requires SVG)','Mean by SD alleles shared - %d rows' % npairs] + # these go with the output file links in the html file + s = 'Reading genotypes for %s subjects and %s markers\n' % (nSubjects, nrsSamples) + logf.write(s) + minUsegenos = nrsSamples/2 # must have half? + nGenotypes = nSubjects*nrsSamples + stime = time.time() + emptyRows = set() + genos = numpy.zeros((nSubjects, nrsSamples), dtype=int) + for s in xrange(nSubjects): + nValid = 0 + #getGenotypesByIndices(self, s, mlist, format) + genos[s] = ped.getGenotypesByIndices(s, sampleIndexes, format='ref') + nValid = sum([1 for g in genos[s] if g]) + if not nValid: + emptyRows.add(s) + sub = ped.getSubject(s) + print 'All missing for row %d (%s)' % (s, sub) + logf.write('All missing for row %d (%s)\n' % (s, sub)) + rtime = time.time() - stime + if verbose: + print '@@Read %s genotypes in %s seconds' % (nGenotypes, rtime) + + + ### Now the expensive part. For each pair of subjects, we get the mean number + ### and standard deviation of shared alleles over all of the markers where both + ### subjects have a known genotype. Identical subjects should have mean shared + ### alleles very close to 2.0 with a standard deviation very close to 0.0. + tot = nSubjects*(nSubjects-1)/2 + nprog = tot/10 + nMarkerpairs = tot * nrsSamples + estimatedTimeSlow = nMarkerpairs/MARKER_PAIRS_PER_SECOND_SLOW + estimatedTimeFast = nMarkerpairs/MARKER_PAIRS_PER_SECOND_FAST + + pairs = [] + pair_data = {} + means = [] ## Mean IBS for each pair + ngenoL = [] ## Count of comparable genotypes for each pair + sdevs = [] ## Standard dev for each pair + rels = [] ## A relationship code for each pair + zmeans = [0.0 for x in xrange(tot)] ## zmean score for each pair for the relgroup + zstds = [0.0 for x in xrange(tot)] ## zstd score for each pair for the relgrp + skip = set() + ndone = 0 ## How many have been done so far + + logf.write('Calculating %d pairs, updating every %d pairs...\n' % (tot, nprog)) + logf.write('Estimated time is %2.2f to %2.2f seconds ...\n' % (estimatedTimeFast, estimatedTimeSlow)) + + t1sum = 0 + t2sum = 0 + t3sum = 0 + now = time.time() + scache = {}