commit/galaxy-central: jgoecks: Server-side refactoring for PhyloViz: move parsers + data providers into data_providers directory and remove phyloviz controller.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/6f9ce8692bb6/ changeset: 6f9ce8692bb6 user: jgoecks date: 2012-10-04 16:34:57 summary: Server-side refactoring for PhyloViz: move parsers + data providers into data_providers directory and remove phyloviz controller. affected #: 14 files diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 lib/galaxy/visualization/data_providers/phyloviz/__init__.py --- /dev/null +++ b/lib/galaxy/visualization/data_providers/phyloviz/__init__.py @@ -0,0 +1,42 @@ +""" Data providers code for PhyloViz """ + +from galaxy.visualization.data_providers.basic import BaseDataProvider +from galaxy.visualization.data_providers.phyloviz.nexusparser import Nexus_Parser +from galaxy.visualization.data_providers.phyloviz.newickparser import Newick_Parser +from galaxy.visualization.data_providers.phyloviz.phyloxmlparser import Phyloxml_Parser + +class PhylovizDataProvider( BaseDataProvider ): + + def __init__( self, original_dataset=None ): + super( PhylovizDataProvider, self ).__init__( original_dataset=original_dataset ) + + def get_data( self ): + """returns [trees], meta + Trees are actually an array of JsonDicts. It's usually one tree, except in the case of Nexus + """ + + jsonDicts, meta = [], {} + file_ext = self.original_dataset.datatype.file_ext + file_name = self.original_dataset.file_name + try: + if file_ext == "nhx": # parses newick files + newickParser = Newick_Parser() + jsonDicts, parseMsg = newickParser.parseFile( file_name ) + elif file_ext == "phyloxml": # parses phyloXML files + phyloxmlParser = Phyloxml_Parser() + jsonDicts, parseMsg = phyloxmlParser.parseFile( file_name ) + elif file_ext == "nex": # parses nexus files + nexusParser = Nexus_Parser() + jsonDicts, parseMsg = nexusParser.parseFile( file_name ) + meta["trees"] = parseMsg + else: + raise Exception("File type is not supported") + + meta["msg"] = parseMsg + + except Exception, e: + raise e + jsonDicts, meta["msg"] = [], "Parse failed" + + return jsonDicts, meta + diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 lib/galaxy/visualization/data_providers/phyloviz/baseparser.py --- /dev/null +++ b/lib/galaxy/visualization/data_providers/phyloviz/baseparser.py @@ -0,0 +1,125 @@ +import json + +class Node(object): + """Node class of PhyloTree, which represents a CLAUDE in a phylogenetic tree""" + def __init__(self, nodeName, **kwargs): + """Creates a node and adds in the typical annotations""" + self.name, self.id = nodeName, kwargs.get("id", 0) + self.depth = kwargs.get("depth", 0) + self.children = [] + + self.isInternal = kwargs.get("isInternal", 0) + self.length, self.bootstrap = kwargs.get("length", 0), kwargs.get("bootstrap", None) + self.events = kwargs.get("events", "") + + # clean up boot strap values + if self.bootstrap == -1: + self.bootstrap = None + + def addChildNode(self, child): + """Adds a child node to the current node""" + if isinstance(child, Node): + self.children.append(child) + else: + self.children += child + + + def __str__(self): + return self.name + " id:" + str(self.id) + ", depth: " + str(self.depth) + + + def toJson(self): + """Converts the data in the node to a dict representation of json""" + thisJson = { + "name" : self.name, + "id" : self.id, + "depth" : self.depth, + "dist" : self.length + } + thisJson = self.addChildrenToJson(thisJson) + thisJson = self.addMiscToJson(thisJson) + return thisJson + + def addChildrenToJson(self, jsonDict): + """Needs a special method to addChildren, such that the key does not appear in the Jsondict when the children is empty + this requirement is due to the layout algorithm used by d3 layout for hiding subtree """ + if len(self.children) > 0: + children = [ node.toJson() for node in self.children] + jsonDict["children"] = children + return jsonDict + + + def addMiscToJson(self, jsonDict): + """Adds other misc attributes to json if they are present""" + if not self.events == "": + jsonDict["events"] = self.events + if not self.bootstrap == None: + jsonDict["bootstrap"] = self.bootstrap + return jsonDict + + + +class PhyloTree(object): + """Standardized python based class to represent the phylogenetic tree parsed from different + phylogenetic file formats.""" + + def __init__(self): + self.root, self.rootAttr = None, {} + self.nodes = {} + self.title = None + self.id = 1 + + def addAttributesToRoot(self, attrDict): + """Adds attributes to root, but first we put it in a temp store and bind it with root when .toJson is called""" + for key, value in attrDict.items(): + self.rootAttr[key] = value + + def makeNode(self, nodeName, **kwargs): + """Called to make a node within PhyloTree, arbitrary kwargs can be passed to annotate nodes + Tracks the number of nodes via internally incremented id""" + kwargs["id"] = self.id + self.id += 1 + return Node(nodeName, **kwargs) + + def addRoot(self, root): + """Creates a root for phyloTree""" + assert isinstance(root, Node) + root.parent = None + self.root = root + + def generateJsonableDict(self): + """Changes itself into a dictonary by recurssively calling the tojson on all its nodes. Think of it + as a dict in an array of dict in an array of dict and so on...""" + jsonTree = "" + if self.root: + assert isinstance(self.root, Node) + jsonTree = self.root.toJson() + for key, value in self.rootAttr.items(): + # transfer temporary stored attr to root + jsonTree[key] = value + else: + raise Exception("Root is not assigned!") + return jsonTree + + + +class Base_Parser(object): + """Base parsers contain all the methods to handle phylogeny tree creation and + converting the data to json that all parsers should have""" + + def __init__(self): + self.phyloTrees = [] + + def parseFile(self, filePath): + """Base method that all phylogeny file parser should have""" + raise Exception("Base method for phylogeny file parsers is not implemented") + + def toJson(self, jsonDict): + """Convenience method to get a json string from a python json dict""" + return json.dumps(jsonDict) + + def _writeJsonToFile(self, filepath, json): + """Writes the file out to the system""" + f = open(filepath, "w") + f.writelines(json) + f.close() diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 lib/galaxy/visualization/data_providers/phyloviz/newickparser.py --- /dev/null +++ b/lib/galaxy/visualization/data_providers/phyloviz/newickparser.py @@ -0,0 +1,185 @@ +from baseparser import Base_Parser, PhyloTree +import re + +class Newick_Parser(Base_Parser): + """For parsing trees stored in the newick format (.nhx) + It is necessarily more complex because this parser is later extended by Nexus for parsing newick as well..""" + + + def __init__(self): + super(Newick_Parser, self).__init__() + + + def parseFile(self, filePath): + """Parses a newick file to obtain the string inside. Returns: jsonableDict""" + with open(filePath, "r") as newickFile: + newickString = newickFile.read() + newickString = newickString.replace("\n", "").replace("\r", "") + return [self.parseData(newickString)], "Success" + + + def parseData(self, newickString): + """To be called on a newickString directly to parse it. Returns: jsonableDict""" + return self._parseNewickToJson(newickString) + + + def _parseNewickToJson(self, newickString, treeName=None, nameMap=None): + """parses a newick representation of a tree into a PhyloTree data structure, + which can be easily converted to json""" + self.phyloTree = PhyloTree() + newickString = self.cleanNewickString(newickString) + if nameMap: + newickString = self._mapName(newickString, nameMap) + + self.phyloTree.root = self.parseNode(newickString, 0) + if nameMap: + self.phyloTree.addAttributesToRoot({"treeName": treeName}) + + return self.phyloTree.generateJsonableDict() + + + def cleanNewickString(self, rawNewick): + """removing semi colon, and illegal json characters (\,',") and white spaces""" + return re.sub(r'\s|;|\"|\'|\\', '', rawNewick) + + + def _makeNodesFromString(self, string, depth): + """elements separated by comma could be empty""" + + if string.find("(") != -1: + raise Exception("Tree is not well form, location: " + string) + + childrenString = string.split(",") + childrenNodes = [] + + for childString in childrenString: + if len(childString) == 0: + continue + nodeInfo = childString.split(":") + name, length, bootstrap = "", None, -1 + if len(nodeInfo) == 2: # has length info + length = nodeInfo[1] + # checking for bootstap values + name = nodeInfo[0] + try: # Nexus may bootstrap in names position + name = float(name) + if 0<= name <= 1: + bootstrap = name + elif 1 <= name <= 100: + bootstrap = name / 100 + name = "" + except ValueError: + name = nodeInfo[0] + else: + name = nodeInfo[0] # string only contains name + node = self.phyloTree.makeNode(name, length=length, depth=depth, bootstrap= bootstrap) + childrenNodes += [node] + return childrenNodes + + + + def _mapName(self, newickString, nameMap): + """ + Necessary to replace names of terms inside nexus representation + Also, its here because Mailaud's doesnt deal with id_strings outside of quotes(" ") + """ + newString = "" + start = 0 + end = 0 + + for i in xrange(len(newickString)): + if newickString[i] == "(" or newickString[i] == ",": + if re.match(r"[,(]", newickString[i+1:]): + continue + else: + end = i + 1 + # i now refers to the starting position of the term to be replaced, + # we will next find j which is the ending pos of the term + for j in xrange(i+1, len(newickString)): + enclosingSymbol = newickString[j] # the immediate symbol after a common or left bracket which denotes the end of a term + if enclosingSymbol == ")" or enclosingSymbol == ":" or enclosingSymbol == ",": + termToReplace = newickString[end:j] + + newString += newickString[start : end] + nameMap[termToReplace] #+ "'" "'" + + start = j + break + + newString += newickString[start:] + return newString + + + def parseNode(self, string, depth): + """ Recursive method for parsing newick string, works by stripping down the string into substring + of newick contained with brackers, which is used to call itself. + Eg ... ( A, B, (D, E)C, F, G ) ... + We will make the preceeding nodes first A, B, then the internal node C, its children D, E, + and finally the succeeding nodes F, G""" + + # Base case where there is only an empty string + if string == "": + return + # Base case there its only an internal claude + if string.find("(") == -1: + return self._makeNodesFromString(string, depth) + + nodes, children = [], [] # nodes refer to the nodes on this level, children refers to the child of the + start = 0 + lenOfPreceedingInternalNodeString = 0 + bracketStack = [] + + for j in xrange(len(string)): + if string[j] == "(": #finding the positions of all the open brackets + bracketStack.append(j) + continue + if string[j] == ")": #finding the positions of all the closed brackets to extract claude + i = bracketStack.pop() + + if len(bracketStack) == 0: # is child of current node + + InternalNode = None + + #First flat call to make nodes of the same depth but from the preceeding string. + startSubstring = string[start + lenOfPreceedingInternalNodeString: i] + preceedingNodes = self._makeNodesFromString(startSubstring, depth) + nodes += preceedingNodes + + # Then We will try to see if the substring has any internal nodes first, make it then make nodes preceeding it and succeeding it. + if j + 1 < len(string): + stringRightOfBracket = string[j+1:] # Eg. '(b:0.4,a:0.3)c:0.3, stringRightOfBracket = c:0.3 + match = re.search(r"[\)\,\(]", stringRightOfBracket) + if match: + indexOfNextSymbol = match.start() + stringRepOfInternalNode = stringRightOfBracket[:indexOfNextSymbol] + internalNodes = self._makeNodesFromString( stringRepOfInternalNode, depth) + if len(internalNodes) > 0: + InternalNode = internalNodes[0] + lenOfPreceedingInternalNodeString = len(stringRepOfInternalNode) + else: # sometimes the node can be the last element of a string + InternalNode = self._makeNodesFromString(string[j+1:], depth)[0] + lenOfPreceedingInternalNodeString = len(string) - j + if InternalNode == None: #creating a generic node if it is unnamed + InternalNode = self.phyloTree.makeNode( "", depth=depth, isInternal=True ) #"internal-" + str(depth) + lenOfPreceedingInternalNodeString = 0 + + # recussive call to make the internal claude + childSubString = string[ i + 1 : j ] + InternalNode.addChildNode(self.parseNode(childSubString, depth + 1)) + + nodes.append(InternalNode) # we append the internal node later to preserve order + + start = j + 1 + continue + + if depth == 0: # if its the root node, we do nothing about it and return + return nodes[0] + + # Adding last most set of children + endString = string[start:] + if string[start-1] == ")": # if the symbol belongs to an internal node which is created previously, then we remove it from the string left to parse + match = re.search(r"[\)\,\(]", endString) + if match: + endOfNodeName = start + match.start() + 1 + endString = string[endOfNodeName:] + nodes += self._makeNodesFromString(endString, depth) + + return nodes diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 lib/galaxy/visualization/data_providers/phyloviz/nexusparser.py --- /dev/null +++ b/lib/galaxy/visualization/data_providers/phyloviz/nexusparser.py @@ -0,0 +1,107 @@ +from newickparser import Newick_Parser +import re + +MAX_READLINES = 200000 + + +class Nexus_Parser(Newick_Parser): + + def __init__(self): + super(Nexus_Parser, self).__init__() + + def parseFile(self, filePath): + """passes a file and extracts its Nexus content.""" + return self.parseNexus(filePath) + + + def parseNexus(self, filename): + """ Nexus data is stored in blocks between a line starting with begin and another line starting with end; + Commends inside square brackets are to be ignored, + For more information: http://wiki.christophchamp.com/index.php/NEXUS_file_format + Nexus can store multiple trees + """ + + with open( filename, "rt") as nex_file: + nexlines = nex_file.readlines() + + rowCount = 0 + inTreeBlock = False # sentinel to check if we are in a tree block + intranslateBlock = False # sentinel to check if we are in the translate region of the tree. Stores synonyms of the labellings + self.inCommentBlock = False + self.nameMapping = None # stores mapping representation used in nexus format + treeNames = [] + + for line in nexlines: + line = line.replace(";\n", "") + lline = line.lower() + + if rowCount > MAX_READLINES or (not nex_file) : + break + rowCount +=1 + # We are only interested in the tree block. + if "begin" in lline and "tree" in lline and not inTreeBlock: + inTreeBlock = True + continue + if inTreeBlock and "end" in lline[:3]: + inTreeBlock, currPhyloTree = False, None + continue + + if inTreeBlock: + + if "title" in lline: # Adding title to the tree + titleLoc = lline.find("title") + title = line[titleLoc + 5:].replace(" ", "") + + continue + + if "translate" in lline: + intranslateBlock = True + self.nameMapping = {} + continue + + if intranslateBlock: + mappingLine = self.splitLinebyWhitespaces(line) + key, value = mappingLine[1], mappingLine[2].replace(",", "").replace("'","") #replacing illegal json characters + self.nameMapping[key] = value + + # Extracting newick Trees + if "tree" in lline: + intranslateBlock = False + + treeLineCols = self.splitLinebyWhitespaces(line) + treeName, newick = treeLineCols[2], treeLineCols[-1] + + if newick == "": # Empty lines can be found in tree blocks + continue + + currPhyloTree = self._parseNewickToJson(newick, treeName, nameMap=self.nameMapping) + + self.phyloTrees.append(currPhyloTree) + treeIndex = len(self.phyloTrees) - 1 + treeNames.append( (treeName, treeIndex) ) # appending name of tree, and its index + continue + + return self.phyloTrees, treeNames + + + def splitLinebyWhitespaces(self, line): + """replace tabs and write spaces to a single write space, so we can properly split it.""" + return re.split(r"\s+", line) + + + def checkComments(self, line): + """Check to see if the line/lines is a comment.""" + if not self.inCommentBlock: + if "[" in line: + if "]" not in line: + self.inCommentBlock = True + else: + return "Nextline" # need to move on to the nextline after getting out of comment + else : + if "]" in line: + if line.rfind("[") > line.rfind("]"): + pass # a comment block is closed but another is open. + else: + self.inCommentBlock = False + return "Nextline" # need to move on to the nextline after getting out of comment + return "" \ No newline at end of file diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 lib/galaxy/visualization/data_providers/phyloviz/phyloxmlparser.py --- /dev/null +++ b/lib/galaxy/visualization/data_providers/phyloviz/phyloxmlparser.py @@ -0,0 +1,133 @@ +from baseparser import Base_Parser, PhyloTree, Node +from xml.etree import ElementTree + +class Phyloxml_Parser(Base_Parser): + """Parses a phyloxml file into a json file that will be passed to PhyloViz for display""" + + def __init__(self): + super(Phyloxml_Parser, self).__init__() + self.phyloTree = PhyloTree() + self.tagsOfInterest = { + "clade": "", + "name" : "name", + "branch_length" : "length", + "confidence" : "bootstrap", + "events" : "events" + } + + def parseFile(self, filePath): + """passes a file and extracts its Phylogeny Tree content.""" + phyloXmlFile = open(filePath, "r") + + xmlTree = ElementTree.parse(phyloXmlFile) + xmlRoot = xmlTree.getroot()[0] + self.nameSpaceIndex = xmlRoot.tag.rfind("}") + 1 # used later by the clean tag method to remove the name space in every element.tag + + phyloRoot = None + for child in xmlRoot: + childTag = self.cleanTag(child.tag) + if childTag == "clade": + phyloRoot = child + elif childTag == "name": + self.phyloTree.title = child.text + + self.phyloTree.root = self.parseNode(phyloRoot, 0) + jsonDict = self.phyloTree.generateJsonableDict() + return [jsonDict], "Success" + + + def parseNode(self, node, depth): + """Parses any node within a phyloxml tree and looks out for claude, which signals the creation of + nodes - internal OR leaf""" + + tag = self.cleanTag(node.tag) + if not tag == "clade": + return None + hasInnerClade = False + + # peeking once for parent and once for child to check if the node is internal + for child in node: + childTag = self.cleanTag(child.tag) + if childTag == "clade": + hasInnerClade = True + break + + if hasInnerClade: # this node is an internal node + currentNode = self._makeInternalNode(node, depth= depth) + for child in node: + child = self.parseNode(child, depth + 1) + if isinstance(child, Node): + currentNode.addChildNode(child) + + else: # this node is a leaf node + currentNode = self._makeLeafNode(node, depth=depth+1) + + return currentNode + + + def _makeLeafNode(self, leafNode, depth = 0 ): + """Makes leaf nodes by calling Phylotree methods""" + node = {} + for child in leafNode: + childTag = self.cleanTag(child.tag) + if childTag in self.tagsOfInterest: + key = self.tagsOfInterest[childTag] # need to map phyloxml terms to ours + node[key] = child.text + + node["depth"] = depth + return self.phyloTree.makeNode(self._getNodeName(leafNode), **node) + + def _getNodeName(self, node, depth=-1): + """Gets the name of a claude. It handles the case where a taxonomy node is involved""" + + def getTagFromTaxonomyNode(node): + """Returns the name of a taxonomy node. A taxonomy node have to be treated differently as the name + is embedded one level deeper""" + phyloxmlTaxoNames = { + "common_name" : "", + "scientific_name" : "", + "code" : "" + } + for child in node: + childTag = self.cleanTag(child.tag) + if childTag in phyloxmlTaxoNames: + return child.text + return "" + + nodeName = "" + for child in node: + childTag = self.cleanTag(child.tag) + if childTag == "name" : + nodeName = child.text + break + elif childTag == "taxonomy": + nodeName = getTagFromTaxonomyNode(child) + break + + return nodeName + + + def _makeInternalNode(self, internalNode, depth=0): + """ Makes an internal node from an element object that is guranteed to be a parent node. + Gets the value of interests like events and appends it to a custom node object that will be passed to PhyloTree to make nodes + """ + node = {} + for child in internalNode: + childTag = self.cleanTag(child.tag) + if childTag == "clade": + continue + elif childTag in self.tagsOfInterest: + if childTag == "events": # events is nested 1 more level deeper than others + key, text = "events", self.cleanTag(child[0].tag) + else: + key = self.tagsOfInterest[childTag] + text = child.text + node[key] = text + + + return self.phyloTree.makeNode(self._getNodeName(internalNode, depth), **node) + + + def cleanTag(self, tagString): + return tagString[self.nameSpaceIndex:] + \ No newline at end of file diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 lib/galaxy/visualization/phyloviz/__init__.py --- a/lib/galaxy/visualization/phyloviz/__init__.py +++ /dev/null @@ -1,1 +0,0 @@ -__author__ = 'Tomithy' diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 lib/galaxy/visualization/phyloviz/baseparser.py --- a/lib/galaxy/visualization/phyloviz/baseparser.py +++ /dev/null @@ -1,125 +0,0 @@ -import json - -class Node(object): - """Node class of PhyloTree, which represents a CLAUDE in a phylogenetic tree""" - def __init__(self, nodeName, **kwargs): - """Creates a node and adds in the typical annotations""" - self.name, self.id = nodeName, kwargs.get("id", 0) - self.depth = kwargs.get("depth", 0) - self.children = [] - - self.isInternal = kwargs.get("isInternal", 0) - self.length, self.bootstrap = kwargs.get("length", 0), kwargs.get("bootstrap", None) - self.events = kwargs.get("events", "") - - # clean up boot strap values - if self.bootstrap == -1: - self.bootstrap = None - - def addChildNode(self, child): - """Adds a child node to the current node""" - if isinstance(child, Node): - self.children.append(child) - else: - self.children += child - - - def __str__(self): - return self.name + " id:" + str(self.id) + ", depth: " + str(self.depth) - - - def toJson(self): - """Converts the data in the node to a dict representation of json""" - thisJson = { - "name" : self.name, - "id" : self.id, - "depth" : self.depth, - "dist" : self.length - } - thisJson = self.addChildrenToJson(thisJson) - thisJson = self.addMiscToJson(thisJson) - return thisJson - - def addChildrenToJson(self, jsonDict): - """Needs a special method to addChildren, such that the key does not appear in the Jsondict when the children is empty - this requirement is due to the layout algorithm used by d3 layout for hiding subtree """ - if len(self.children) > 0: - children = [ node.toJson() for node in self.children] - jsonDict["children"] = children - return jsonDict - - - def addMiscToJson(self, jsonDict): - """Adds other misc attributes to json if they are present""" - if not self.events == "": - jsonDict["events"] = self.events - if not self.bootstrap == None: - jsonDict["bootstrap"] = self.bootstrap - return jsonDict - - - -class PhyloTree(object): - """Standardized python based class to represent the phylogenetic tree parsed from different - phylogenetic file formats.""" - - def __init__(self): - self.root, self.rootAttr = None, {} - self.nodes = {} - self.title = None - self.id = 1 - - def addAttributesToRoot(self, attrDict): - """Adds attributes to root, but first we put it in a temp store and bind it with root when .toJson is called""" - for key, value in attrDict.items(): - self.rootAttr[key] = value - - def makeNode(self, nodeName, **kwargs): - """Called to make a node within PhyloTree, arbitrary kwargs can be passed to annotate nodes - Tracks the number of nodes via internally incremented id""" - kwargs["id"] = self.id - self.id += 1 - return Node(nodeName, **kwargs) - - def addRoot(self, root): - """Creates a root for phyloTree""" - assert isinstance(root, Node) - root.parent = None - self.root = root - - def generateJsonableDict(self): - """Changes itself into a dictonary by recurssively calling the tojson on all its nodes. Think of it - as a dict in an array of dict in an array of dict and so on...""" - jsonTree = "" - if self.root: - assert isinstance(self.root, Node) - jsonTree = self.root.toJson() - for key, value in self.rootAttr.items(): - # transfer temporary stored attr to root - jsonTree[key] = value - else: - raise Exception("Root is not assigned!") - return jsonTree - - - -class Base_Parser(object): - """Base parsers contain all the methods to handle phylogeny tree creation and - converting the data to json that all parsers should have""" - - def __init__(self): - self.phyloTrees = [] - - def parseFile(self, filePath): - """Base method that all phylogeny file parser should have""" - raise Exception("Base method for phylogeny file parsers is not implemented") - - def toJson(self, jsonDict): - """Convenience method to get a json string from a python json dict""" - return json.dumps(jsonDict) - - def _writeJsonToFile(self, filepath, json): - """Writes the file out to the system""" - f = open(filepath, "w") - f.writelines(json) - f.close() diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 lib/galaxy/visualization/phyloviz/newickparser.py --- a/lib/galaxy/visualization/phyloviz/newickparser.py +++ /dev/null @@ -1,185 +0,0 @@ -from baseparser import Base_Parser, PhyloTree -import re - -class Newick_Parser(Base_Parser): - """For parsing trees stored in the newick format (.nhx) - It is necessarily more complex because this parser is later extended by Nexus for parsing newick as well..""" - - - def __init__(self): - super(Newick_Parser, self).__init__() - - - def parseFile(self, filePath): - """Parses a newick file to obtain the string inside. Returns: jsonableDict""" - with open(filePath, "r") as newickFile: - newickString = newickFile.read() - newickString = newickString.replace("\n", "").replace("\r", "") - return [self.parseData(newickString)], "Success" - - - def parseData(self, newickString): - """To be called on a newickString directly to parse it. Returns: jsonableDict""" - return self._parseNewickToJson(newickString) - - - def _parseNewickToJson(self, newickString, treeName=None, nameMap=None): - """parses a newick representation of a tree into a PhyloTree data structure, - which can be easily converted to json""" - self.phyloTree = PhyloTree() - newickString = self.cleanNewickString(newickString) - if nameMap: - newickString = self._mapName(newickString, nameMap) - - self.phyloTree.root = self.parseNode(newickString, 0) - if nameMap: - self.phyloTree.addAttributesToRoot({"treeName": treeName}) - - return self.phyloTree.generateJsonableDict() - - - def cleanNewickString(self, rawNewick): - """removing semi colon, and illegal json characters (\,',") and white spaces""" - return re.sub(r'\s|;|\"|\'|\\', '', rawNewick) - - - def _makeNodesFromString(self, string, depth): - """elements separated by comma could be empty""" - - if string.find("(") != -1: - raise Exception("Tree is not well form, location: " + string) - - childrenString = string.split(",") - childrenNodes = [] - - for childString in childrenString: - if len(childString) == 0: - continue - nodeInfo = childString.split(":") - name, length, bootstrap = "", None, -1 - if len(nodeInfo) == 2: # has length info - length = nodeInfo[1] - # checking for bootstap values - name = nodeInfo[0] - try: # Nexus may bootstrap in names position - name = float(name) - if 0<= name <= 1: - bootstrap = name - elif 1 <= name <= 100: - bootstrap = name / 100 - name = "" - except ValueError: - name = nodeInfo[0] - else: - name = nodeInfo[0] # string only contains name - node = self.phyloTree.makeNode(name, length=length, depth=depth, bootstrap= bootstrap) - childrenNodes += [node] - return childrenNodes - - - - def _mapName(self, newickString, nameMap): - """ - Necessary to replace names of terms inside nexus representation - Also, its here because Mailaud's doesnt deal with id_strings outside of quotes(" ") - """ - newString = "" - start = 0 - end = 0 - - for i in xrange(len(newickString)): - if newickString[i] == "(" or newickString[i] == ",": - if re.match(r"[,(]", newickString[i+1:]): - continue - else: - end = i + 1 - # i now refers to the starting position of the term to be replaced, - # we will next find j which is the ending pos of the term - for j in xrange(i+1, len(newickString)): - enclosingSymbol = newickString[j] # the immediate symbol after a common or left bracket which denotes the end of a term - if enclosingSymbol == ")" or enclosingSymbol == ":" or enclosingSymbol == ",": - termToReplace = newickString[end:j] - - newString += newickString[start : end] + nameMap[termToReplace] #+ "'" "'" + - start = j - break - - newString += newickString[start:] - return newString - - - def parseNode(self, string, depth): - """ Recursive method for parsing newick string, works by stripping down the string into substring - of newick contained with brackers, which is used to call itself. - Eg ... ( A, B, (D, E)C, F, G ) ... - We will make the preceeding nodes first A, B, then the internal node C, its children D, E, - and finally the succeeding nodes F, G""" - - # Base case where there is only an empty string - if string == "": - return - # Base case there its only an internal claude - if string.find("(") == -1: - return self._makeNodesFromString(string, depth) - - nodes, children = [], [] # nodes refer to the nodes on this level, children refers to the child of the - start = 0 - lenOfPreceedingInternalNodeString = 0 - bracketStack = [] - - for j in xrange(len(string)): - if string[j] == "(": #finding the positions of all the open brackets - bracketStack.append(j) - continue - if string[j] == ")": #finding the positions of all the closed brackets to extract claude - i = bracketStack.pop() - - if len(bracketStack) == 0: # is child of current node - - InternalNode = None - - #First flat call to make nodes of the same depth but from the preceeding string. - startSubstring = string[start + lenOfPreceedingInternalNodeString: i] - preceedingNodes = self._makeNodesFromString(startSubstring, depth) - nodes += preceedingNodes - - # Then We will try to see if the substring has any internal nodes first, make it then make nodes preceeding it and succeeding it. - if j + 1 < len(string): - stringRightOfBracket = string[j+1:] # Eg. '(b:0.4,a:0.3)c:0.3, stringRightOfBracket = c:0.3 - match = re.search(r"[\)\,\(]", stringRightOfBracket) - if match: - indexOfNextSymbol = match.start() - stringRepOfInternalNode = stringRightOfBracket[:indexOfNextSymbol] - internalNodes = self._makeNodesFromString( stringRepOfInternalNode, depth) - if len(internalNodes) > 0: - InternalNode = internalNodes[0] - lenOfPreceedingInternalNodeString = len(stringRepOfInternalNode) - else: # sometimes the node can be the last element of a string - InternalNode = self._makeNodesFromString(string[j+1:], depth)[0] - lenOfPreceedingInternalNodeString = len(string) - j - if InternalNode == None: #creating a generic node if it is unnamed - InternalNode = self.phyloTree.makeNode( "", depth=depth, isInternal=True ) #"internal-" + str(depth) - lenOfPreceedingInternalNodeString = 0 - - # recussive call to make the internal claude - childSubString = string[ i + 1 : j ] - InternalNode.addChildNode(self.parseNode(childSubString, depth + 1)) - - nodes.append(InternalNode) # we append the internal node later to preserve order - - start = j + 1 - continue - - if depth == 0: # if its the root node, we do nothing about it and return - return nodes[0] - - # Adding last most set of children - endString = string[start:] - if string[start-1] == ")": # if the symbol belongs to an internal node which is created previously, then we remove it from the string left to parse - match = re.search(r"[\)\,\(]", endString) - if match: - endOfNodeName = start + match.start() + 1 - endString = string[endOfNodeName:] - nodes += self._makeNodesFromString(endString, depth) - - return nodes diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 lib/galaxy/visualization/phyloviz/nexusparser.py --- a/lib/galaxy/visualization/phyloviz/nexusparser.py +++ /dev/null @@ -1,107 +0,0 @@ -from newickparser import Newick_Parser -import re - -MAX_READLINES = 200000 - - -class Nexus_Parser(Newick_Parser): - - def __init__(self): - super(Nexus_Parser, self).__init__() - - def parseFile(self, filePath): - """passes a file and extracts its Nexus content.""" - return self.parseNexus(filePath) - - - def parseNexus(self, filename): - """ Nexus data is stored in blocks between a line starting with begin and another line starting with end; - Commends inside square brackets are to be ignored, - For more information: http://wiki.christophchamp.com/index.php/NEXUS_file_format - Nexus can store multiple trees - """ - - with open( filename, "rt") as nex_file: - nexlines = nex_file.readlines() - - rowCount = 0 - inTreeBlock = False # sentinel to check if we are in a tree block - intranslateBlock = False # sentinel to check if we are in the translate region of the tree. Stores synonyms of the labellings - self.inCommentBlock = False - self.nameMapping = None # stores mapping representation used in nexus format - treeNames = [] - - for line in nexlines: - line = line.replace(";\n", "") - lline = line.lower() - - if rowCount > MAX_READLINES or (not nex_file) : - break - rowCount +=1 - # We are only interested in the tree block. - if "begin" in lline and "tree" in lline and not inTreeBlock: - inTreeBlock = True - continue - if inTreeBlock and "end" in lline[:3]: - inTreeBlock, currPhyloTree = False, None - continue - - if inTreeBlock: - - if "title" in lline: # Adding title to the tree - titleLoc = lline.find("title") - title = line[titleLoc + 5:].replace(" ", "") - - continue - - if "translate" in lline: - intranslateBlock = True - self.nameMapping = {} - continue - - if intranslateBlock: - mappingLine = self.splitLinebyWhitespaces(line) - key, value = mappingLine[1], mappingLine[2].replace(",", "").replace("'","") #replacing illegal json characters - self.nameMapping[key] = value - - # Extracting newick Trees - if "tree" in lline: - intranslateBlock = False - - treeLineCols = self.splitLinebyWhitespaces(line) - treeName, newick = treeLineCols[2], treeLineCols[-1] - - if newick == "": # Empty lines can be found in tree blocks - continue - - currPhyloTree = self._parseNewickToJson(newick, treeName, nameMap=self.nameMapping) - - self.phyloTrees.append(currPhyloTree) - treeIndex = len(self.phyloTrees) - 1 - treeNames.append( (treeName, treeIndex) ) # appending name of tree, and its index - continue - - return self.phyloTrees, treeNames - - - def splitLinebyWhitespaces(self, line): - """replace tabs and write spaces to a single write space, so we can properly split it.""" - return re.split(r"\s+", line) - - - def checkComments(self, line): - """Check to see if the line/lines is a comment.""" - if not self.inCommentBlock: - if "[" in line: - if "]" not in line: - self.inCommentBlock = True - else: - return "Nextline" # need to move on to the nextline after getting out of comment - else : - if "]" in line: - if line.rfind("[") > line.rfind("]"): - pass # a comment block is closed but another is open. - else: - self.inCommentBlock = False - return "Nextline" # need to move on to the nextline after getting out of comment - return "" \ No newline at end of file diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 lib/galaxy/visualization/phyloviz/phyloviz_dataprovider.py --- a/lib/galaxy/visualization/phyloviz/phyloviz_dataprovider.py +++ /dev/null @@ -1,36 +0,0 @@ -from newickparser import Newick_Parser -from nexusparser import Nexus_Parser -from phyloxmlparser import Phyloxml_Parser -from galaxy.visualization.data_providers.basic import BaseDataProvider - -class Phyloviz_DataProvider( BaseDataProvider ): - - def __init__( self, original_dataset=None ): - super( BaseDataProvider, self ).__init__( original_dataset=original_dataset ) - - def get_data( self, **kwargs ): - """returns [trees], meta - Trees are actually an array of JsonDicts. It's usually one tree, except in the case of Nexus - """ - jsonDicts, meta = [], {} - try: - if fileExt == "nhx": # parses newick files - newickParser = Newick_Parser() - jsonDicts, parseMsg = newickParser.parseFile(filepath) - elif fileExt == "phyloxml": # parses phyloXML files - phyloxmlParser = Phyloxml_Parser() - jsonDicts, parseMsg = phyloxmlParser.parseFile(filepath) - elif fileExt == "nex": # parses nexus files - nexusParser = Nexus_Parser() - jsonDicts, parseMsg = nexusParser.parseFile(filepath) - meta["trees"] = parseMsg - else: - raise Exception("File type is not supported") - - meta["msg"] = parseMsg - - except Exception: - jsonDicts, meta["msg"] = [], "Parse failed" - - return jsonDicts, meta - diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 lib/galaxy/visualization/phyloviz/phyloxmlparser.py --- a/lib/galaxy/visualization/phyloviz/phyloxmlparser.py +++ /dev/null @@ -1,134 +0,0 @@ -from baseparser import Base_Parser, PhyloTree, Node -from xml.etree import ElementTree - -class Phyloxml_Parser(Base_Parser): - """Parses a phyloxml file into a json file that will be passed to PhyloViz for display""" - - def __init__(self): - super(Phyloxml_Parser, self).__init__() - self.phyloTree = PhyloTree() - self.tagsOfInterest = { - "clade": "", - "name" : "name", - "branch_length" : "length", - "confidence" : "bootstrap", - "events" : "events" - } - - def parseFile(self, filePath): - """passes a file and extracts its Phylogeny Tree content.""" - phyloXmlFile = open(filePath, "r") - - xmlTree = ElementTree.parse(phyloXmlFile) - xmlRoot = xmlTree.getroot()[0] - self.nameSpaceIndex = xmlRoot.tag.rfind("}") + 1 # used later by the clean tag method to remove the name space in every element.tag - - phyloRoot = None - for child in xmlRoot: - childTag = self.cleanTag(child.tag) - if childTag == "clade": - phyloRoot = child - elif childTag == "name": - self.phyloTree.title = child.text - - self.phyloTree.root = self.parseNode(phyloRoot, 0) - jsonDict = self.phyloTree.generateJsonableDict() - return [jsonDict], "Success" - - - def parseNode(self, node, depth): - """Parses any node within a phyloxml tree and looks out for claude, which signals the creation of - nodes - internal OR leaf""" - assert isinstance(node, etree._Element) - - tag = self.cleanTag(node.tag) - if not tag == "clade": - return None - hasInnerClade = False - - # peeking once for parent and once for child to check if the node is internal - for child in node: - childTag = self.cleanTag(child.tag) - if childTag == "clade": - hasInnerClade = True - break - - if hasInnerClade: # this node is an internal node - currentNode = self._makeInternalNode(node, depth= depth) - for child in node: - child = self.parseNode(child, depth + 1) - if isinstance(child, Node): - currentNode.addChildNode(child) - - else: # this node is a leaf node - currentNode = self._makeLeafNode(node, depth=depth+1) - - return currentNode - - - def _makeLeafNode(self, leafNode, depth = 0 ): - """Makes leaf nodes by calling Phylotree methods""" - node = {} - for child in leafNode: - childTag = self.cleanTag(child.tag) - if childTag in self.tagsOfInterest: - key = self.tagsOfInterest[childTag] # need to map phyloxml terms to ours - node[key] = child.text - - node["depth"] = depth - return self.phyloTree.makeNode(self._getNodeName(leafNode), **node) - - def _getNodeName(self, node, depth=-1): - """Gets the name of a claude. It handles the case where a taxonomy node is involved""" - - def getTagFromTaxonomyNode(node): - """Returns the name of a taxonomy node. A taxonomy node have to be treated differently as the name - is embedded one level deeper""" - phyloxmlTaxoNames = { - "common_name" : "", - "scientific_name" : "", - "code" : "" - } - for child in node: - childTag = self.cleanTag(child.tag) - if childTag in phyloxmlTaxoNames: - return child.text - return "" - - nodeName = "" - for child in node: - childTag = self.cleanTag(child.tag) - if childTag == "name" : - nodeName = child.text - break - elif childTag == "taxonomy": - nodeName = getTagFromTaxonomyNode(child) - break - - return nodeName - - - def _makeInternalNode(self, internalNode, depth=0): - """ Makes an internal node from an element object that is guranteed to be a parent node. - Gets the value of interests like events and appends it to a custom node object that will be passed to PhyloTree to make nodes - """ - node = {} - for child in internalNode: - childTag = self.cleanTag(child.tag) - if childTag == "clade": - continue - elif childTag in self.tagsOfInterest: - if childTag == "events": # events is nested 1 more level deeper than others - key, text = "events", self.cleanTag(child[0].tag) - else: - key = self.tagsOfInterest[childTag] - text = child.text - node[key] = text - - - return self.phyloTree.makeNode(self._getNodeName(internalNode, depth), **node) - - - def cleanTag(self, tagString): - return tagString[self.nameSpaceIndex:] - \ No newline at end of file diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 lib/galaxy/webapps/galaxy/controllers/phyloviz.py --- a/lib/galaxy/webapps/galaxy/controllers/phyloviz.py +++ /dev/null @@ -1,97 +0,0 @@ -import pkg_resources -pkg_resources.require( "bx-python" ) - -from galaxy.util.json import to_json_string, from_json_string -from galaxy.web.base.controller import * -from galaxy.visualization.phyloviz.phyloviz_dataprovider import Phyloviz_DataProvider - - -class PhyloVizController( BaseUIController, UsesVisualizationMixin, UsesHistoryDatasetAssociationMixin, SharableMixin ): - """ - Controller for phyloViz browser interface. - """ - def __init__(self, app ): - BaseUIController.__init__( self, app ) - - @web.expose - @web.require_login() - def index( self, trans, dataset_id = None, **kwargs ): - """ - The index method is called using phyloviz/ with a dataset id passed in. - The relevant data set is then retrieved via get_json_from_datasetId which interfaces with the parser - The json representation of the phylogenetic tree along with the config is then written in the .mako template and passed back to the user - """ - json, config = self.get_json_from_datasetId(trans, dataset_id) - config["saved_visualization"] = False - return trans.fill_template( "visualization/phyloviz.mako", data = json, config=config) - - - @web.expose - def visualization(self, trans, id): - """ - Called using a viz_id (id) to retrieved stored visualization data (in json format) and all the viz_config - """ - viz = self.get_visualization(trans, id) - config = self.get_visualization_config(trans, viz) - config["saved_visualization"] = True - data = config["root"] - - return trans.fill_template( "visualization/phyloviz.mako", data = data, config=config) - - - @web.expose - @web.json - def load_visualization_json(self, trans, viz_id): - """ - Though not used in current implementation, this provides user with a convenient method to retrieve the viz_data & viz_config via json. - """ - viz = self.get_visualization(trans, viz_id) - viz_config = self.get_visualization_config(trans, viz) - viz_config["saved_visualization"] = True - return { - "data" : viz_config["root"], - "config" : viz_config - } - - - @web.expose - @web.json - def getJsonData(self, trans, dataset_id, treeIndex=0): - """ - Method to retrieve data asynchronously via json format. Retriving from here rather than - making a direct datasets/ call allows for some processing and event capturing - """ - treeIndex = int(treeIndex) - json, config = self.get_json_from_datasetId(trans, dataset_id, treeIndex) - packedJson = { - "data" : json, - "config" : config - } - - return packedJson - - - def get_json_from_datasetId(self, trans, dataset_id, treeIndex=0): - """ - For interfacing phyloviz controllers with phyloviz visualization data provider (parsers) - """ - dataset = self.get_dataset(trans, dataset_id) - fileExt, filepath = dataset.ext, dataset.file_name # .name stores the name of the dataset from the orginal upload - json, config = "", {} # config contains properties of the tree and file - - if fileExt == "json": - something, json = self.get_data(dataset) - else: - try: - pd = Phyloviz_DataProvider() - json, config = pd.get_data(filepath, fileExt) - json = json[treeIndex] - except Exception: - pass - - config["title"] = dataset.display_name() - config["ext"] = fileExt - config["dataset_id"] = dataset_id - config["treeIndex"] = treeIndex - - return json, config diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 lib/galaxy/webapps/galaxy/controllers/visualization.py --- a/lib/galaxy/webapps/galaxy/controllers/visualization.py +++ b/lib/galaxy/webapps/galaxy/controllers/visualization.py @@ -7,7 +7,7 @@ from galaxy.util.sanitize_html import sanitize_html from galaxy.visualization.genomes import decode_dbkey from galaxy.visualization.genome.visual_analytics import get_dataset_job -from galaxy.visualization.data_providers.basic import ColumnDataProvider +from galaxy.visualization.data_providers.phyloviz import PhylovizDataProvider from .library import LibraryListGrid @@ -824,6 +824,25 @@ historyID=history_id, kwargs=kwargs ) + @web.expose + def phyloviz( self, trans, dataset_id, tree_index=0, **kwargs ): + # Get HDA. + hda = self.get_dataset( trans, dataset_id, check_ownership=False, check_accessible=True ) + + # Get data. + pd = PhylovizDataProvider( original_dataset=hda ) + json, config = pd.get_data() + json = json[tree_index] + + config["title"] = hda.display_name() + config["ext"] = hda.datatype.file_ext + config["dataset_id"] = dataset_id + config["treeIndex"] = tree_index + config["saved_visualization"] = False + + # Return viz. + return trans.fill_template_mako( "visualization/phyloviz.mako", data = json, config=config ) + @web.json def bookmarks_from_dataset( self, trans, hda_id=None, ldda_id=None ): if hda_id: diff -r 569f7d07801074a4e5207493afe72354ccb8d3ad -r 6f9ce8692bb6aae38d720abd489f8e0edfd5f8c3 templates/root/history_common.mako --- a/templates/root/history_common.mako +++ b/templates/root/history_common.mako @@ -229,8 +229,8 @@ visualizations = data.get_visualizations() ## HACK: if there are visualizations, only provide trackster for now ## since others are not ready. - comment out to see all WIP visualizations - if visualizations: - visualizations = [ vis for vis in visualizations if vis in [ 'trackster' ] ] + #if visualizations: + # visualizations = [ vis for vis in visualizations if vis in [ 'trackster' ] ] %> %if visualizations: <a href="${h.url_for( controller='visualization' )}" Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket