commit/galaxy-central: carlfeberhard: Pull request #168 from Kyle Ellrott: add graph content datatypes SIF, XGMML, RDF
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/f3a985005267/ Changeset: f3a985005267 User: carlfeberhard Date: 2013-08-08 00:07:46 Summary: Pull request #168 from Kyle Ellrott: add graph content datatypes SIF, XGMML, RDF Affected #: 3 files diff -r 31774ed13ae2fc137d231df4b0cb710b8bb47797 -r f3a985005267a8c2022da3c0c8d9af496f8e5901 datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample +++ b/datatypes_conf.xml.sample @@ -245,6 +245,10 @@ <datatype extension="snpmatrix" type="galaxy.datatypes.genetics:SNPMatrix" display_in_upload="true"/><datatype extension="xls" type="galaxy.datatypes.tabular:Tabular"/><!-- End RGenetics Datatypes --> + <!-- graph datatypes --> + <datatype extension="xgmml" type="galaxy.datatypes.graph:Xgmml" display_in_upload="true"/> + <datatype extension="sif" type="galaxy.datatypes.graph:Sif" display_in_upload="true"/> + <datatype extension="rdf" type="galaxy.datatypes.graph:Rdf" display_in_upload="true"/></registration><sniffers><!-- @@ -281,6 +285,9 @@ <sniffer type="galaxy.datatypes.tabular:Sam"/><sniffer type="galaxy.datatypes.data:Newick"/><sniffer type="galaxy.datatypes.data:Nexus"/> + <sniffer type="galaxy.datatypes.graph:Xgmml"/> + <sniffer type="galaxy.datatypes.graph:Sif"/> + <sniffer type="galaxy.datatypes.graph:Rdf"/><sniffer type="galaxy.datatypes.images:Jpg"/><sniffer type="galaxy.datatypes.images:Png"/><sniffer type="galaxy.datatypes.images:Tiff"/> diff -r 31774ed13ae2fc137d231df4b0cb710b8bb47797 -r f3a985005267a8c2022da3c0c8d9af496f8e5901 lib/galaxy/datatypes/graph.py --- /dev/null +++ b/lib/galaxy/datatypes/graph.py @@ -0,0 +1,110 @@ +""" +Graph content classes. +""" + +import data, tabular, xml + +import logging +log = logging.getLogger( __name__ ) + + +class Xgmml( xml.GenericXml ): + """ + XGMML graph format + (http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats). + """ + file_ext = "xgmml" + + def set_peek( self, dataset, is_multi_byte=False ): + """ + Set the peek and blurb text + """ + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'XGMML data' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def sniff( self, filename ): + """ + Determines whether the file is XML or not, should probably actually check if it is a real xgmml file.... + """ + line = '' + with open( filename ) as handle: + line = handle.readline() + + #TODO - Is there a more robust way to do this? + return line.startswith( '<?xml ' ) + + @staticmethod + def merge( split_files, output_file ): + """ + Merging multiple XML files is non-trivial and must be done in subclasses. + """ + if len( split_files ) > 1: + raise NotImplementedError( "Merging multiple XML files is non-trivial " + + "and must be implemented for each XML type" ) + #For one file only, use base class method (move/copy) + data.Text.merge( split_files, output_file ) + + +class Sif( tabular.Tabular ): + """ + SIF graph format + (http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats). + + First column: node id + Second column: relationship type + Third to Nth column: target ids for link + """ + file_ext = "sif" + + def set_peek( self, dataset, is_multi_byte=False ): + """ + Set the peek and blurb text + """ + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'SIF data' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def sniff( self, filename ): + """ + Determines whether the file is SIF + """ + print '---------------------------------------- sniffing Siffing' + line = '' + with open( filename ) as infile: + correct = True + for line in infile: + if not line.strip(): + continue + tlen = len( line.split( "\t" ) ) + # may contain 1 or >= 3 columns + if tlen == 2: + correct = False + return correct + + @staticmethod + def merge( split_files, output_file ): + data.Text.merge( split_files, output_file ) + + +#TODO: we might want to look at rdflib or a similar, larger lib/egg +class Rdf( xml.GenericXml ): + """ + Resource Description Framework format (http://www.w3.org/RDF/). + """ + file_ext = "rdf" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'RDF data' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' diff -r 31774ed13ae2fc137d231df4b0cb710b8bb47797 -r f3a985005267a8c2022da3c0c8d9af496f8e5901 lib/galaxy/datatypes/registry.py --- a/lib/galaxy/datatypes/registry.py +++ b/lib/galaxy/datatypes/registry.py @@ -2,7 +2,7 @@ Provides mapping between extensions and datatypes, mime-types, etc. """ import os, sys, tempfile, threading, logging, imp -import data, tabular, interval, images, sequence, qualityscore, genetics, xml, coverage, tracks, chrominfo, binary, assembly, ngsindex +import data, tabular, interval, images, sequence, qualityscore, genetics, xml, coverage, tracks, chrominfo, binary, assembly, ngsindex, graph import galaxy.util from galaxy.util.odict import odict from display_applications.application import DisplayApplication Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
commits-noreply@bitbucket.org