Peter van Heusden wrote:
Hi everyone
I've been using the twobit datatype as generated by faToTwoBit (part of Jim Kent's BLAT package) and read by bx-python (bx.seq.twobit). So here's a patch to add the twobit datatype to Galaxy.
Thanks Peter, I've committed your patch to galaxy-central. --nate
Peter
# HG changeset patch # User Peter van Heusden <pvh@sanbi.ac.za> # Date 1307966741 -7200 # Node ID 3b68bc0d67b43af2ce69fb1eeb9160ca053c4c72 # Parent 8bcc0877b39bf10c2330f0651d2409a2b2e9c469 Added TwoBit datatype for twobit binary nucleotide datatype. Sniffer code based on bx-python's bx.seq.twobit.
diff -r 8bcc0877b39b -r 3b68bc0d67b4 datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample Fri Jun 10 20:10:09 2011 -0400 +++ b/datatypes_conf.xml.sample Mon Jun 13 14:05:41 2011 +0200 @@ -116,6 +116,7 @@ <datatype extension="svg" type="galaxy.datatypes.images:Image" mimetype="image/svg+xml"/> <datatype extension="taxonomy" type="galaxy.datatypes.tabular:Taxonomy" display_in_upload="true"/> <datatype extension="tabular" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true"/> + <datatype extension="twobit" type="galaxy.datatypes.binary:TwoBit" mimetype="application/octet-stream" display_in_upload="true"/> <datatype extension="txt" type="galaxy.datatypes.data:Text" display_in_upload="true"/> <datatype extension="memexml" type="galaxy.datatypes.xml:MEMEXml" mimetype="application/xml" display_in_upload="true"/> <datatype extension="blastxml" type="galaxy.datatypes.xml:BlastXml" mimetype="application/xml" display_in_upload="true"/> @@ -279,6 +280,7 @@ defined format first, followed by next-most rigidly defined, and so on. --> + <sniffer type="galaxy.datatypes.binary:TwoBit"/> <sniffer type="galaxy.datatypes.binary:Bam"/> <sniffer type="galaxy.datatypes.binary:Sff"/> <sniffer type="galaxy.datatypes.xml:BlastXml"/> diff -r 8bcc0877b39b -r 3b68bc0d67b4 lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py Fri Jun 10 20:10:09 2011 -0400 +++ b/lib/galaxy/datatypes/binary.py Mon Jun 13 14:05:41 2011 +0200 @@ -6,6 +6,10 @@ from galaxy.datatypes.metadata import MetadataElement from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * +from galaxy import eggs +import pkg_resources +pkg_resources.require( "bx-python" ) +from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE from urllib import urlencode, quote_plus import zipfile, gzip import os, subprocess, tempfile @@ -292,3 +296,29 @@ def get_track_type( self ): return "LineTrack", {"data_standalone": "bigbed"}
class TwoBit(Binary):
    """Class describing a TwoBit format nucleotide file.

    TwoBit files are the binary nucleotide files generated by faToTwoBit
    and read by bx-python's ``bx.seq.twobit`` module; the sniffer below
    relies on the magic-number constants exported by that module.
    """

    file_ext = "twobit"

    def sniff(self, filename):
        """Return True if ``filename`` begins with a twobit magic number.

        The first ``TWOBIT_MAGIC_SIZE`` bytes are decoded as a big-endian
        unsigned long and compared against ``TWOBIT_MAGIC_NUMBER`` and
        ``TWOBIT_MAGIC_NUMBER_SWAP`` (presumably the byte-swapped variant;
        confirm against bx.seq.twobit).

        Returns False, rather than raising, when the file cannot be read
        or is too short to contain the magic number.
        """
        # Imported locally so the sniffer does not depend on a module-level
        # ``import struct`` being present elsewhere in the file.
        import struct

        try:
            # Binary mode: the header is raw bytes, not text.
            handle = open(filename, "rb")
            try:
                header = handle.read(TWOBIT_MAGIC_SIZE)
            finally:
                # Always release the handle, even if the read fails.
                handle.close()
        except IOError:
            return False

        # A truncated or empty file cannot be twobit; bail out before
        # struct.unpack raises struct.error on a short buffer.
        if len(header) != TWOBIT_MAGIC_SIZE:
            return False

        try:
            magic = struct.unpack(">L", header)[0]
        except struct.error:
            return False
        return magic in (TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP)

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text shown for ``dataset``.

        For a dataset that has not been purged, the peek is a fixed
        description and the blurb is the human-readable file size.
        For a purged dataset the parent class implementation is used.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Binary TwoBit format nucleotide file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            return super(TwoBit, self).set_peek(dataset, is_multi_byte)

    def display_peek(self, dataset):
        """Return the peek text for ``dataset``.

        Falls back to a generic description including the file size when
        reading ``dataset.peek`` fails.
        """
        try:
            return dataset.peek
        except Exception:
            # Best-effort fallback; narrowed from a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            return "Binary TwoBit format nucleotide file (%s)" % (data.nice_size(dataset.get_size()))
___________________________________________________________ Please keep all replies on the list by using "reply all" in your mail client. To manage your subscriptions to this and other Galaxy lists, please use the interface at: