[hg] galaxy 3017: Added sff datatype
details: http://www.bx.psu.edu/hg/galaxy/rev/18586d1194f9 changeset: 3017:18586d1194f9 user: rc date: Wed Nov 11 17:55:08 2009 -0500 description: Added sff datatype Also, made changes to upload.py to sniff sff binary files diffstat: lib/galaxy/datatypes/data.py | 34 ++++++++++++++++- lib/galaxy/datatypes/registry.py | 3 + lib/galaxy/datatypes/sniff.py | 3 + lib/galaxy/datatypes/test/1.sff | test-data/1.sff | test/functional/test_sniffing_and_metadata_settings.py | 11 +++++ tools/data_source/upload.py | 4 ++ 7 files changed, 54 insertions(+), 1 deletions(-) diffs (124 lines): diff -r 9b9c3603fd09 -r 18586d1194f9 lib/galaxy/datatypes/data.py --- a/lib/galaxy/datatypes/data.py Wed Nov 11 16:52:18 2009 -0500 +++ b/lib/galaxy/datatypes/data.py Wed Nov 11 17:55:08 2009 -0500 @@ -1,4 +1,4 @@ -import logging, os, sys, time, tempfile +import logging, os, sys, time, tempfile, binascii from galaxy import util from galaxy.util.odict import odict from galaxy.util.bunch import Bunch @@ -455,3 +455,35 @@ class Newick( Text ): pass + +class Sff( Binary ): + """ Standard Flowgram Format (SFF) """ + file_ext = "sff" + def __init__( self, **kwd ): + Binary.__init__(self, **kwd) + def init_meta( self, dataset, copy_from=None ): + Binary.init_meta( self, dataset, copy_from=copy_from ) + def sniff( self, filename ): + ''' + The first 4 bytes of any sff file is '.sff' + + >>> fname = get_test_fname( '1.sff' ) + >>> Sff().sniff( fname ) + True + ''' + header = open( filename ).read(4) + if binascii.b2a_hex( header ) == binascii.hexlify( '.sff' ): + return True + return False + def set_peek( self, dataset ): + if not dataset.dataset.purged: + dataset.peek = "Binary sff file" + dataset.blurb = nice_size( dataset.get_size() ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def display_peek(self, dataset): + try: + return dataset.peek + except: + return "sff file (%s)" % ( nice_size( dataset.get_size() ) ) diff -r 9b9c3603fd09 -r 18586d1194f9 lib/galaxy/datatypes/registry.py --- a/lib/galaxy/datatypes/registry.py Wed Nov 11 16:52:18 2009 -0500 +++ b/lib/galaxy/datatypes/registry.py Wed Nov 11 17:55:08 2009 -0500 @@ -133,6 +133,7 @@ 'qual454' : qualityscore.QualityScore454(), 'sam' : tabular.Sam(), 'scf' : images.Scf(), + 'sff' : data.Sff(), 'tabular' : tabular.Tabular(), 'taxonomy' : tabular.Taxonomy(), 'txt' : data.Text(), @@ -162,6 +163,7 @@ 'qual454' : 'text/plain', 'sam' : 'text/plain', 'scf' : 'application/octet-stream', + 'sff' : 'application/octet-stream', 'tabular' : 'text/plain', 'taxonomy' : 'text/plain', 'txt' : 'text/plain', @@ -172,6 +174,7 @@ # because some formats are much more flexibly defined than others. if len(self.sniff_order) < 1: self.sniff_order = [ + data.Sff(), xml.BlastXml(), sequence.Maf(), sequence.Lav(), diff -r 9b9c3603fd09 -r 18586d1194f9 lib/galaxy/datatypes/sniff.py --- a/lib/galaxy/datatypes/sniff.py Wed Nov 11 16:52:18 2009 -0500 +++ b/lib/galaxy/datatypes/sniff.py Wed Nov 11 17:55:08 2009 -0500 @@ -249,6 +249,9 @@ >>> fname = get_test_fname('alignment.lav') >>> guess_ext(fname) 'lav' + >>> fname = get_test_fname('1.sff') + >>> guess_ext(fname) + 'sff' """ if sniff_order is None: datatypes_registry = registry.Registry() diff -r 9b9c3603fd09 -r 18586d1194f9 lib/galaxy/datatypes/test/1.sff Binary file lib/galaxy/datatypes/test/1.sff has changed diff -r 9b9c3603fd09 -r 18586d1194f9 test-data/1.sff Binary file test-data/1.sff has changed diff -r 9b9c3603fd09 -r 18586d1194f9 test/functional/test_sniffing_and_metadata_settings.py --- a/test/functional/test_sniffing_and_metadata_settings.py Wed Nov 11 16:52:18 2009 -0500 +++ b/test/functional/test_sniffing_and_metadata_settings.py Wed Nov 11 17:55:08 2009 -0500 @@ -246,6 +246,17 @@ assert latest_hda is not None, "Problem retrieving fastq hda from the database" if not latest_hda.name == '2gen.fastq' and not latest_hda.extension == 'fastq': raise AssertionError, "fastq data type was not correctly sniffed." + def test_0100_sff_datatype( self ): + """Testing correctly sniffing sff format upon upload""" + self.upload_file( '1.sff' ) + self.verify_dataset_correctness( '1.sff' ) + self.check_history_for_string( 'format: <span class="sff">sff' ) + latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert latest_hda is not None, "Problem retrieving sff hda from the database" + if not latest_hda.name == '1.sff' and not latest_hda.extension == 'sff': + raise AssertionError, "sff data type was not correctly sniffed." def test_9999_clean_up( self ): self.delete_history( id=self.security.encode_id( history1.id ) ) self.logout() diff -r 9b9c3603fd09 -r 18586d1194f9 tools/data_source/upload.py --- a/tools/data_source/upload.py Wed Nov 11 16:52:18 2009 -0500 +++ b/tools/data_source/upload.py Wed Nov 11 17:55:08 2009 -0500 @@ -234,6 +234,10 @@ else: ext = dataset.file_type data_type = ext + elif data_type == 'binary' and ext == 'auto': + # currently we are only sniffing sff binary files + ext = sniff.guess_ext( dataset.path ) + data_type = ext # Save job info for the framework if ext == 'auto' and dataset.ext: ext = dataset.ext
participants (1)
-
Greg Von Kuster