4 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/8bccff9db869/ changeset: 8bccff9db869 user: jmchilton date: 2012-08-16 06:35:28 summary: Remove hard-coding of unsniffable binary types array and manually checking each sniffable type with a separate function in upload.py. Information on both types is now stored dynamically as static variables in the Binary class. affected #: 4 files diff -r a42eae47ae877073a077527463beb32e251d7035 -r 8bccff9db86973c924b17a4251573e25af7618a4 lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -18,10 +18,31 @@ log = logging.getLogger(__name__) # Currently these supported binary data types must be manually set on upload -unsniffable_binary_formats = [ 'ab1', 'scf', 'h5' ] class Binary( data.Data ): """Binary data""" + sniffable_binary_formats = [] + unsniffable_binary_formats = [] + + @staticmethod + def register_sniffable_binary_format(data_type, ext, type_class): + Binary.sniffable_binary_formats.append({"type": data_type, "ext": ext, "class": type_class}) + + @staticmethod + def register_unsniffable_binary_ext(ext): + Binary.unsniffable_binary_formats.append(ext) + + @staticmethod + def is_sniffable_binary(filename): + for format in Binary.sniffable_binary_formats: + if format["class"]().sniff(filename): + return (format["type"], format["ext"]) + return None + + @staticmethod + def is_ext_unsniffable(ext): + return ext in Binary.unsniffable_binary_formats + def set_peek( self, dataset, is_multi_byte=False ): """Set the peek and blurb text""" if not dataset.dataset.purged: @@ -62,6 +83,8 @@ except: return "Binary ab1 sequence file (%s)" % ( data.nice_size( dataset.get_size() ) ) +Binary.register_unsniffable_binary_ext("ab1") + class Bam( Binary ): """Class describing a BAM binary file""" file_ext = "bam" @@ -218,6 +241,8 @@ def get_track_type( self ): return "ReadTrack", {"data": "bai", "index": "summary_tree"} 
+Binary.register_sniffable_binary_format("bam", "bam", Bam) + class H5( Binary ): """Class describing an HDF5 file""" file_ext = "h5" @@ -235,6 +260,8 @@ except: return "Binary h5 sequence file (%s)" % ( data.nice_size( dataset.get_size() ) ) +Binary.register_unsniffable_binary_ext("h5") + class Scf( Binary ): """Class describing an scf binary sequence file""" file_ext = "scf" @@ -252,6 +279,8 @@ except: return "Binary scf sequence file (%s)" % ( data.nice_size( dataset.get_size() ) ) +Binary.register_unsniffable_binary_ext("scf") + class Sff( Binary ): """ Standard Flowgram Format (SFF) """ file_ext = "sff" @@ -281,6 +310,8 @@ except: return "Binary sff file (%s)" % ( data.nice_size( dataset.get_size() ) ) +Binary.register_sniffable_binary_format("sff", "sff", Sff) + class BigWig(Binary): """ Accessing binary BigWig files from UCSC. @@ -314,6 +345,8 @@ def get_track_type( self ): return "LineTrack", {"data_standalone": "bigwig"} +Binary.register_sniffable_binary_format("bigwig", "bigwig", BigWig) + class BigBed(BigWig): """BigBed support from UCSC.""" def __init__( self, **kwd ): @@ -323,6 +356,8 @@ def get_track_type( self ): return "LineTrack", {"data_standalone": "bigbed"} +Binary.register_sniffable_binary_format("bigbed", "bigbed", BigBed) + class TwoBit (Binary): """Class describing a TwoBit format nucleotide file""" diff -r a42eae47ae877073a077527463beb32e251d7035 -r 8bccff9db86973c924b17a4251573e25af7618a4 lib/galaxy/datatypes/images.py --- a/lib/galaxy/datatypes/images.py +++ b/lib/galaxy/datatypes/images.py @@ -4,6 +4,7 @@ import data import logging +from galaxy.datatypes.binary import Binary from galaxy.datatypes.metadata import MetadataElement from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * @@ -154,6 +155,8 @@ except IndexError: return False +Binary.register_sniffable_binary_format("pdf", "pdf", Pdf) + def create_applet_tag_peek( class_name, archive, params ): text = """ <!--[if !IE]>--> diff -r 
a42eae47ae877073a077527463beb32e251d7035 -r 8bccff9db86973c924b17a4251573e25af7618a4 lib/galaxy/datatypes/sniff.py --- a/lib/galaxy/datatypes/sniff.py +++ b/lib/galaxy/datatypes/sniff.py @@ -5,7 +5,6 @@ import registry from galaxy import util from galaxy.datatypes.checkers import * -from galaxy.datatypes.binary import unsniffable_binary_formats log = logging.getLogger(__name__) @@ -381,7 +380,7 @@ ext = guess_ext( filename, sniff_order = datatypes_registry.sniff_order, is_multi_byte=is_multi_byte ) if check_binary( filename ): - if ext not in unsniffable_binary_formats and not datatypes_registry.get_datatype_by_extension( ext ).sniff( filename ): + if not Binary.is_ext_unsniffable(ext) and not datatypes_registry.get_datatype_by_extension( ext ).sniff( filename ): raise InappropriateDatasetContentError, 'The binary uploaded file contains inappropriate content.' elif check_html( filename ): raise InappropriateDatasetContentError, 'The uploaded file contains inappropriate HTML content.' diff -r a42eae47ae877073a077527463beb32e251d7035 -r 8bccff9db86973c924b17a4251573e25af7618a4 tools/data_source/upload.py --- a/tools/data_source/upload.py +++ b/tools/data_source/upload.py @@ -58,16 +58,6 @@ return [safe_dict(x) for x in d] else: return d -def check_bam( file_path ): - return Bam().sniff( file_path ) -def check_sff( file_path ): - return Sff().sniff( file_path ) -def check_pdf( file_path ): - return Pdf().sniff( file_path ) -def check_bigwig( file_path ): - return BigWig().sniff( file_path ) -def check_bigbed( file_path ): - return BigBed().sniff( file_path ) def parse_outputs( args ): rval = {} for arg in args: @@ -121,21 +111,11 @@ data_type = 'multi-byte char' ext = sniff.guess_ext( dataset.path, is_multi_byte=True ) # Is dataset content supported sniffable binary? 
- elif check_bam( dataset.path ): - ext = 'bam' - data_type = 'bam' - elif check_sff( dataset.path ): - ext = 'sff' - data_type = 'sff' - elif check_pdf( dataset.path ): - ext = 'pdf' - data_type = 'pdf' - elif check_bigwig( dataset.path ): - ext = 'bigwig' - data_type = 'bigwig' - elif check_bigbed( dataset.path ): - ext = 'bigbed' - data_type = 'bigbed' + else: + type_info = Binary.is_sniffable_binary( dataset.path ) + if type_info: + data_type = type_info[0] + ext = type_info[1] if not data_type: # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress is_gzipped, is_valid = check_gzip( dataset.path ) @@ -267,10 +247,10 @@ parts = dataset.name.split( "." ) if len( parts ) > 1: ext = parts[1].strip().lower() - if ext not in unsniffable_binary_formats: + if not Binary.is_ext_unsniffable(ext): file_err( 'The uploaded binary file contains inappropriate content', dataset, json_file ) return - elif ext in unsniffable_binary_formats and dataset.file_type != ext: + elif Binary.is_ext_unsniffable(ext) and dataset.file_type != ext: err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % ( ext.capitalize(), ext ) file_err( err_msg, dataset, json_file ) return https://bitbucket.org/galaxy/galaxy-central/changeset/3338d7629e33/ changeset: 3338d7629e33 user: jmchilton date: 2012-09-04 05:14:36 summary: Merge. affected #: 308 files Diff too large to display. https://bitbucket.org/galaxy/galaxy-central/changeset/ee84b9493b94/ changeset: ee84b9493b94 user: jmchilton date: 2012-09-04 05:25:42 summary: I messed up the previous merge, this fixes that commit. 
affected #: 1 file diff -r 3338d7629e33fab90b6af8f84aa4a09a9798aafc -r ee84b9493b948a2d0b56ecab1f45e75a952ef631 lib/galaxy/datatypes/sniff.py --- a/lib/galaxy/datatypes/sniff.py +++ b/lib/galaxy/datatypes/sniff.py @@ -5,7 +5,6 @@ import registry from galaxy import util from galaxy.datatypes.checkers import * -from galaxy.datatypes.binary import unsniffable_binary_formats from encodings import search_function as encodings_search_function log = logging.getLogger(__name__) https://bitbucket.org/galaxy/galaxy-central/changeset/7182b04a32a7/ changeset: 7182b04a32a7 user: natefoo date: 2012-09-04 17:07:45 summary: Merged in jmchilton/galaxy-central-binary-type-enhancements (pull request #59) affected #: 4 files diff -r 83434eb47d587fe729252fe9d7ef6e34e499afb8 -r 7182b04a32a7cb70411cbf07a5b10bf6bd59206e lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -18,10 +18,31 @@ log = logging.getLogger(__name__) # Currently these supported binary data types must be manually set on upload -unsniffable_binary_formats = [ 'ab1', 'scf', 'h5' ] class Binary( data.Data ): """Binary data""" + sniffable_binary_formats = [] + unsniffable_binary_formats = [] + + @staticmethod + def register_sniffable_binary_format(data_type, ext, type_class): + Binary.sniffable_binary_formats.append({"type": data_type, "ext": ext, "class": type_class}) + + @staticmethod + def register_unsniffable_binary_ext(ext): + Binary.unsniffable_binary_formats.append(ext) + + @staticmethod + def is_sniffable_binary(filename): + for format in Binary.sniffable_binary_formats: + if format["class"]().sniff(filename): + return (format["type"], format["ext"]) + return None + + @staticmethod + def is_ext_unsniffable(ext): + return ext in Binary.unsniffable_binary_formats + def set_peek( self, dataset, is_multi_byte=False ): """Set the peek and blurb text""" if not dataset.dataset.purged: @@ -62,6 +83,8 @@ except: return "Binary ab1 sequence file (%s)" % ( 
data.nice_size( dataset.get_size() ) ) +Binary.register_unsniffable_binary_ext("ab1") + class Bam( Binary ): """Class describing a BAM binary file""" file_ext = "bam" @@ -218,6 +241,8 @@ def get_track_type( self ): return "ReadTrack", {"data": "bai", "index": "summary_tree"} +Binary.register_sniffable_binary_format("bam", "bam", Bam) + class H5( Binary ): """Class describing an HDF5 file""" file_ext = "h5" @@ -235,6 +260,8 @@ except: return "Binary h5 sequence file (%s)" % ( data.nice_size( dataset.get_size() ) ) +Binary.register_unsniffable_binary_ext("h5") + class Scf( Binary ): """Class describing an scf binary sequence file""" file_ext = "scf" @@ -252,6 +279,8 @@ except: return "Binary scf sequence file (%s)" % ( data.nice_size( dataset.get_size() ) ) +Binary.register_unsniffable_binary_ext("scf") + class Sff( Binary ): """ Standard Flowgram Format (SFF) """ file_ext = "sff" @@ -281,6 +310,8 @@ except: return "Binary sff file (%s)" % ( data.nice_size( dataset.get_size() ) ) +Binary.register_sniffable_binary_format("sff", "sff", Sff) + class BigWig(Binary): """ Accessing binary BigWig files from UCSC. 
@@ -314,6 +345,8 @@ def get_track_type( self ): return "LineTrack", {"data_standalone": "bigwig"} +Binary.register_sniffable_binary_format("bigwig", "bigwig", BigWig) + class BigBed(BigWig): """BigBed support from UCSC.""" def __init__( self, **kwd ): @@ -323,6 +356,8 @@ def get_track_type( self ): return "LineTrack", {"data_standalone": "bigbed"} +Binary.register_sniffable_binary_format("bigbed", "bigbed", BigBed) + class TwoBit (Binary): """Class describing a TwoBit format nucleotide file""" diff -r 83434eb47d587fe729252fe9d7ef6e34e499afb8 -r 7182b04a32a7cb70411cbf07a5b10bf6bd59206e lib/galaxy/datatypes/images.py --- a/lib/galaxy/datatypes/images.py +++ b/lib/galaxy/datatypes/images.py @@ -4,6 +4,7 @@ import data import logging +from galaxy.datatypes.binary import Binary from galaxy.datatypes.metadata import MetadataElement from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * @@ -154,6 +155,8 @@ except IndexError: return False +Binary.register_sniffable_binary_format("pdf", "pdf", Pdf) + def create_applet_tag_peek( class_name, archive, params ): text = """ <!--[if !IE]>--> diff -r 83434eb47d587fe729252fe9d7ef6e34e499afb8 -r 7182b04a32a7cb70411cbf07a5b10bf6bd59206e lib/galaxy/datatypes/sniff.py --- a/lib/galaxy/datatypes/sniff.py +++ b/lib/galaxy/datatypes/sniff.py @@ -5,7 +5,6 @@ import registry from galaxy import util from galaxy.datatypes.checkers import * -from galaxy.datatypes.binary import unsniffable_binary_formats from encodings import search_function as encodings_search_function log = logging.getLogger(__name__) @@ -380,7 +379,7 @@ ext = guess_ext( filename, sniff_order = datatypes_registry.sniff_order, is_multi_byte=is_multi_byte ) if check_binary( filename ): - if ext not in unsniffable_binary_formats and not datatypes_registry.get_datatype_by_extension( ext ).sniff( filename ): + if not Binary.is_ext_unsniffable(ext) and not datatypes_registry.get_datatype_by_extension( ext ).sniff( filename ): raise 
InappropriateDatasetContentError, 'The binary uploaded file contains inappropriate content.' elif check_html( filename ): raise InappropriateDatasetContentError, 'The uploaded file contains inappropriate HTML content.' diff -r 83434eb47d587fe729252fe9d7ef6e34e499afb8 -r 7182b04a32a7cb70411cbf07a5b10bf6bd59206e tools/data_source/upload.py --- a/tools/data_source/upload.py +++ b/tools/data_source/upload.py @@ -58,16 +58,6 @@ return [safe_dict(x) for x in d] else: return d -def check_bam( file_path ): - return Bam().sniff( file_path ) -def check_sff( file_path ): - return Sff().sniff( file_path ) -def check_pdf( file_path ): - return Pdf().sniff( file_path ) -def check_bigwig( file_path ): - return BigWig().sniff( file_path ) -def check_bigbed( file_path ): - return BigBed().sniff( file_path ) def parse_outputs( args ): rval = {} for arg in args: @@ -122,21 +112,11 @@ data_type = 'multi-byte char' ext = sniff.guess_ext( dataset.path, is_multi_byte=True ) # Is dataset content supported sniffable binary? - elif check_bam( dataset.path ): - ext = 'bam' - data_type = 'bam' - elif check_sff( dataset.path ): - ext = 'sff' - data_type = 'sff' - elif check_pdf( dataset.path ): - ext = 'pdf' - data_type = 'pdf' - elif check_bigwig( dataset.path ): - ext = 'bigwig' - data_type = 'bigwig' - elif check_bigbed( dataset.path ): - ext = 'bigbed' - data_type = 'bigbed' + else: + type_info = Binary.is_sniffable_binary( dataset.path ) + if type_info: + data_type = type_info[0] + ext = type_info[1] if not data_type: # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress is_gzipped, is_valid = check_gzip( dataset.path ) @@ -268,10 +248,10 @@ parts = dataset.name.split( "." 
) if len( parts ) > 1: ext = parts[1].strip().lower() - if ext not in unsniffable_binary_formats: + if not Binary.is_ext_unsniffable(ext): file_err( 'The uploaded binary file contains inappropriate content', dataset, json_file ) return - elif ext in unsniffable_binary_formats and dataset.file_type != ext: + elif Binary.is_ext_unsniffable(ext) and dataset.file_type != ext: err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % ( ext.capitalize(), ext ) file_err( err_msg, dataset, json_file ) return Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.