1 new changeset in galaxy-central:
http://bitbucket.org/galaxy/galaxy-central/changeset/e7214c69ed7d/ changeset: e7214c69ed7d user: greg date: 2011-07-27 22:14:15 summary: Add an altered version of Jelle Scholtalbers' enhancement code to support uploading various image data types. I've moved some of the code components around from Jelle's version, and added some fixes. The code in the new image_util.py file enables detection of image types without the need to create a new Image() class.
I've also added baseline support of the HDF5 data type in this change set, but it is currently required to be in the unsniffable_binary_formats list. affected #: 7 files (9.8 KB)
--- a/datatypes_conf.xml.sample Tue Jul 26 16:14:31 2011 -0400 +++ b/datatypes_conf.xml.sample Wed Jul 27 16:14:15 2011 -0400 @@ -72,9 +72,10 @@ <!-- <display file="gbrowse/gbrowse_gff.xml" inherit="True" /> --></datatype><datatype extension="gff3" type="galaxy.datatypes.interval:Gff3" display_in_upload="true"/> - <datatype extension="gif" type="galaxy.datatypes.images:Image" mimetype="image/gif"/> + <datatype extension="gif" type="galaxy.datatypes.images:Gif" mimetype="image/gif"/><datatype extension="gmaj.zip" type="galaxy.datatypes.images:Gmaj" mimetype="application/zip"/><datatype extension="gtf" type="galaxy.datatypes.interval:Gtf" display_in_upload="true"/> + <datatype extension="h5" type="galaxy.datatypes.data:Data" mimetype="application/octet-stream"/><datatype extension="html" type="galaxy.datatypes.images:Html" mimetype="text/html"/><datatype extension="interval" type="galaxy.datatypes.interval:Interval" display_in_upload="true"><converter file="interval_to_bed_converter.xml" target_datatype="bed"/> @@ -90,7 +91,21 @@ <datatype extension="picard_interval_list" type="galaxy.datatypes.data:Text" subclass="True" display_in_upload="True"/><datatype extension="gatk_interval" type="galaxy.datatypes.data:Text" subclass="True" display_in_upload="True"/><datatype extension="gatk_dbsnp" type="galaxy.datatypes.data:Text" subclass="True" display_in_upload="True"/> - <datatype extension="jpg" type="galaxy.datatypes.images:Image" mimetype="image/jpeg"/> + <datatype extension="jpg" type="galaxy.datatypes.images:Jpg" mimetype="image/jpeg"/> + <datatype extension="tiff" type="galaxy.datatypes.images:Tiff" mimetype="image/tiff"/> + <datatype extension="bmp" type="galaxy.datatypes.images:Bmp" mimetype="image/bmp"/> + <datatype extension="im" type="galaxy.datatypes.images:Im" mimetype="image/im"/> + <datatype extension="pcd" type="galaxy.datatypes.images:Pcd" mimetype="image/pcd"/> + <datatype extension="pcx" type="galaxy.datatypes.images:Pcx" mimetype="image/pcx"/> + 
<datatype extension="ppm" type="galaxy.datatypes.images:Ppm" mimetype="image/ppm"/> + <datatype extension="psd" type="galaxy.datatypes.images:Psd" mimetype="image/psd"/> + <datatype extension="xbm" type="galaxy.datatypes.images:Xbm" mimetype="image/xbm"/> + <datatype extension="xpm" type="galaxy.datatypes.images:Xpm" mimetype="image/xpm"/> + <datatype extension="rgb" type="galaxy.datatypes.images:Rgb" mimetype="image/rgb"/> + <datatype extension="pbm" type="galaxy.datatypes.images:Pbm" mimetype="image/pbm"/> + <datatype extension="pgm" type="galaxy.datatypes.images:Pgm" mimetype="image/pgm"/> + <datatype extension="eps" type="galaxy.datatypes.images:Eps" mimetype="image/eps"/> + <datatype extension="rast" type="galaxy.datatypes.images:Rast" mimetype="image/rast"/><datatype extension="laj" type="galaxy.datatypes.images:Laj"/><datatype extension="lav" type="galaxy.datatypes.sequence:Lav" display_in_upload="true"/><datatype extension="maf" type="galaxy.datatypes.sequence:Maf" display_in_upload="true"> @@ -102,7 +117,7 @@ </datatype><datatype extension="pdf" type="galaxy.datatypes.images:Pdf" mimetype="application/pdf"/><datatype extension="pileup" type="galaxy.datatypes.tabular:Pileup" display_in_upload="true" /> - <datatype extension="png" type="galaxy.datatypes.images:Image" mimetype="image/png"/> + <datatype extension="png" type="galaxy.datatypes.images:Png" mimetype="image/png"/><datatype extension="qual" type="galaxy.datatypes.qualityscore:QualityScore" /><datatype extension="qualsolexa" type="galaxy.datatypes.qualityscore:QualityScoreSolexa" display_in_upload="true"/><datatype extension="qualillumina" type="galaxy.datatypes.qualityscore:QualityScoreIllumina" display_in_upload="true"/> @@ -116,7 +131,7 @@ <datatype extension="svg" type="galaxy.datatypes.images:Image" mimetype="image/svg+xml"/><datatype extension="taxonomy" type="galaxy.datatypes.tabular:Taxonomy" display_in_upload="true"/><datatype extension="tabular" type="galaxy.datatypes.tabular:Tabular" 
display_in_upload="true"/> - <datatype extension="twobit" type="galaxy.datatypes.binary:TwoBit" mimetype="application/octet-stream" display_in_upload="true"/> + <datatype extension="twobit" type="galaxy.datatypes.binary:TwoBit" mimetype="application/octet-stream" display_in_upload="true"/><datatype extension="txt" type="galaxy.datatypes.data:Text" display_in_upload="true"/><datatype extension="memexml" type="galaxy.datatypes.xml:MEMEXml" mimetype="application/xml" display_in_upload="true"/><datatype extension="blastxml" type="galaxy.datatypes.xml:BlastXml" mimetype="application/xml" display_in_upload="true"/> @@ -304,6 +319,24 @@ <sniffer type="galaxy.datatypes.tabular:Pileup"/><sniffer type="galaxy.datatypes.interval:Interval"/><sniffer type="galaxy.datatypes.tabular:Sam"/> + <sniffer type="galaxy.datatypes.images:Jpg"/> + <sniffer type="galaxy.datatypes.images:Png"/> + <sniffer type="galaxy.datatypes.images:Tiff"/> + <sniffer type="galaxy.datatypes.images:Bmp"/> + <sniffer type="galaxy.datatypes.images:Gif"/> + <sniffer type="galaxy.datatypes.images:Im"/> + <sniffer type="galaxy.datatypes.images:Pcd"/> + <sniffer type="galaxy.datatypes.images:Pcx"/> + <sniffer type="galaxy.datatypes.images:Ppm"/> + <sniffer type="galaxy.datatypes.images:Psd"/> + <sniffer type="galaxy.datatypes.images:Xbm"/> + <sniffer type="galaxy.datatypes.images:Xpm"/> + <sniffer type="galaxy.datatypes.images:Rgb"/> + <sniffer type="galaxy.datatypes.images:Pbm"/> + <sniffer type="galaxy.datatypes.images:Pgm"/> + <sniffer type="galaxy.datatypes.images:Xpm"/> + <sniffer type="galaxy.datatypes.images:Eps"/> + <sniffer type="galaxy.datatypes.images:Rast"/><!-- Keep this commented until the sniff method in the assembly.py module is fixed to not read the entire file.
--- a/lib/galaxy/datatypes/binary.py Tue Jul 26 16:14:31 2011 -0400 +++ b/lib/galaxy/datatypes/binary.py Wed Jul 27 16:14:15 2011 -0400 @@ -18,7 +18,7 @@ log = logging.getLogger(__name__)
# Currently these supported binary data types must be manually set on upload -unsniffable_binary_formats = [ 'ab1', 'scf' ] +unsniffable_binary_formats = [ 'ab1', 'scf', 'h5' ]
class Binary( data.Data ): """Binary data""" @@ -206,7 +206,24 @@ return "Binary bam alignments file (%s)" % ( data.nice_size( dataset.get_size() ) ) def get_track_type( self ): return "ReadTrack", {"data": "bai", "index": "summary_tree"} - + +class H5( Binary ): + """Class describing an HDF5 file""" + file_ext = "h5" + + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + dataset.peek = "Binary h5 file" + dataset.blurb = data.nice_size( dataset.get_size() ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def display_peek( self, dataset ): + try: + return dataset.peek + except: + return "Binary h5 sequence file (%s)" % ( data.nice_size( dataset.get_size() ) ) + class Scf( Binary ): """Class describing an scf binary sequence file""" file_ext = "scf" @@ -292,7 +309,6 @@ Binary.__init__( self, **kwd ) self._magic = 0x8789F2EB self._name = "BigBed" - def get_track_type( self ): return "LineTrack", {"data_standalone": "bigbed"}
@@ -309,14 +325,12 @@ return True except IOError: return False - def set_peek(self, dataset, is_multi_byte=False): if not dataset.dataset.purged: dataset.peek = "Binary TwoBit format nucleotide file" dataset.blurb = data.nice_size(dataset.get_size()) else: return super(TwoBit, self).set_peek(dataset, is_multi_byte) - def display_peek(self, dataset): try: return dataset.peek
--- a/lib/galaxy/datatypes/checkers.py Tue Jul 26 16:14:31 2011 -0400 +++ b/lib/galaxy/datatypes/checkers.py Wed Jul 27 16:14:15 2011 -0400 @@ -1,6 +1,28 @@ import os, gzip, re, gzip, zipfile, binascii, bz2 from galaxy import util
+try: + import Image as PIL +except ImportError: + try: + from PIL import Image as PIL + except: + PIL = None + +def check_image( file_path ): + if PIL != None: + try: + im = PIL.open( file_path ) + except: + return False + if im: + return im + return False + else: + if imghdr.what( file_path ) != None: + return True + return False + def check_html( file_path, chunk=None ): if chunk is None: temp = open( file_path, "U" )
--- a/lib/galaxy/datatypes/images.py Tue Jul 26 16:14:31 2011 -0400 +++ b/lib/galaxy/datatypes/images.py Wed Jul 27 16:14:15 2011 -0400 @@ -7,12 +7,31 @@ from galaxy.datatypes.metadata import MetadataElement from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * +from galaxy.datatypes.util.image_util import * from urllib import urlencode, quote_plus import zipfile -import os, subprocess, tempfile +import os, subprocess, tempfile, imghdr + +try: + import Image as PIL +except ImportError: + try: + from PIL import Image as PIL + except: + PIL = None
log = logging.getLogger(__name__)
+# TODO: Uploading image files of various types is supported in Galaxy, but on +# the main public instance, the display_in_upload is not set for these data +# types in datatypes_conf.xml because we do not allow image files to be uploaded +# there. There is currently no API feature that allows uploading files outside +# of a data library ( where it requires either the upload_paths or upload_directory +# option to be enabled, which is not the case on the main public instance ). Because +# of this, we're currently safe, but when the api is enhanced to allow other uploads, +# we need to ensure that the implementation is such that image files cannot be uploaded +# to our main public instance. + class Image( data.Data ): """Class describing an image""" def set_peek( self, dataset, is_multi_byte=False ): @@ -22,11 +41,110 @@ else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' + def sniff( self, filename ): + # First check if we can use PIL + if PIL is not None: + try: + im = PIL.open( filename ) + im.close() + return True + except: + return False + else: + if imghdr.what( filename ) is not None: + return True + else: + return False + +class Jpg( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in jpg format.""" + return check_image_type( filename, ['JPEG'], image ) + +class Png( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in png format.""" + return check_image_type( filename, ['PNG'], image ) + +class Tiff( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in tiff format.""" + return check_image_type( filename, ['TIFF'], image ) + +class Bmp( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in bmp format.""" + return check_image_type( filename, ['BMP'], image ) + +class Gif( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in gif format.""" + return check_image_type( filename, ['GIF'], 
image ) + +class Im( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in im format.""" + return check_image_type( filename, ['IM'], image ) + +class Pcd( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in pcd format.""" + return check_image_type( filename, ['PCD'], image ) + +class Pcx( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in pcx format.""" + return check_image_type( filename, ['PCX'], image ) + +class Ppm( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in ppm format.""" + return check_image_type( filename, ['PPM'], image ) + +class Psd( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in psd format.""" + return check_image_type( filename, ['PSD'], image ) + +class Xbm( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in XBM format.""" + return check_image_type( filename, ['XBM'], image ) + +class Xpm( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in XPM format.""" + return check_image_type( filename, ['XPM'], image ) + +class Rgb( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in RGB format.""" + return check_image_type( filename, ['RGB'], image ) + +class Pbm( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in PBM format""" + return check_image_type( filename, ['PBM'], image ) + +class Pgm( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in PGM format""" + return check_image_type( filename, ['PGM'], image ) + +class Eps( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in eps format.""" + return check_image_type( filename, ['EPS'], image ) + + +class Rast( Image ): + def sniff(self, filename, image=None): + """Determine if the file is in rast format""" + return check_image_type( filename, ['RAST'], image )
class Pdf( Image ): def sniff(self, filename): - """Determine if the file is in pdf format. - """ + """Determine if the file is in pdf format.""" headers = get_headers(filename, None, 1) try: if headers[0][0].startswith("%PDF"):
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/util/image_util.py Wed Jul 27 16:14:15 2011 -0400 @@ -0,0 +1,76 @@ +""" +Provides utilities for working with image files. +""" +import logging, imghdr + +try: + import Image as PIL +except ImportError: + try: + from PIL import Image as PIL + except: + PIL = None + +log = logging.getLogger(__name__) + +def image_type( filename, image=None ): + format = '' + if PIL is not None: + if image is not None: + format = image.format + else: + try: + im = PIL.open( filename ) + format = im.format + im.close() + except: + return False + else: + format = imghdr.what( filename ) + if format is not None: + format = format.upper() + else: + return False + return format +def check_image_type( filename, types, image=None ): + format = image_type( filename, image ) + # First check if we can use PIL + if format in types: + return True + return False +def get_image_ext ( file_path, image ): + #determine ext + format = image_type( file_path, image ) + if format in [ 'JPG','JPEG' ]: + return 'jpg' + if format == 'PNG': + return 'png' + if format == 'TIFF': + return 'tiff' + if format == 'BMP': + return 'bmp' + if format == 'GIF': + return 'gif' + if format == 'IM': + return 'im' + if format == 'PCD': + return 'pcd' + if format == 'PCX': + return 'pcx' + if format == 'PPM': + return 'ppm' + if format == 'PSD': + return 'psd' + if format == 'XBM': + return 'xbm' + if format == 'XPM': + return 'xpm' + if format == 'RGB': + return 'rgb' + if format == 'PBM': + return 'pbm' + if format == 'PGM': + return 'pgm' + if format == 'EPS': + return 'eps' + return None
--- a/test/functional/test_get_data.py Tue Jul 26 16:14:31 2011 -0400 +++ b/test/functional/test_get_data.py Wed Jul 27 16:14:15 2011 -0400 @@ -124,7 +124,7 @@ self.upload_file( '454Score.png' ) hda = get_latest_hda() assert hda is not None, "Problem retrieving hda from database" - self.check_history_for_string( "The uploaded file contains inappropriate content" ) + self.check_history_for_string( "454Score.png" ) def test_0055_upload_file( self ): """Test uploading lped composite datatype file, manually setting the file format""" # Logged in as admin_user
--- a/tools/data_source/upload.py Tue Jul 26 16:14:31 2011 -0400 +++ b/tools/data_source/upload.py Wed Jul 27 16:14:15 2011 -0400 @@ -14,9 +14,18 @@ from galaxy.datatypes.images import Pdf from galaxy.datatypes.registry import Registry from galaxy import util +from galaxy.datatypes.util.image_util import * from galaxy.util.json import *
try: + import Image as PIL +except ImportError: + try: + from PIL import Image as PIL + except: + PIL = None + +try: import bz2 except: bz2 = None @@ -51,16 +60,12 @@ return d def check_bam( file_path ): return Bam().sniff( file_path ) - def check_sff( file_path ): return Sff().sniff( file_path ) - def check_pdf( file_path ): return Pdf().sniff( file_path ) - def check_bigwig( file_path ): return BigWig().sniff( file_path ) - def check_bigbed( file_path ): return BigBed().sniff( file_path ) def parse_outputs( args ): @@ -102,8 +107,16 @@ dataset.is_multi_byte = util.is_multi_byte( codecs.open( dataset.path, 'r', 'utf-8' ).read( 100 ) ) except UnicodeDecodeError, e: dataset.is_multi_byte = False + # Is dataset an image? + image = check_image( dataset.path ) + if image: + if not PIL: + image = None + # get_image_ext() returns None if nor a supported Image type + ext = get_image_ext( dataset.path, image ) + data_type = ext # Is dataset content multi-byte? - if dataset.is_multi_byte: + elif dataset.is_multi_byte: data_type = 'multi-byte char' ext = sniff.guess_ext( dataset.path, is_multi_byte=True ) # Is dataset content supported sniffable binary? @@ -122,7 +135,7 @@ elif check_bigbed( dataset.path ): ext = 'bigbed' data_type = 'bigbed' - else: + if not data_type: # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress is_gzipped, is_valid = check_gzip( dataset.path ) if is_gzipped and not is_valid: @@ -314,7 +327,6 @@ if datatype.dataset_content_needs_grooming( output_path ): # Groom the dataset content if necessary datatype.groom_dataset_content( output_path ) - def add_composite_file( dataset, registry, json_file, output_path, files_path ): if dataset.composite_files: os.mkdir( files_path )
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
--
This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
galaxy-commits@lists.galaxyproject.org