details:   http://www.bx.psu.edu/hg/galaxy/rev/02843f56b812
changeset: 1742:02843f56b812
user:      Greg Von Kuster <greg@bx.psu.edu>
date:      Wed Mar 25 11:51:30 2009 -0400
description:
i18n fixes: uploading binary files now works again, multi-byte character files are no longer set as binary, file peek now supports multi-byte characters.

7 file(s) affected in this change:

eggs.ini
lib/galaxy/datatypes/data.py
lib/galaxy/datatypes/sniff.py
lib/galaxy/model/__init__.py
lib/galaxy/tools/actions/upload.py
lib/galaxy/util/__init__.py
lib/galaxy/web/framework/__init__.py
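The heart of the change: a byte value above 127 no longer automatically means "binary". The first characters of an upload are now tested against East Asian character classes first, so multi-byte text, true binary data, and plain ascii are handled as three distinct cases. A minimal sketch of the new decision order, using the helpers introduced in the diffs below (the sample file name is hypothetical):

    from galaxy import util

    chars = open( 'some_upload.txt' ).read( 100 )  # first 100 chars, as sampled in sniff.stream_to_file
    if util.is_multi_byte( chars ):
        pass  # multi-byte text (e.g. CJK): decoded as utf-8, peek set via set_multi_byte_peek()
    elif [ c for c in chars if ord( c ) > 128 ]:
        pass  # true binary: written to disk unencoded, no line-based peek
    else:
        pass  # plain ascii text: handled as before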
diffs (686 lines):

diff -r affd3085eee3 -r 02843f56b812 eggs.ini
--- a/eggs.ini	Fri Mar 20 02:49:30 2009 -0400
+++ b/eggs.ini	Wed Mar 25 11:51:30 2009 -0400
@@ -48,6 +48,7 @@
 WebOb = 0.8.5
 wsgiref = 0.1.2
 Babel = 0.9.4
+wchartype = 0.1
 
 ; extra version information
 [tags]
@@ -94,3 +95,4 @@
 WebOb = http://pypi.python.org/packages/source/W/WebOb/WebOb-0.8.5.tar.gz
 wsgiref = http://pypi.python.org/packages/source/w/wsgiref/wsgiref-0.1.2.zip
 Babel = http://ftp.edgewall.com/pub/babel/Babel-0.9.4.zip
+wchartype = http://ginstrom.com/code/wchartype-0.1.zip
diff -r affd3085eee3 -r 02843f56b812 lib/galaxy/datatypes/data.py
--- a/lib/galaxy/datatypes/data.py	Fri Mar 20 02:49:30 2009 -0400
+++ b/lib/galaxy/datatypes/data.py	Wed Mar 25 11:51:30 2009 -0400
@@ -109,7 +109,7 @@
         else:
             dataset.peek = 'file does not exist'
             dataset.blurb = 'file purged from disk'
-    def display_peek(self, dataset):
+    def display_peek(self, dataset ):
         """Create HTML table, used for displaying peek"""
         out = ['<table cellspacing="0" cellpadding="3">']
         try:
@@ -121,7 +121,7 @@
                 line = line.strip()
                 if not line:
                     continue
-                out.append( '<tr><td>%s</td></tr>' % escape( line ) )
+                out.append( '<tr><td>%s</td></tr>' % escape( unicode( line, 'utf-8' ) ) )
             out.append( '</table>' )
             out = "".join( out )
         except Exception, exc:
@@ -190,7 +190,6 @@
         except:
             log.exception('Function %s is referred to in datatype %s for displaying as type %s, but is not accessible' % (self.supported_display_apps[type]['file_function'], self.__class__.__name__, type) )
         return "This display type (%s) is not implemented for this datatype (%s)." % ( type, dataset.ext)
-
     def get_display_links(self, dataset, type, app, base_url, **kwd):
         """Returns a list of tuples of (name, link) for a particular display type """
         try:
@@ -199,21 +198,17 @@
         except:
             log.exception('Function %s is referred to in datatype %s for generating links for type %s, but is not accessible' % (self.supported_display_apps[type]['links_function'], self.__class__.__name__, type) )
         return []
-
     def get_converter_types(self, original_dataset, datatypes_registry):
         """Returns available converters by type for this dataset"""
         return datatypes_registry.get_converters_by_datatype(original_dataset.ext)
-
     def find_conversion_destination( self, dataset, accepted_formats, datatypes_registry, **kwd ):
         """Returns ( target_ext, exisiting converted dataset )"""
         return datatypes_registry.find_conversion_destination_for_dataset_by_extensions( dataset, accepted_formats, **kwd )
-
     def convert_dataset(self, trans, original_dataset, target_type, return_output = False, visible = True ):
         """This function adds a job to the queue to convert a dataset to another type. Returns a message about success/failure."""
         converter = trans.app.datatypes_registry.get_converter_by_target_type( original_dataset.ext, target_type )
         if converter is None:
             raise "A converter does not exist for %s to %s." % ( original_dataset.ext, target_type )
-
         #Generate parameter dictionary
         params = {}
         #determine input parameter name and add to params
@@ -223,31 +218,24 @@
             input_name = key
             break
         params[input_name] = original_dataset
-
         #Run converter, job is dispatched through Queue
         converted_dataset = converter.execute( trans, incoming = params, set_output_hid = visible )
-
         if len(params) > 0:
             trans.log_event( "Converter params: %s" % (str(params)), tool_id=converter.id )
-
         if not visible:
             for name, value in converted_dataset.iteritems():
                 value.visible = False
-
         if return_output:
             return converted_dataset
         return "The file conversion of %s on data %s has been added to the Queue." % (converter.name, original_dataset.hid)
-
     def before_edit( self, dataset ):
         """This function is called on the dataset before metadata is edited."""
         pass
-
     def after_edit( self, dataset ):
         """This function is called on the dataset after metadata is edited."""
         dataset.clear_associated_files( metadata_safe = True )
 
 class Text( Data ):
-
     def write_from_stream(self, dataset, stream):
         """Writes data from a stream"""
         # write it twice for now
@@ -265,30 +253,36 @@
             line = line.strip() + '\n'
             fp.write(line)
         fp.close()
-
     def set_raw_data(self, dataset, data):
         """Saves the data on the disc"""
         fd, temp_name = tempfile.mkstemp()
         os.write(fd, data)
         os.close(fd)
-
         # rewrite the file with unix newlines
         fp = open(dataset.file_name, 'wt')
         for line in file(temp_name, "U"):
             line = line.strip() + '\n'
             fp.write(line)
         fp.close()
-
         os.remove( temp_name )
-
     def get_mime(self):
         """Returns the mime type of the datatype"""
         return 'text/plain'
-
     def set_peek( self, dataset, line_count=None ):
         if not dataset.dataset.purged:
             # The file must exist on disk for the get_file_peek() method
             dataset.peek = get_file_peek( dataset.file_name )
+            if line_count is None:
+                dataset.blurb = "%s lines" % util.commaify( str( get_line_count( dataset.file_name ) ) )
+            else:
+                dataset.blurb = "%s lines" % util.commaify( str( line_count ) )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+    def set_multi_byte_peek( self, dataset, line_count=None ):
+        if not dataset.dataset.purged:
+            # The file must exist on disk for the get_file_peek() method
+            dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=True )
             if line_count is None:
                 dataset.blurb = "%s lines" % util.commaify( str( get_line_count( dataset.file_name ) ) )
             else:
@@ -340,7 +334,7 @@
             return out
         return '??? bytes'
 
-def get_file_peek( file_name, WIDTH=256, LINE_COUNT=5 ):
+def get_file_peek( file_name, is_multi_byte=False, WIDTH=256, LINE_COUNT=5 ):
     """
     Returns the first LINE_COUNT lines wrapped to WIDTH
 
@@ -350,12 +344,12 @@
     """
     lines = []
    count = 0
-    file_type = ''
+    file_type = None
     data_checked = False
     for line in file( file_name ):
-        line = line[ :WIDTH ]
-        if not data_checked and line:
-            data_checked = True
+        line = line[:WIDTH]
+        if line and not is_multi_byte and not data_checked:
+            # See if we have a compressed or binary file
             if line[0:2] == util.gzip_magic:
                 file_type = 'gzipped'
                 break
@@ -364,14 +358,17 @@
                     if ord( char ) > 128:
                         file_type = 'binary'
                         break
+            data_checked = True
+            if file_type in [ 'gzipped', 'binary' ]:
+                break
         lines.append( line )
         if count == LINE_COUNT:
             break
         count += 1
-    if file_type:
-        text = "%s file" %file_type
+    if file_type in [ 'gzipped', 'binary' ]:
+        text = "%s file" % file_type
     else:
-        text = '\n'.join( lines )
+        text = unicode( '\n'.join( lines ), 'utf-8' )
     return text
 
 def get_line_count(file_name):
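The get_file_peek() change above is the visible half of the fix: with is_multi_byte=True the high-byte scan is skipped and the peek is decoded as utf-8, instead of the first high byte marking the whole file as binary. A minimal usage sketch of the new signature (the file names are hypothetical):

    from galaxy.datatypes.data import get_file_peek

    peek = get_file_peek( '/tmp/nihongo.txt', is_multi_byte=True )  # unicode peek of a CJK text file
    peek = get_file_peek( '/tmp/reads.gz' )                         # 'gzipped file'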
diff -r affd3085eee3 -r 02843f56b812 lib/galaxy/datatypes/sniff.py
--- a/lib/galaxy/datatypes/sniff.py	Fri Mar 20 02:49:30 2009 -0400
+++ b/lib/galaxy/datatypes/sniff.py	Wed Mar 25 11:51:30 2009 -0400
@@ -1,8 +1,9 @@
 """
 File format detector
 """
-import logging, sys, os, csv, tempfile, shutil, re
+import logging, sys, os, csv, tempfile, shutil, re, zipfile
 import registry
+from galaxy import util
 
 log = logging.getLogger(__name__)
 
@@ -13,18 +14,43 @@
     return full_path
 
 def stream_to_file( stream, suffix='', prefix='', dir=None, text=False ):
-    """
-    Writes a stream to a temporary file, returns the temporary file's name
-    """
+    """Writes a stream to a temporary file, returns the temporary file's name"""
     fd, temp_name = tempfile.mkstemp( suffix=suffix, prefix=prefix, dir=dir, text=text )
+    CHUNK_SIZE = 1048576
+    data_checked = False
+    is_compressed = False
+    is_binary = False
+    is_multi_byte = False
     while 1:
-        chunk = stream.read(1048576)
+        chunk = stream.read( CHUNK_SIZE )
         if not chunk:
             break
-        # TODO: does this work on binary files?
-        os.write( fd, chunk.encode( "utf-8" ) )
-    os.close(fd)
-    return temp_name
+        if not data_checked:
+            # See if we're uploading a compressed file
+            if zipfile.is_zipfile( temp_name ):
+                is_compressed = True
+            else:
+                magic_check = chunk[:2]
+                if magic_check == util.gzip_magic:
+                    is_compressed = True
+            if not is_compressed:
+                # See if we have a multi-byte character file
+                chars = chunk[:100]
+                is_multi_byte = util.is_multi_byte( chars )
+                if not is_multi_byte:
+                    for char in chars:
+                        if ord( char ) > 128:
+                            is_binary = True
+                            break
+            data_checked = True
+        if not is_compressed and not is_binary:
+            os.write( fd, chunk.encode( "utf-8" ) )
+        else:
+            # Compressed files must be encoded after they are uncompressed in the upload utility,
+            # while binary files should not be encoded at all.
+            os.write( fd, chunk )
+    os.close( fd )
+    return temp_name, is_multi_byte
 
 def convert_newlines( fname ):
     """
@@ -94,7 +120,7 @@
     # Return number of lines in file.
     return i + 1
 
-def get_headers(fname, sep, count=60):
+def get_headers( fname, sep, count=60, is_multi_byte=False ):
     """
     Returns a list with the first 'count' lines split by 'sep'
 
@@ -105,12 +131,16 @@
     headers = []
     for idx, line in enumerate(file(fname)):
         line = line.rstrip('\n\r')
+        if is_multi_byte:
+            # TODO: fix this - sep is never found in line
+            line = unicode( line, 'utf-8' )
+            sep = sep.encode( 'utf-8' )
         headers.append( line.split(sep) )
         if idx == count:
             break
     return headers
 
-def is_column_based(fname, sep='\t', skip=0):
+def is_column_based( fname, sep='\t', skip=0, is_multi_byte=False ):
     """
     Checks whether the file is column based with respect to a separator
     (defaults to tab separator).
@@ -138,9 +168,8 @@
     >>> is_column_based(fname)
     True
     """
-    headers = get_headers(fname, sep)
+    headers = get_headers( fname, sep, is_multi_byte=is_multi_byte )
     count = 0
-
     if not headers:
         return False
     for hdr in headers[skip:]:
@@ -156,7 +185,7 @@
             return False
     return True
 
-def guess_ext( fname, sniff_order=None ):
+def guess_ext( fname, sniff_order=None, is_multi_byte=False ):
     """
     Returns an extension that can be used in the datatype factory to
     generate a data for the 'fname' file
@@ -220,20 +249,28 @@
             return datatype.file_ext
         except:
             pass
-
     headers = get_headers( fname, None )
-    is_binary = True
-    for hdr in headers:
-        for char in hdr:
-            try:
-                if not ord(char) > 128:
-                    is_binary = False
-            except:
-                is_binary = False
+    is_binary = False
+    if is_multi_byte:
+        is_binary = False
+    else:
+        for hdr in headers:
+            for char in hdr:
+                if len( char ) > 1:
+                    for c in char:
+                        if ord( c ) > 128:
+                            is_binary = True
+                            break
+                elif ord( char ) > 128:
+                    is_binary = True
+                    break
+                if is_binary:
+                    break
+            if is_binary:
                 break
     if is_binary:
         return 'data'        #default binary data type file extension
-    if is_column_based( fname, '\t', 1):
+    if is_column_based( fname, '\t', 1, is_multi_byte=is_multi_byte ):
         return 'tabular'    #default tabular data type file extension
     return 'txt'            #default text data type file extension
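Note that stream_to_file() now returns a two-item tuple, so every caller has to unpack it; roughly the pattern used throughout upload.py below (the URL is hypothetical):

    import urllib
    from galaxy.datatypes import sniff

    temp_name, is_multi_byte = sniff.stream_to_file( urllib.urlopen( 'http://example.org/data.txt' ),
                                                     prefix='url_paste' )
    ext = sniff.guess_ext( temp_name, is_multi_byte=is_multi_byte )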
diff -r affd3085eee3 -r 02843f56b812 lib/galaxy/model/__init__.py
--- a/lib/galaxy/model/__init__.py	Fri Mar 20 02:49:30 2009 -0400
+++ b/lib/galaxy/model/__init__.py	Wed Mar 25 11:51:30 2009 -0400
@@ -245,6 +245,8 @@
         return datatypes_registry.get_mimetype_by_extension( self.extension.lower() )
     def set_peek( self ):
         return self.datatype.set_peek( self )
+    def set_multi_byte_peek( self ):
+        return self.datatype.set_multi_byte_peek( self )
     def init_meta( self, copy_from=None ):
         return self.datatype.init_meta( self, copy_from=copy_from )
     def set_meta( self, **kwd ):
diff -r affd3085eee3 -r 02843f56b812 lib/galaxy/tools/actions/upload.py
--- a/lib/galaxy/tools/actions/upload.py	Fri Mar 20 02:49:30 2009 -0400
+++ b/lib/galaxy/tools/actions/upload.py	Wed Mar 25 11:51:30 2009 -0400
@@ -23,6 +23,7 @@
         file_type = incoming['file_type']
         dbkey = incoming['dbkey']
         url_paste = incoming['url_paste']
+        is_multi_byte = False
         space_to_tab = False
         if 'space_to_tab' in incoming:
             if incoming['space_to_tab'] not in ["None", None]:
@@ -49,7 +50,7 @@
                 file_name = file_name.split( '\\' )[-1]
                 file_name = file_name.split( '/' )[-1]
                 try:
-                    data_list.append( self.add_file( trans, data_file.local_filename, file_name, file_type, dbkey, space_to_tab=space_to_tab ) )
+                    data_list.append( self.add_file( trans, data_file.local_filename, file_name, file_type, is_multi_byte, dbkey, space_to_tab=space_to_tab ) )
                 except Exception, e:
                     log.exception( 'exception in add_file using datafile.local_filename %s: %s' % ( data_file.local_filename, str( e ) ) )
                     self.remove_tempfile( data_file.local_filename )
@@ -59,13 +60,13 @@
             file_name = file_name.split( '\\' )[-1]
             file_name = file_name.split( '/' )[-1]
             try:
-                temp_name = sniff.stream_to_file( data_file.file, prefix='upload' )
+                temp_name, is_multi_byte = sniff.stream_to_file( data_file.file, prefix='upload' )
             except Exception, e:
                 log.exception( 'exception in sniff.stream_to_file using file %s: %s' % ( data_file.filename, str( e ) ) )
                 self.remove_tempfile( temp_name )
                 return self.upload_empty( trans, job, "Error:", str( e ) )
             try:
-                data_list.append( self.add_file( trans, temp_name, file_name, file_type, dbkey, space_to_tab=space_to_tab ) )
+                data_list.append( self.add_file( trans, temp_name, file_name, file_type, is_multi_byte, dbkey, space_to_tab=space_to_tab ) )
             except Exception, e:
                 log.exception( 'exception in add_file using file temp_name %s: %s' % ( str( temp_name ), str( e ) ) )
                 self.remove_tempfile( temp_name )
@@ -77,13 +78,13 @@
                     line = line.rstrip( '\r\n' )
                     if line:
                         try:
-                            temp_name = sniff.stream_to_file( urllib.urlopen( line ), prefix='url_paste' )
+                            temp_name, is_multi_byte = sniff.stream_to_file( urllib.urlopen( line ), prefix='url_paste' )
                         except Exception, e:
                             log.exception( 'exception in sniff.stream_to_file using url_paste %s: %s' % ( url_paste, str( e ) ) )
                             self.remove_tempfile( temp_name )
                             return self.upload_empty( trans, job, "Error:", str( e ) )
                         try:
-                            data_list.append( self.add_file( trans, temp_name, line, file_type, dbkey, info="uploaded url", space_to_tab=space_to_tab ) )
+                            data_list.append( self.add_file( trans, temp_name, line, file_type, is_multi_byte, dbkey, info="uploaded url", space_to_tab=space_to_tab ) )
                         except Exception, e:
                             log.exception( 'exception in add_file using url_paste temp_name %s: %s' % ( str( temp_name ), str( e ) ) )
                             self.remove_tempfile( temp_name )
@@ -97,13 +98,13 @@
                         break
                 if is_valid:
                     try:
-                        temp_name = sniff.stream_to_file( StringIO.StringIO( url_paste ), prefix='strio_url_paste' )
+                        temp_name, is_multi_byte = sniff.stream_to_file( StringIO.StringIO( url_paste ), prefix='strio_url_paste' )
                     except Exception, e:
                         log.exception( 'exception in sniff.stream_to_file using StringIO.StringIO( url_paste ) %s: %s' % ( url_paste, str( e ) ) )
                         self.remove_tempfile( temp_name )
                         return self.upload_empty( trans, job, "Error:", str( e ) )
                     try:
-                        data_list.append( self.add_file( trans, temp_name, 'Pasted Entry', file_type, dbkey, info="pasted entry", space_to_tab=space_to_tab ) )
+                        data_list.append( self.add_file( trans, temp_name, 'Pasted Entry', file_type, is_multi_byte, dbkey, info="pasted entry", space_to_tab=space_to_tab ) )
                     except Exception, e:
                         log.exception( 'exception in add_file using StringIO.StringIO( url_paste ) temp_name %s: %s' % ( str( temp_name ), str( e ) ) )
                         self.remove_tempfile( temp_name )
@@ -144,85 +145,87 @@
         trans.log_event( 'job id %d ended with errors, err_msg: %s' % ( job.id, err_msg ), tool_id=job.tool_id )
         return dict( output=data )
 
-    def add_file( self, trans, temp_name, file_name, file_type, dbkey, info=None, space_to_tab=False ):
+    def add_file( self, trans, temp_name, file_name, file_type, is_multi_byte, dbkey, info=None, space_to_tab=False ):
         data_type = None
+        ext = ''
         # See if we have an empty file
         if not os.path.getsize( temp_name ) > 0:
             raise BadFileException( "you attempted to upload an empty file." )
-        # See if we have a gzipped file, which, if it passes our restrictions,
-        # we'll decompress on the fly.
-        is_gzipped, is_valid = self.check_gzip( temp_name )
-        if is_gzipped and not is_valid:
-            raise BadFileException( "you attempted to upload an inappropriate file." )
-        elif is_gzipped and is_valid:
-            #We need to decompress the temp_name file
-            CHUNK_SIZE = 2**20 # 1Mb
-            fd, uncompressed = tempfile.mkstemp()
-            gzipped_file = gzip.GzipFile( temp_name )
-            while 1:
-                try:
-                    chunk = gzipped_file.read( CHUNK_SIZE )
-                except IOError:
+        if is_multi_byte:
+            ext = sniff.guess_ext( temp_name, is_multi_byte=True )
+        else:
+            if not data_type:
+                # See if we have a gzipped file, which, if it passes our restrictions,
+                # we'll decompress on the fly.
+                is_gzipped, is_valid = self.check_gzip( temp_name )
+                if is_gzipped and not is_valid:
+                    raise BadFileException( "you attempted to upload an inappropriate file." )
+                elif is_gzipped and is_valid:
+                    #We need to decompress the temp_name file
+                    CHUNK_SIZE = 2**20 # 1Mb
+                    fd, uncompressed = tempfile.mkstemp()
+                    gzipped_file = gzip.GzipFile( temp_name )
+                    while 1:
+                        try:
+                            chunk = gzipped_file.read( CHUNK_SIZE )
+                        except IOError:
+                            os.close( fd )
+                            os.remove( uncompressed )
+                            raise BadFileException( 'problem decompressing gzipped data.' )
+                        if not chunk:
+                            break
+                        os.write( fd, chunk.encode( "utf-8" ) )
                     os.close( fd )
-                    os.remove( uncompressed )
-                    raise BadFileException( 'problem decompressing gzipped data.' )
-                if not chunk:
-                    break
-                os.write( fd, chunk )
-            os.close( fd )
-            gzipped_file.close()
-            # Replace the gzipped file with the decompressed file
-            shutil.move( uncompressed, temp_name )
-            file_name = file_name.rstrip( '.gz' )
-            data_type = 'gzip'
-        ext = ''
-        if not data_type:
-            # See if we have a zip archive
-            is_zipped, is_valid, test_ext = self.check_zip( temp_name )
-            if is_zipped and not is_valid:
-                raise BadFileException( "you attempted to upload an inappropriate file." )
-            elif is_zipped and is_valid:
-                # Currently, we force specific tools to handle this case. We also require the user
-                # to manually set the incoming file_type
-                if ( test_ext == 'ab1' or test_ext == 'scf' ) and file_type != 'binseq.zip':
-                    raise BadFileException( "Invalid 'File Format' for archive consisting of binary files - use 'Binseq.zip'." )
-                elif test_ext == 'txt' and file_type != 'txtseq.zip':
-                    raise BadFileException( "Invalid 'File Format' for archive consisting of text files - use 'Txtseq.zip'." )
-                if not ( file_type == 'binseq.zip' or file_type == 'txtseq.zip' ):
-                    raise BadFileException( "you must manually set the 'File Format' to either 'Binseq.zip' or 'Txtseq.zip' when uploading zip files." )
-                data_type = 'zip'
-                ext = file_type
-        if not data_type:
-            if self.check_binary( temp_name ):
-                parts = file_name.split( "." )
-                if len( parts ) > 1:
-                    ext = parts[1].strip().lower()
-                    if not( ext == 'ab1' or ext == 'scf' ):
-                        raise BadFileException( "you attempted to upload an inappropriate file." )
-                    if ext == 'ab1' and file_type != 'ab1':
-                        raise BadFileException( "you must manually set the 'File Format' to 'Ab1' when uploading ab1 files." )
-                    elif ext == 'scf' and file_type != 'scf':
-                        raise BadFileException( "you must manually set the 'File Format' to 'Scf' when uploading scf files." )
-                data_type = 'binary'
-        if not data_type:
-            # We must have a text file
-            if self.check_html( temp_name ):
-                raise BadFileException( "you attempted to upload an inappropriate file." )
-        if data_type != 'binary' and data_type != 'zip':
-            if space_to_tab:
-                self.line_count = sniff.convert_newlines_sep2tabs( temp_name )
-            else:
-                self.line_count = sniff.convert_newlines( temp_name )
-            if file_type == 'auto':
-                ext = sniff.guess_ext( temp_name, sniff_order=trans.app.datatypes_registry.sniff_order )
-            else:
-                ext = file_type
-            data_type = ext
-
+                    gzipped_file.close()
+                    # Replace the gzipped file with the decompressed file
+                    shutil.move( uncompressed, temp_name )
+                    file_name = file_name.rstrip( '.gz' )
+                    data_type = 'gzip'
+            if not data_type:
+                # See if we have a zip archive
+                is_zipped, is_valid, test_ext = self.check_zip( temp_name )
+                if is_zipped and not is_valid:
+                    raise BadFileException( "you attempted to upload an inappropriate file." )
+                elif is_zipped and is_valid:
+                    # Currently, we force specific tools to handle this case. We also require the user
+                    # to manually set the incoming file_type
+                    if ( test_ext == 'ab1' or test_ext == 'scf' ) and file_type != 'binseq.zip':
+                        raise BadFileException( "Invalid 'File Format' for archive consisting of binary files - use 'Binseq.zip'." )
+                    elif test_ext == 'txt' and file_type != 'txtseq.zip':
+                        raise BadFileException( "Invalid 'File Format' for archive consisting of text files - use 'Txtseq.zip'." )
+                    if not ( file_type == 'binseq.zip' or file_type == 'txtseq.zip' ):
+                        raise BadFileException( "you must manually set the 'File Format' to either 'Binseq.zip' or 'Txtseq.zip' when uploading zip files." )
+                    data_type = 'zip'
+                    ext = file_type
+            if not data_type:
+                if self.check_binary( temp_name ):
+                    parts = file_name.split( "." )
+                    if len( parts ) > 1:
+                        ext = parts[1].strip().lower()
+                        if not( ext == 'ab1' or ext == 'scf' ):
+                            raise BadFileException( "you attempted to upload an inappropriate file." )
+                        if ext == 'ab1' and file_type != 'ab1':
+                            raise BadFileException( "you must manually set the 'File Format' to 'Ab1' when uploading ab1 files." )
+                        elif ext == 'scf' and file_type != 'scf':
+                            raise BadFileException( "you must manually set the 'File Format' to 'Scf' when uploading scf files." )
+                    data_type = 'binary'
+            if not data_type:
+                # We must have a text file
+                if self.check_html( temp_name ):
+                    raise BadFileException( "you attempted to upload an inappropriate file." )
+            if data_type != 'binary' and data_type != 'zip':
+                if space_to_tab:
+                    self.line_count = sniff.convert_newlines_sep2tabs( temp_name )
+                else:
+                    self.line_count = sniff.convert_newlines( temp_name )
+                if file_type == 'auto':
+                    ext = sniff.guess_ext( temp_name, sniff_order=trans.app.datatypes_registry.sniff_order )
+                else:
+                    ext = file_type
+                data_type = ext
         if info is None:
             info = 'uploaded %s file' %data_type
-
-        data = trans.app.model.HistoryDatasetAssociation( history = trans.history, extension = ext, create_dataset = True )
+        data = trans.app.model.HistoryDatasetAssociation( history=trans.history, extension=ext, create_dataset=True )
         data.name = file_name
         data.dbkey = dbkey
         data.info = info
@@ -233,19 +236,25 @@
         data.init_meta()
         if self.line_count is not None:
             try:
-                data.set_peek( line_count=self.line_count )
+                if is_multi_byte:
+                    data.set_multi_byte_peek( line_count=self.line_count )
+                else:
+                    data.set_peek( line_count=self.line_count )
             except:
+                if is_multi_byte:
+                    data.set_multi_byte_peek()
+                else:
+                    data.set_peek()
+        else:
+            if is_multi_byte:
+                data.set_multi_byte_peek()
+            else:
                 data.set_peek()
-        else:
-            data.set_peek()
-
-        # validate incomming data
-        """
-        Commented by greg on 3/14/07
-        for error in data.datatype.validate( data ):
-            data.add_validation_error(
-                model.ValidationError( message=str( error ), err_type=error.__class__.__name__, attributes=util.object_to_string( error.__dict__ ) ) )
-        """
+        # validate incoming data
+        # Commented by greg on 3/14/07
+        # for error in data.datatype.validate( data ):
+        #     data.add_validation_error(
+        #         model.ValidationError( message=str( error ), err_type=error.__class__.__name__, attributes=util.object_to_string( error.__dict__ ) ) )
         if data.missing_meta():
             data.datatype.set_meta( data )
         dbkey_to_store = dbkey
@@ -319,6 +328,8 @@
             lineno += 1
             line = line.strip()
             if line:
+                if util.is_multi_byte( line ):
+                    return False
                 for char in line:
                     if ord( char ) > 128:
                         if chunk is None:
diff -r affd3085eee3 -r 02843f56b812 lib/galaxy/util/__init__.py
--- a/lib/galaxy/util/__init__.py	Fri Mar 20 02:49:30 2009 -0400
+++ b/lib/galaxy/util/__init__.py	Wed Mar 25 11:51:30 2009 -0400
@@ -14,10 +14,33 @@
 pkg_resources.require( 'elementtree' )
 from elementtree import ElementTree
 
+pkg_resources.require( "wchartype" )
+import wchartype
+
 log = logging.getLogger(__name__)
 _lock = threading.RLock()
 
 gzip_magic = '\037\213'
+
+def is_multi_byte( chars ):
+    for char in chars:
+        try:
+            char = unicode( char )
+        except Exception, e:
+            # Probably binary
+            log.exception( e )
+            return False
+        if wchartype.is_asian( char ) or \
+           wchartype.is_full_width( char ) or \
+           wchartype.is_kanji( char ) or \
+           wchartype.is_hiragana( char ) or \
+           wchartype.is_katakana( char ) or \
+           wchartype.is_half_katakana( char ) or \
+           wchartype.is_hangul( char ) or \
+           wchartype.is_full_digit( char ) or \
+           wchartype.is_full_letter( char ):
+            return True
+    return False
 
 def synchronized(func):
     """This wrapper will serialize access to 'func' to a single thread.
     Use it as a decorator."""
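A quick illustration of what the new util.is_multi_byte() helper accepts and rejects, based on the wchartype classes checked above (the sample strings are illustrative):

    from galaxy import util

    util.is_multi_byte( u'\u65e5\u672c\u8a9e' )  # CJK characters -> True (wchartype.is_kanji et al.)
    util.is_multi_byte( 'chr1\t100\t200' )       # plain ascii -> False
    util.is_multi_byte( '\xff\xfe' )             # raw high bytes: unicode() raises, logged -> False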
diff -r affd3085eee3 -r 02843f56b812 lib/galaxy/web/framework/__init__.py
--- a/lib/galaxy/web/framework/__init__.py	Fri Mar 20 02:49:30 2009 -0400
+++ b/lib/galaxy/web/framework/__init__.py	Wed Mar 25 11:51:30 2009 -0400
@@ -466,8 +466,7 @@
             return self.fill_template_mako( filename, **kwargs )
         else:
             template = Template( file=os.path.join(self.app.config.template_path, filename),
-                                 searchList=[kwargs, self.template_context, dict(caller=self, t=self, h=webhelpers, util=util, request=self.request, response=self.response, app=self.app)],
-                                 output_encoding='utf-8' )
+                                 searchList=[kwargs, self.template_context, dict(caller=self, t=self, h=webhelpers, util=util, request=self.request, response=self.response, app=self.app)] )
             return str( template )
     def fill_template_mako( self, filename, **kwargs ):
         template = self.webapp.mako_template_lookup.get_template( filename )
@@ -481,8 +480,7 @@
         """
         Fill in a template, putting any keyword arguments on the context.
         """
         template = Template( source=template_string,
-                             searchList=[context or kwargs, dict(caller=self)],
-                             output_encoding='utf-8' )
+                             searchList=[context or kwargs, dict(caller=self)] )
         return str(template)
 
 class FormBuilder( object ):
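Finally, on the template change just above: the Cheetah templates no longer force output_encoding='utf-8', presumably so that filled templates are not re-encoded now that peeks are handled as unicode elsewhere in this changeset. A minimal sketch of the calling convention after the change (the template source and searchList values are hypothetical):

    from Cheetah.Template import Template

    template = Template( source='Peek: $peek', searchList=[ { 'peek': peek_text } ] )
    page = str( template )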