galaxy-dist commit 5f9d6d9582e5: Read zipfiles in chunks when uncompressing in the upload tool. And I continue to wish zipfile was a lot more like tarfile...
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Nate Coraor <nate@bx.psu.edu>
# Date 1287608406 14400
# Node ID 5f9d6d9582e50f134c97e8523a015f210dd3af70
# Parent 7afd79d131e40f5f9a61125ec159552dc7c3fabd
Read zipfiles in chunks when uncompressing in the upload tool. And I continue to wish zipfile was a lot more like tarfile...

--- a/tools/data_source/upload.py
+++ b/tools/data_source/upload.py
@@ -258,6 +258,9 @@ def add_file( dataset, registry, json_fi
             # See if we have a zip archive
             is_zipped = check_zip( dataset.path )
             if is_zipped:
+                CHUNK_SIZE = 2**20 # 1Mb
+                uncompressed = None
+                uncompressed_name = None
                 unzipped = False
                 z = zipfile.ZipFile( dataset.path )
                 for name in z.namelist():
@@ -267,18 +270,28 @@ def add_file( dataset, registry, json_fi
                         stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
                         break
                     fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname( dataset.path ), text=False )
-                    try:
-                        outfile = open( uncompressed, 'wb' )
-                        outfile.write( z.read( name ) )
-                        outfile.close()
-                        shutil.move( uncompressed, dataset.path )
-                        dataset.name = name
-                        unzipped = True
-                    except IOError:
-                        os.close( fd )
-                        os.remove( uncompressed )
-                        file_err( 'Problem decompressing zipped data', dataset, json_file )
-                        return
+                    zipped_file = z.open( name )
+                    while 1:
+                        try:
+                            chunk = zipped_file.read( CHUNK_SIZE )
+                        except IOError:
+                            os.close( fd )
+                            os.remove( uncompressed )
+                            file_err( 'Problem decompressing zipped data', dataset, json_file )
+                            return
+                        if not chunk:
+                            break
+                        os.write( fd, chunk )
+                    os.close( fd )
+                    zipped_file.close()
+                    uncompressed_name = name
+                    unzipped = True
+                z.close()
+                # Replace the zipped file with the decompressed file
+                if uncompressed is not None:
+                    shutil.move( uncompressed, dataset.path )
+                    dataset.name = uncompressed_name
                 data_type = 'zip'
         if not data_type:
             if check_binary( dataset.path ):
                 # We have a binary dataset, but it is not Bam, Sff or Pdf
participants (1)
-
commits-noreply@bitbucket.org