commit/galaxy-central: greg: Add support for uncompressing a gz or bz2 compressed file upon upload to the galaxy tool shed. Fixes issue # 586.
1 new changeset in galaxy-central: http://bitbucket.org/galaxy/galaxy-central/changeset/a746bfc39b17/ changeset: a746bfc39b17 user: greg date: 2011-06-17 21:00:22 summary: Add support for uncompressing a gz or bz2 compressed file upon upload to the galaxy tool shed. Fixes issue # 586. affected #: 2 files (5.1 KB) --- a/lib/galaxy/webapps/community/controllers/upload.py Fri Jun 17 14:27:47 2011 -0400 +++ b/lib/galaxy/webapps/community/controllers/upload.py Fri Jun 17 15:00:22 2011 -0400 @@ -1,6 +1,7 @@ import sys, os, shutil, logging, tarfile, tempfile from galaxy.web.base.controller import * from galaxy.model.orm import * +from galaxy.datatypes.checkers import * from common import get_categories, get_repository from mercurial import hg, ui @@ -8,6 +9,7 @@ # States for passing messages SUCCESS, INFO, WARNING, ERROR = "done", "info", "warning", "error" +CHUNK_SIZE = 2**20 # 1Mb class UploadError( Exception ): pass @@ -26,6 +28,7 @@ repository = get_repository( trans, repository_id ) repo_dir = repository.repo_path repo = hg.repository( ui.ui(), repo_dir ) + uncompress_file = util.string_as_bool( params.get( 'uncompress_file', 'true' ) ) uploaded_file = None upload_point = params.get( 'upload_point', None ) if upload_point is not None: @@ -59,25 +62,37 @@ uploaded_file = None elif file_data not in ( '', None ): uploaded_file = file_data.file + uploaded_file_name = uploaded_file.name + uploaded_file_filename = file_data.filename if uploaded_file: # Our current support for browsing repo contents requires a copy of the - # repository files in the repo root directory. To produce these copies, - # we update without passing the "-r null" flag. + # repository files in the repo root directory. To eliminate these copies, + # we update the repo, passing the "-r null" flag. os.chdir( repo_dir ) os.system( 'hg update -r null > /dev/null 2>&1' ) os.chdir( current_working_dir ) + isgzip = False + isbz2 = False + if uncompress_file: + isgzip = is_gzip( uploaded_file_name ) + if not isgzip: + isbz2 = is_bz2( uploaded_file_name ) ok = True files_to_commit = [] # Determine what we have - a single file or an archive try: - tar = tarfile.open( uploaded_file.name ) + if uncompress_file: + # Open for reading with transparent compression. + tar = tarfile.open( uploaded_file_name, 'r:*' ) + else: + tar = tarfile.open( uploaded_file_name ) istar = True except tarfile.ReadError, e: tar = None istar = False if repository.is_new: if istar: - # We have an archive ( a tarball ) + # We have an archive ( a tarball ) in a new repository. ok, message = self.__check_archive( tar ) if ok: tar.extractall( path=repo_dir ) @@ -108,23 +123,28 @@ else: tar.close() else: - # We have a single file + # We have a single file in a new repository. + if uncompress_file and ( isgzip or isbz2 ): + uploaded_file_filename = self.uncompress( repository, uploaded_file_name, uploaded_file_filename, isgzip, isbz2 ) if upload_point is not None: - full_path = os.path.abspath( os.path.join( upload_point, file_data.filename ) ) - file_path = os.path.join( upload_point, file_data.filename ) + full_path = os.path.abspath( os.path.join( upload_point, uploaded_file_filename ) ) + file_path = os.path.join( upload_point, uploaded_file_filename ) else: - full_path = os.path.abspath( os.path.join( repo_dir, file_data.filename ) ) - file_path = os.path.join( file_data.filename ) - shutil.move( uploaded_file.name, full_path ) + full_path = os.path.abspath( os.path.join( repo_dir, uploaded_file_filename ) ) + file_path = os.path.join( uploaded_file_filename ) + shutil.move( uploaded_file_name, full_path ) repo.dirstate.add( file_path ) files_to_commit.append( file_path ) else: + # We have a repository that is not new (it contains files). + if uncompress_file and ( isgzip or isbz2 ): + uploaded_file_filename = self.uncompress( repository, uploaded_file_name, uploaded_file_filename, isgzip, isbz2 ) # Clone the repository to a temporary location. tmp_dir, cloned_repo_dir = self.__hg_clone( trans, repository, repo_dir, current_working_dir ) # Move the uploaded files to the upload_point within the cloned repository. - self.__move_to_upload_point( upload_point, uploaded_file, file_data, cloned_repo_dir, istar, tar ) + self.__move_to_upload_point( upload_point, uploaded_file, uploaded_file_name, uploaded_file_filename, cloned_repo_dir, istar, tar ) # Commit and push the changes from the cloned repo to the master repo. - self.__hg_push( trans, repository, file_data, commit_message, current_working_dir, cloned_repo_dir, repo_dir, tmp_dir ) + self.__hg_push( trans, repository, file_data.filename, uncompress_file, commit_message, current_working_dir, cloned_repo_dir, repo_dir, tmp_dir ) if ok: if files_to_commit: repo.dirstate.write() @@ -132,7 +152,11 @@ os.chdir( repo_dir ) os.system( 'hg update > /dev/null 2>&1' ) os.chdir( current_working_dir ) - message = "The file '%s' has been successfully uploaded to the repository." % file_data.filename + if uncompress_file: + uncompress_str = ' uncompressed and ' + else: + uncompress_str = ' ' + message = "The file '%s' has been successfully%suploaded to the repository." % ( uploaded_file_filename, uncompress_str ) trans.response.send_redirect( web.url_for( controller='repository', action='browse_repository', message=message, @@ -146,8 +170,50 @@ return trans.fill_template( '/webapps/community/repository/upload.mako', repository=repository, commit_message=commit_message, + uncompress_file=uncompress_file, message=message, status=status ) + def uncompress( self, repository, uploaded_file_name, uploaded_file_filename, isgzip, isbz2 ): + if isgzip: + self.__handle_gzip( repository, uploaded_file_name ) + return uploaded_file_filename.rstrip( '.gz' ) + if isbz2: + self.__handle_bz2( repository, uploaded_file_name ) + return uploaded_file_filename.rstrip( '.bz2' ) + def __handle_gzip( self, repository, uploaded_file_name ): + fd, uncompressed = tempfile.mkstemp( prefix='repo_%d_upload_gunzip_' % repository.id, dir=os.path.dirname( uploaded_file_name ), text=False ) + gzipped_file = gzip.GzipFile( uploaded_file_name, 'rb' ) + while 1: + try: + chunk = gzipped_file.read( CHUNK_SIZE ) + except IOError, e: + os.close( fd ) + os.remove( uncompressed ) + log.exception( 'Problem uncompressing gz data "%s": %s' % ( uploaded_file_name, str( e ) ) ) + return + if not chunk: + break + os.write( fd, chunk ) + os.close( fd ) + gzipped_file.close() + shutil.move( uncompressed, uploaded_file_name ) + def __handle_bz2( self, repository, uploaded_file_name ): + fd, uncompressed = tempfile.mkstemp( prefix='repo_%d_upload_bunzip2_' % repository.id, dir=os.path.dirname( uploaded_file_name ), text=False ) + bzipped_file = bz2.BZ2File( uploaded_file_name, 'rb' ) + while 1: + try: + chunk = bzipped_file.read( CHUNK_SIZE ) + except IOError: + os.close( fd ) + os.remove( uncompressed ) + log.exception( 'Problem uncompressing bz2 data "%s": %s' % ( uploaded_file_name, str( e ) ) ) + return + if not chunk: + break + os.write( fd, chunk ) + os.close( fd ) + bzipped_file.close() + shutil.move( uncompressed, uploaded_file_name ) def __hg_clone( self, trans, repository, repo_dir, current_working_dir ): tmp_dir = tempfile.mkdtemp() tmp_archive_dir = os.path.join( tmp_dir, 'tmp_archive_dir' ) @@ -160,9 +226,9 @@ os.chdir( current_working_dir ) cloned_repo_dir = os.path.join( tmp_archive_dir, 'repo_%d' % repository.id ) return tmp_dir, cloned_repo_dir - def __hg_push( self, trans, repository, file_data, commit_message, current_working_dir, cloned_repo_dir, repo_dir, tmp_dir ): + def __hg_push( self, trans, repository, filename, uncompress_file, commit_message, current_working_dir, cloned_repo_dir, repo_dir, tmp_dir ): repo = hg.repository( ui.ui(), repo_dir ) - tip = repo.changectx( "tip" ) + tip = repo[ 'tip' ] # We want these change sets to be associated with the owner of the repository, so we'll # set the HGUSER environment variable accordingly. os.environ[ 'HGUSER' ] = trans.user.username @@ -184,19 +250,30 @@ os.system( 'hg update > /dev/null 2>&1' ) os.chdir( current_working_dir ) shutil.rmtree( tmp_dir ) - if tip != repo.changectx( "tip" ): - message = "The file '%s' has been successfully uploaded to the repository." % file_data.filename + repo = hg.repository( ui.ui(), repo_dir ) + if tip != repo[ 'tip' ]: + if uncompress_file: + uncompress_str = ' uncompressed and ' + else: + uncompress_str = ' ' + message = "The file '%s' has been successfully%suploaded to the repository." % ( filename, uncompress_str ) else: message = 'No changes in uploaded files.' trans.response.send_redirect( web.url_for( controller='repository', action='browse_repository', message=message, id=trans.security.encode_id( repository.id ) ) ) - def __move_to_upload_point( self, upload_point, uploaded_file, file_data, cloned_repo_dir, istar, tar ): + def __move_to_upload_point( self, upload_point, uploaded_file, uploaded_file_name, uploaded_file_filename, cloned_repo_dir, istar, tar ): if upload_point is not None: - full_path = os.path.abspath( os.path.join( cloned_repo_dir, upload_point, file_data.filename ) ) + if istar: + full_path = os.path.abspath( os.path.join( cloned_repo_dir, upload_point ) ) + else: + full_path = os.path.abspath( os.path.join( cloned_repo_dir, upload_point, uploaded_file_filename ) ) else: - full_path = os.path.abspath( os.path.join( cloned_repo_dir, file_data.filename ) ) + if istar: + full_path = os.path.abspath( os.path.join( cloned_repo_dir ) ) + else: + full_path = os.path.abspath( os.path.join( cloned_repo_dir, uploaded_file_filename ) ) if istar: # Extract the uploaded tarball to the load_point within the cloned repository hierarchy tar.extractall( path=full_path ) @@ -204,7 +281,7 @@ uploaded_file.close() else: # Move the uploaded file to the load_point within the cloned repository hierarchy - shutil.move( uploaded_file.name, full_path ) + shutil.move( uploaded_file_name, full_path ) def __check_archive( self, archive ): for member in archive.getmembers(): # Allow regular files and directories only --- a/templates/webapps/community/repository/upload.mako Fri Jun 17 14:27:47 2011 -0400 +++ b/templates/webapps/community/repository/upload.mako Fri Jun 17 15:00:22 2011 -0400 @@ -72,6 +72,30 @@ </div><div style="clear: both"></div></div> + + <div class="form-row"> + <% + if uncompress_file: + yes_selected = 'selected' + no_selected = '' + else: + yes_selected = '' + no_selected = 'selected' + %> + <label>Uncompress files?</label> + <div class="form-row-input"> + <select name="uncompress_file"> + <option value="true" ${yes_selected}>Yes + <option value="false" ${no_selected}>No + </select> + </div> + <div class="toolParamHelp" style="clear: both;"> + Supported compression types are gz and bz2. If <b>Yes</b> is selected, the uploaded file will be uncompressed. However, + if the uploaded file is an archive that contains compressed files, the contained files will not be uncompressed. For + example, if the uploaded compressed file is some_file.tar.gz, some_file.tar will be uncompressed and extracted, but if + some_file.tar contains 4.bed.gz, the contained file 4.bed.gz will not be uncompressed. + </div> + </div><div class="form-row"><label>Message:</label><div class="form-row-input"> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket