commit/galaxy-central: greg: Fixes when pushing changes to a repository in the tool shed from the command line: (a) only allow changes to a single file named tool_dependencies.xml to be pushed to a repository whose type is "Tool dependency definition" and (b) for any changes that are made to dependency definition files, validate the attributes of any <repository> tags, making sure that all required attributes (toolshed, name, owner, changeset_revision) are defined since automatically populating these attributes is supported only when using the tool shed's upload utility.
1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/ff21a344b9e1/
Changeset:   ff21a344b9e1
User:        greg
Date:        2013-07-17 21:34:54
Summary:     Fixes when pushing changes to a repository in the tool shed from the command line: (a) only allow changes to a single file named tool_dependencies.xml to be pushed to a repository whose type is "Tool dependency definition" and (b) for any changes that are made to dependency definition files, validate the attributes of any <repository> tags, making sure that all required attributes (toolshed, name, owner, changeset_revision) are defined since automatically populating these attributes is supported only when using the tool shed's upload utility.
Affected #:  2 files

diff -r ef5cc1d272d2805d1c6597b200f9df58a291d863 -r ff21a344b9e14e0deb896bbe89a45c70ad608331 lib/galaxy/webapps/tool_shed/framework/middleware/hg.py
--- a/lib/galaxy/webapps/tool_shed/framework/middleware/hg.py
+++ b/lib/galaxy/webapps/tool_shed/framework/middleware/hg.py
@@ -1,19 +1,28 @@
-"""
-Middleware for handling hg authentication for users pushing change sets to local repositories.
-"""
-import os, logging
+"""Middle-ware for handling hg authentication for users pushing change sets to local repositories."""
+import os
+import logging
 import sqlalchemy
+import sys
+import tempfile
 from paste.auth.basic import AuthBasicAuthenticator
 from paste.httpheaders import AUTH_TYPE
 from paste.httpheaders import REMOTE_USER
 from galaxy.util import asbool
+from galaxy.util import json
 from galaxy.webapps.tool_shed import model
 from galaxy.util.hash_util import new_secure_hash
+import tool_shed.util.shed_util_common as suc
+from tool_shed.util import commit_util
+import tool_shed.repository_types.util as rt_util
+
+from galaxy import eggs
+eggs.require( 'mercurial' )
 import mercurial.__version__

 log = logging.getLogger(__name__)

+CHUNK_SIZE = 65536

 class Hg( object ):
@@ -23,10 +32,6 @@
         self.config = config
         # Authenticate this mercurial request using basic authentication
         self.authentication = AuthBasicAuthenticator( 'hgweb in the tool shed', self.__basic_authentication )
-        self.remote_address = None
-        self.repository = None
-        self.username = None
-        self.action = None
         # Determine the database url
         if 'database_connection' in self.config:
             self.db_url = self.config[ 'database_connection' ]
@@ -42,26 +47,21 @@
             #
             # Increment the value of the times_downloaded column in the repository table for the cloned repository.
             if 'PATH_INFO' in environ:
-                path_info = environ[ 'PATH_INFO' ].lstrip( '/' )
-                # An example of path_info is: '/repos/test/column1'
-                path_info_components = path_info.split( '/' )
-                username = path_info_components[1]
-                name = path_info_components[2]
                 # Instantiate a database connection
                 engine = sqlalchemy.create_engine( self.db_url )
                 connection = engine.connect()
-                result_set = connection.execute( "select id from galaxy_user where username = '%s'" % username.lower() )
-                for row in result_set:
-                    # Should only be 1 row...
-                    user_id = row[ 'id' ]
-                result_set = connection.execute( "select times_downloaded from repository where user_id = %d and name = '%s'" % ( user_id, name.lower() ) )
+                path_info = environ[ 'PATH_INFO' ].lstrip( '/' )
+                user_id, repository_name = self.__get_user_id_repository_name_from_path_info( connection, path_info )
+                sql_cmd = "SELECT times_downloaded FROM repository WHERE user_id = %d AND name = '%s'" % ( user_id, repository_name.lower() )
+                result_set = connection.execute( sql_cmd )
                 for row in result_set:
                     # Should only be 1 row...
                     times_downloaded = row[ 'times_downloaded' ]
                 times_downloaded += 1
-                connection.execute( "update repository set times_downloaded = %d where user_id = %d and name = '%s'" % ( times_downloaded, user_id, name.lower() ) )
+                sql_cmd = "UPDATE repository SET times_downloaded = %d WHERE user_id = %d AND name = '%s'" % ( times_downloaded, user_id, repository_name.lower() )
+                connection.execute( sql_cmd )
                 connection.close()
-        if cmd in [ 'unbundle', 'pushkey' ]:
+        elif cmd in [ 'unbundle', 'pushkey' ]:
             # This is an hg push from the command line. When doing this, the following commands, in order,
             # will be retrieved from environ (see the docs at http://mercurial.selenic.com/wiki/WireProtocol):
             #
             # If mercurial version >= '2.2.3': capabilities -> batch -> branchmap -> unbundle -> listkeys -> pushkey -> listkeys
@@ -87,6 +87,71 @@
             # If all of these mechanisms fail, Mercurial will fail, printing an error message. In this case, it
             # will not let you commit until you set up a username.
             result = self.authentication( environ )
+            if not isinstance( result, str ) and cmd == 'unbundle' and 'wsgi.input' in environ:
+                bundle_data_stream = environ[ 'wsgi.input' ]
+                # Convert the incoming mercurial bundle into a json object and persit it to a temporary file for inspection.
+                fh = tempfile.NamedTemporaryFile( 'wb', prefix="tmp-hg-bundle" )
+                tmp_filename = fh.name
+                fh.close()
+                fh = open( tmp_filename, 'wb' )
+                while 1:
+                    chunk = bundle_data_stream.read( CHUNK_SIZE )
+                    if not chunk:
+                        break
+                    fh.write( chunk )
+                fh.close()
+                fh = open( tmp_filename, 'rb' )
+                changeset_groups = json.from_json_string( commit_util.bundle_to_json( fh ) )
+                fh.close()
+                try:
+                    os.unlink( tmp_filename )
+                except:
+                    pass
+                if changeset_groups:
+                    # Check the repository type to make sure inappropriate files are not being pushed.
+                    if 'PATH_INFO' in environ:
+                        # Instantiate a database connection
+                        engine = sqlalchemy.create_engine( self.db_url )
+                        connection = engine.connect()
+                        path_info = environ[ 'PATH_INFO' ].lstrip( '/' )
+                        user_id, repository_name = self.__get_user_id_repository_name_from_path_info( connection, path_info )
+                        sql_cmd = "SELECT type FROM repository WHERE user_id = %d AND name = '%s'" % ( user_id, repository_name.lower() )
+                        result_set = connection.execute( sql_cmd )
+                        for row in result_set:
+                            # Should only be 1 row...
+                            repository_type = str( row[ 'type' ] )
+                        if repository_type == rt_util.TOOL_DEPENDENCY_DEFINITION:
+                            # Handle repositories of type tool_dependency_definition, which can only contain a single file named tool_dependencies.xml.
+                            for entry in changeset_groups:
+                                if len( entry ) == 2:
+                                    # We possibly found an altered file entry.
+                                    filename, change_list = entry
+                                    if filename and isinstance( filename, str ):
+                                        if filename == suc.TOOL_DEPENDENCY_DEFINITION_FILENAME:
+                                            # Make sure the any complex repository dependency definitions contain valid <repository> tags.
+                                            is_valid, error_msg = commit_util.repository_tags_are_valid( filename, change_list )
+                                            if not is_valid:
+                                                log.debug( error_msg )
+                                                return self.__display_exception_remotely( start_response, error_msg )
+                                        else:
+                                            msg = "Only a single file named tool_dependencies.xml can be pushed to a repository of type 'Tool dependency definition'."
+                                            log.debug( msg )
+                                            return self.__display_exception_remotely( start_response, msg )
+                        else:
+                            # If the changeset includes changes to dependency definition files, make sure tag sets are not missing "toolshed" or
+                            # "changeset_revision" attributes since automatically populating them is not supported when pushing from the command line.
+                            # These attributes are automatically populated only when using the tool shed upload utility.
+                            for entry in changeset_groups:
+                                if len( entry ) == 2:
+                                    # We possibly found an altered file entry.
+                                    filename, change_list = entry
+                                    if filename and isinstance( filename, str ):
+                                        if filename in [ suc.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME, suc.TOOL_DEPENDENCY_DEFINITION_FILENAME ]:
+                                            # We check both files since tool dependency definitions files can contain complex repository dependency definitions.
+                                            is_valid, error_msg = commit_util.repository_tags_are_valid( filename, change_list )
+                                            if not is_valid:
+                                                log.debug( error_msg )
+                                                return self.__display_exception_remotely( start_response, error_msg )
             if isinstance( result, str ):
                 # Authentication was successful
                 AUTH_TYPE.update( environ, 'basic' )
@@ -95,6 +160,13 @@
                 return result.wsgi_application( environ, start_response )
         return self.app( environ, start_response )

+    def __display_exception_remotely( self, start_response, msg ):
+        # Display the exception to the remote user's command line.
+        status = "500 %s" % msg
+        response_headers = [ ("content-type", "text/plain") ]
+        start_response( status, response_headers, sys.exc_info() )
+        return [ msg ]
+
     def __get_hg_command( self, **kwd ):
         """Pulls mercurial commands from environ[ 'QUERY_STRING" ] and returns them."""
         if 'QUERY_STRING' in kwd:
@@ -103,6 +175,18 @@
                 return qry.split( '=' )[ -1 ]
         return None

+    def __get_user_id_repository_name_from_path_info( self, db_connection, path_info ):
+        # An example of path_info is: '/repos/test/column1'
+        path_info_components = path_info.split( '/' )
+        username = path_info_components[ 1 ]
+        repository_name = path_info_components[ 2 ]
+        # Get the id of the current user using hg from the command line.
+        result_set = db_connection.execute( "select id from galaxy_user where username = '%s'" % username.lower() )
+        for row in result_set:
+            # Should only be 1 row...
+            user_id = row[ 'id' ]
+        return user_id, repository_name
+
     def __basic_authentication( self, environ, username, password ):
         """The environ parameter is needed in basic authentication. We also check it if use_remote_user is true."""
         if asbool( self.config.get( 'use_remote_user', False ) ):
@@ -140,7 +224,7 @@
         result_set = connection.execute( "select email, username, password from galaxy_user where email = '%s'" % ru_email )
         for row in result_set:
             # Should only be 1 row...
-            db_email = row[ 'email' ]
+            db_email = row[ 'email' ]
             db_password = row[ 'password' ]
             db_username = row[ 'username' ]
         connection.close()

diff -r ef5cc1d272d2805d1c6597b200f9df58a291d863 -r ff21a344b9e14e0deb896bbe89a45c70ad608331 lib/tool_shed/util/commit_util.py
--- a/lib/tool_shed/util/commit_util.py
+++ b/lib/tool_shed/util/commit_util.py
@@ -1,7 +1,9 @@
+import cStringIO
 import logging
 import os
 import pkg_resources
 import shutil
+import struct
 import tempfile
 from galaxy import util
 from galaxy.datatypes import checkers
@@ -10,19 +12,29 @@
 import tool_shed.util.shed_util_common as suc
 from tool_shed.util import tool_util
 from tool_shed.util import xml_util
-from galaxy import eggs
 import tool_shed.repository_types.util as rt_util

+from galaxy import eggs
 eggs.require( 'mercurial' )
 from mercurial import commands
 from mercurial import hg
 from mercurial import ui
+from mercurial.changegroup import readbundle
+from mercurial.changegroup import readexactly
+from mercurial.changegroup import writebundle

 log = logging.getLogger( __name__ )

 UNDESIRABLE_DIRS = [ '.hg', '.svn', '.git', '.cvs' ]
 UNDESIRABLE_FILES = [ '.hg_archival.txt', 'hgrc', '.DS_Store' ]

+def bundle_to_json( fh ):
+    """Convert the received HG10xx data stream (a mercurial 1.0 bundle created using hg push from the command line) to a json object."""
+    # See http://www.wstein.org/home/wstein/www/home/was/patches/hg_json
+    hg_unbundle10_obj = readbundle( fh, None )
+    groups = [ group for group in unpack_groups( hg_unbundle10_obj ) ]
+    return json.to_json_string( groups, indent=4 )
+
 def check_archive( repository, archive ):
     for member in archive.getmembers():
         # Allow regular files and directories only
@@ -63,6 +75,23 @@
         message = 'The file "%s" contains image content.\n' % str( file_path )
     return message

+def get_change_lines_in_file_for_tag( tag, change_dict ):
+    """
+    The received change_dict is the jsonified version of the changes to a file in a changeset being pushed to the tool shed from the command line.
+    This method cleans and returns appropriate lines for inspection.
+    """
+    cleaned_lines = []
+    data_list = change_dict.get( 'data', [] )
+    for data_dict in data_list:
+        block = data_dict.get( 'block', '' )
+        lines = block.split( '\\n' )
+        for line in lines:
+            index = line.find( tag )
+            if index > -1:
+                line = line[ index: ]
+                cleaned_lines.append( line )
+    return cleaned_lines
+
 def get_upload_point( repository, **kwd ):
     upload_point = kwd.get( 'upload_point', None )
     if upload_point is not None:
@@ -149,8 +178,8 @@
                 except OSError, e:
                     # The directory is not empty.
                     pass
-    # See if any admin users have chosen to receive email alerts when a repository is
-    # updated. If so, check every uploaded file to ensure content is appropriate.
+    # See if any admin users have chosen to receive email alerts when a repository is updated. If so, check every uploaded file to ensure
+    # content is appropriate.
     check_contents = check_file_contents_for_email_alerts( trans )
     for filename_in_archive in filenames_in_archive:
         # Check file content to ensure it is appropriate.
@@ -307,6 +336,39 @@
         return altered, root
     return False, None

+def repository_tag_is_valid( filename, line ):
+    """
+    Checks changes made to <repository> tags in a dependency definition file being pushed to the tool shed from the command line to ensure that
+    all required attributes exist.
+    """
+    required_attributes = [ 'toolshed', 'name', 'owner', 'changeset_revision' ]
+    defined_attributes = line.split()
+    for required_attribute in required_attributes:
+        defined = False
+        for defined_attribute in defined_attributes:
+            if defined_attribute.startswith( required_attribute ):
+                defined = True
+                break
+        if not defined:
+            error_msg = 'The %s file contains a <repository> tag that is missing the required attribute %s. ' % ( filename, required_attribute )
+            error_msg += 'Automatically populating dependency definition attributes occurs only when using the tool shed upload utility. '
+            return False, error_msg
+    return True, ''
+
+def repository_tags_are_valid( filename, change_list ):
+    """
+    Make sure the any complex repository dependency definitions contain valid <repository> tags when pushing changes to the tool shed on the command
+    line.
+    """
+    tag = '<repository'
+    for change_dict in change_list:
+        lines = get_change_lines_in_file_for_tag( tag, change_dict )
+        for line in lines:
+            is_valid, error_msg = repository_tag_is_valid( filename, line )
+            if not is_valid:
+                return False, error_msg
+    return True, ''
+
 def uncompress( repository, uploaded_file_name, uploaded_file_filename, isgzip, isbz2 ):
     if isgzip:
         handle_gzip( repository, uploaded_file_name )
@@ -314,3 +376,60 @@
     if isbz2:
         handle_bz2( repository, uploaded_file_name )
         return uploaded_file_filename.rstrip( '.bz2' )
+
+def unpack_chunks( hg_unbundle10_obj ):
+    """
+    This method provides a generator of parsed chunks of a "group" in a mercurial unbundle10 object which is created when a changeset that is pushed
+    to a tool shed repository using hg push from the command line is read using readbundle.
+    """
+    while True:
+        length, = struct.unpack( '>l', readexactly( hg_unbundle10_obj, 4 ) )
+        if length <= 4:
+            # We found a "null chunk", which ends the group.
+            break
+        if length < 84:
+            raise Exception( "negative data length" )
+        node, p1, p2, cs = struct.unpack( '20s20s20s20s', readexactly( hg_unbundle10_obj, 80 ) )
+        yield { 'node': node.encode( 'hex' ),
+                'p1': p1.encode( 'hex' ),
+                'p2': p2.encode( 'hex' ),
+                'cs': cs.encode( 'hex' ),
+                'data': [ patch for patch in unpack_patches( hg_unbundle10_obj, length - 84 ) ] }
+
+def unpack_groups( hg_unbundle10_obj ):
+    """
+    This method provides a generator of parsed groups from a mercurial unbundle10 object which is created when a changeset that is pushed
+    to a tool shed repository using hg push from the command line is read using readbundle.
+    """
+    # Process the changelog group.
+    yield [ chunk for chunk in unpack_chunks( hg_unbundle10_obj ) ]
+    # Process the manifest group.
+    yield [ chunk for chunk in unpack_chunks( hg_unbundle10_obj ) ]
+    while True:
+        length, = struct.unpack( '>l', readexactly( hg_unbundle10_obj, 4 ) )
+        if length <= 4:
+            # We found a "null meta chunk", which ends the changegroup.
+            break
+        filename = readexactly( hg_unbundle10_obj, length-4 ).encode( 'string_escape' )
+        # Process the file group.
+        yield ( filename, [ chunk for chunk in unpack_chunks( hg_unbundle10_obj ) ] )
+
+def unpack_patches( hg_unbundle10_obj, remaining ):
+    """
+    This method provides a generator of patches from the data field in a chunk. As there is no delimiter for this data field, a length argument is
+    required.
+    """
+    while remaining >= 12:
+        start, end, blocklen = struct.unpack( '>lll', readexactly( hg_unbundle10_obj, 12 ) )
+        remaining -= 12
+        if blocklen > remaining:
+            raise Exception( "unexpected end of patch stream" )
+        block = readexactly( hg_unbundle10_obj, blocklen )
+        remaining -= blocklen
+        yield { 'start': start,
+                'end': end,
+                'blocklen': blocklen,
+                'block': block.encode( 'string_escape' ) }
+    if remaining > 0:
+        print remaining
+        raise Exception( "unexpected end of patch stream" )

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
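For readers tracing the middleware logic above, the following is a rough, hypothetical sketch of the jsonified changeset_groups value that commit_util.bundle_to_json() produces for a push touching only tool_dependencies.xml. The shape follows the new unpack_groups/unpack_chunks/unpack_patches code: two leading chunk lists for the changelog and manifest groups, then a two-element ( filename, chunk list ) entry per pushed file. All hashes, offsets, and patch text below are invented.

# Hypothetical parsed bundle; node/p1/p2/cs hashes and patch data are made up.
changeset_groups = [
    # Changelog group.
    [ { "node": "aa" * 20, "p1": "bb" * 20, "p2": "00" * 20, "cs": "aa" * 20, "data": [] } ],
    # Manifest group.
    [ { "node": "cc" * 20, "p1": "dd" * 20, "p2": "00" * 20, "cs": "aa" * 20, "data": [] } ],
    # File group: a two-element ( filename, chunk list ) entry.
    [ "tool_dependencies.xml",
      [ { "node": "ee" * 20, "p1": "ff" * 20, "p2": "00" * 20, "cs": "aa" * 20,
          "data": [ { "start": 0, "end": 0, "blocklen": 79,
                      "block": '+<repository toolshed="..." name="..." owner="..." changeset_revision="..."/>\\n' } ] } ] ],
]

# The hg middleware only inspects two-element entries, unpacking each as
# ( filename, change_list ) before running the <repository> tag validation.
for entry in changeset_groups:
    if len( entry ) == 2:
        filename, change_list = entry
        print filename  # tool_dependencies.xml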
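And a minimal usage sketch of the new validator itself, assuming it is run with Galaxy's lib directory on sys.path in a Python 2 tool shed environment; the repository coordinates in the tag strings are hypothetical:

# Hypothetical <repository> tag lines as they might appear in a pushed tool_dependencies.xml.
from tool_shed.util import commit_util

complete_tag = '<repository toolshed="http://testtoolshed.g2.bx.psu.edu" name="package_example_1_0" owner="test" changeset_revision="0123456789ab"/>'
incomplete_tag = '<repository name="package_example_1_0" owner="test"/>'

# All four required attributes are present, so the tag is accepted.
print commit_util.repository_tag_is_valid( 'tool_dependencies.xml', complete_tag )
# ( True, '' )

# "toolshed" and "changeset_revision" are missing; a push containing this tag is
# rejected because only the tool shed upload utility auto-populates those attributes.
print commit_util.repository_tag_is_valid( 'tool_dependencies.xml', incomplete_tag )
# ( False, 'The tool_dependencies.xml file contains a <repository> tag that is missing the required attribute toolshed. ...' )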