commit/galaxy-central: greg: Fixes when pushing changes to a repository in the tool shed from the command line: (a) only allow changes to a single file named tool_dependencies.xml to be pushed to a repository whose type is "Tool dependency definition" and (b) for any changes that are made to dependency definition files, validate the attributes of any <repository> tags, making sure that all required attributes (toolshed, name, owner, changeset_revision) are defined since automatically populating these attributes is supported only when using the tool shed's upload utility.
1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/ff21a344b9e1/
Changeset:   ff21a344b9e1
User:        greg
Date:        2013-07-17 21:34:54
Summary:     Fixes when pushing changes to a repository in the tool shed from the command line: (a) only allow changes to a single file named tool_dependencies.xml to be pushed to a repository whose type is "Tool dependency definition" and (b) for any changes that are made to dependency definition files, validate the attributes of any <repository> tags, making sure that all required attributes (toolshed, name, owner, changeset_revision) are defined since automatically populating these attributes is supported only when using the tool shed's upload utility.
Affected #:  2 files

diff -r ef5cc1d272d2805d1c6597b200f9df58a291d863 -r ff21a344b9e14e0deb896bbe89a45c70ad608331 lib/galaxy/webapps/tool_shed/framework/middleware/hg.py
--- a/lib/galaxy/webapps/tool_shed/framework/middleware/hg.py
+++ b/lib/galaxy/webapps/tool_shed/framework/middleware/hg.py
@@ -1,19 +1,28 @@
-"""
-Middleware for handling hg authentication for users pushing change sets to local repositories.
-"""
-import os, logging
+"""Middle-ware for handling hg authentication for users pushing change sets to local repositories."""
+import os
+import logging
 import sqlalchemy
+import sys
+import tempfile
 from paste.auth.basic import AuthBasicAuthenticator
 from paste.httpheaders import AUTH_TYPE
 from paste.httpheaders import REMOTE_USER
 from galaxy.util import asbool
+from galaxy.util import json
 from galaxy.webapps.tool_shed import model
 from galaxy.util.hash_util import new_secure_hash
+import tool_shed.util.shed_util_common as suc
+from tool_shed.util import commit_util
+import tool_shed.repository_types.util as rt_util
+
+from galaxy import eggs
+eggs.require( 'mercurial' )
 import mercurial.__version__

 log = logging.getLogger(__name__)

+CHUNK_SIZE = 65536

 class Hg( object ):
@@ -23,10 +32,6 @@
         self.config = config
         # Authenticate this mercurial request using basic authentication
         self.authentication = AuthBasicAuthenticator( 'hgweb in the tool shed', self.__basic_authentication )
-        self.remote_address = None
-        self.repository = None
-        self.username = None
-        self.action = None
         # Determine the database url
         if 'database_connection' in self.config:
             self.db_url = self.config[ 'database_connection' ]
@@ -42,26 +47,21 @@
             #
             # Increment the value of the times_downloaded column in the repository table for the cloned repository.
             if 'PATH_INFO' in environ:
-                path_info = environ[ 'PATH_INFO' ].lstrip( '/' )
-                # An example of path_info is: '/repos/test/column1'
-                path_info_components = path_info.split( '/' )
-                username = path_info_components[1]
-                name = path_info_components[2]
                 # Instantiate a database connection
                 engine = sqlalchemy.create_engine( self.db_url )
                 connection = engine.connect()
-                result_set = connection.execute( "select id from galaxy_user where username = '%s'" % username.lower() )
-                for row in result_set:
-                    # Should only be 1 row...
-                    user_id = row[ 'id' ]
-                result_set = connection.execute( "select times_downloaded from repository where user_id = %d and name = '%s'" % ( user_id, name.lower() ) )
+                path_info = environ[ 'PATH_INFO' ].lstrip( '/' )
+                user_id, repository_name = self.__get_user_id_repository_name_from_path_info( connection, path_info )
+                sql_cmd = "SELECT times_downloaded FROM repository WHERE user_id = %d AND name = '%s'" % ( user_id, repository_name.lower() )
+                result_set = connection.execute( sql_cmd )
                 for row in result_set:
                     # Should only be 1 row...
                     times_downloaded = row[ 'times_downloaded' ]
                 times_downloaded += 1
-                connection.execute( "update repository set times_downloaded = %d where user_id = %d and name = '%s'" % ( times_downloaded, user_id, name.lower() ) )
+                sql_cmd = "UPDATE repository SET times_downloaded = %d WHERE user_id = %d AND name = '%s'" % ( times_downloaded, user_id, repository_name.lower() )
+                connection.execute( sql_cmd )
                 connection.close()
-        if cmd in [ 'unbundle', 'pushkey' ]:
+        elif cmd in [ 'unbundle', 'pushkey' ]:
             # This is an hg push from the command line. When doing this, the following commands, in order,
             # will be retrieved from environ (see the docs at http://mercurial.selenic.com/wiki/WireProtocol):
             #
             # If mercurial version >= '2.2.3': capabilities -> batch -> branchmap -> unbundle -> listkeys -> pushkey -> listkeys
@@ -87,6 +87,71 @@
             # If all of these mechanisms fail, Mercurial will fail, printing an error message. In this case, it
             # will not let you commit until you set up a username.
             result = self.authentication( environ )
+            if not isinstance( result, str ) and cmd == 'unbundle' and 'wsgi.input' in environ:
+                bundle_data_stream = environ[ 'wsgi.input' ]
+                # Convert the incoming mercurial bundle into a json object and persit it to a temporary file for inspection.
+                fh = tempfile.NamedTemporaryFile( 'wb', prefix="tmp-hg-bundle" )
+                tmp_filename = fh.name
+                fh.close()
+                fh = open( tmp_filename, 'wb' )
+                while 1:
+                    chunk = bundle_data_stream.read( CHUNK_SIZE )
+                    if not chunk:
+                        break
+                    fh.write( chunk )
+                fh.close()
+                fh = open( tmp_filename, 'rb' )
+                changeset_groups = json.from_json_string( commit_util.bundle_to_json( fh ) )
+                fh.close()
+                try:
+                    os.unlink( tmp_filename )
+                except:
+                    pass
+                if changeset_groups:
+                    # Check the repository type to make sure inappropriate files are not being pushed.
+                    if 'PATH_INFO' in environ:
+                        # Instantiate a database connection
+                        engine = sqlalchemy.create_engine( self.db_url )
+                        connection = engine.connect()
+                        path_info = environ[ 'PATH_INFO' ].lstrip( '/' )
+                        user_id, repository_name = self.__get_user_id_repository_name_from_path_info( connection, path_info )
+                        sql_cmd = "SELECT type FROM repository WHERE user_id = %d AND name = '%s'" % ( user_id, repository_name.lower() )
+                        result_set = connection.execute( sql_cmd )
+                        for row in result_set:
+                            # Should only be 1 row...
+                            repository_type = str( row[ 'type' ] )
+                        if repository_type == rt_util.TOOL_DEPENDENCY_DEFINITION:
+                            # Handle repositories of type tool_dependency_definition, which can only contain a single file named tool_dependencies.xml.
+                            for entry in changeset_groups:
+                                if len( entry ) == 2:
+                                    # We possibly found an altered file entry.
+                                    filename, change_list = entry
+                                    if filename and isinstance( filename, str ):
+                                        if filename == suc.TOOL_DEPENDENCY_DEFINITION_FILENAME:
+                                            # Make sure the any complex repository dependency definitions contain valid <repository> tags.
+                                            is_valid, error_msg = commit_util.repository_tags_are_valid( filename, change_list )
+                                            if not is_valid:
+                                                log.debug( error_msg )
+                                                return self.__display_exception_remotely( start_response, error_msg )
+                                        else:
+                                            msg = "Only a single file named tool_dependencies.xml can be pushed to a repository of type 'Tool dependency definition'."
+                                            log.debug( msg )
+                                            return self.__display_exception_remotely( start_response, msg )
+                        else:
+                            # If the changeset includes changes to dependency definition files, make sure tag sets are not missing "toolshed" or
+                            # "changeset_revision" attributes since automatically populating them is not supported when pushing from the command line.
+                            # These attributes are automatically populated only when using the tool shed upload utility.
+                            for entry in changeset_groups:
+                                if len( entry ) == 2:
+                                    # We possibly found an altered file entry.
+                                    filename, change_list = entry
+                                    if filename and isinstance( filename, str ):
+                                        if filename in [ suc.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME, suc.TOOL_DEPENDENCY_DEFINITION_FILENAME ]:
+                                            # We check both files since tool dependency definitions files can contain complex repository dependency definitions.
+                                            is_valid, error_msg = commit_util.repository_tags_are_valid( filename, change_list )
+                                            if not is_valid:
+                                                log.debug( error_msg )
+                                                return self.__display_exception_remotely( start_response, error_msg )
             if isinstance( result, str ):
                 # Authentication was successful
                 AUTH_TYPE.update( environ, 'basic' )
@@ -95,6 +160,13 @@
                 return result.wsgi_application( environ, start_response )
         return self.app( environ, start_response )

+    def __display_exception_remotely( self, start_response, msg ):
+        # Display the exception to the remote user's command line.
+        status = "500 %s" % msg
+        response_headers = [ ("content-type", "text/plain") ]
+        start_response( status, response_headers, sys.exc_info() )
+        return [ msg ]
+
     def __get_hg_command( self, **kwd ):
         """Pulls mercurial commands from environ[ 'QUERY_STRING" ] and returns them."""
         if 'QUERY_STRING' in kwd:
@@ -103,6 +175,18 @@
                 return qry.split( '=' )[ -1 ]
         return None

+    def __get_user_id_repository_name_from_path_info( self, db_connection, path_info ):
+        # An example of path_info is: '/repos/test/column1'
+        path_info_components = path_info.split( '/' )
+        username = path_info_components[ 1 ]
+        repository_name = path_info_components[ 2 ]
+        # Get the id of the current user using hg from the command line.
+        result_set = db_connection.execute( "select id from galaxy_user where username = '%s'" % username.lower() )
+        for row in result_set:
+            # Should only be 1 row...
+            user_id = row[ 'id' ]
+        return user_id, repository_name
+
     def __basic_authentication( self, environ, username, password ):
         """The environ parameter is needed in basic authentication. We also check it if use_remote_user is true."""
         if asbool( self.config.get( 'use_remote_user', False ) ):
@@ -140,7 +224,7 @@
         result_set = connection.execute( "select email, username, password from galaxy_user where email = '%s'" % ru_email )
         for row in result_set:
             # Should only be 1 row...
-            db_email = row[ 'email' ]
+            db_email = row[ 'email' ]
             db_password = row[ 'password' ]
             db_username = row[ 'username' ]
         connection.close()

diff -r ef5cc1d272d2805d1c6597b200f9df58a291d863 -r ff21a344b9e14e0deb896bbe89a45c70ad608331 lib/tool_shed/util/commit_util.py
--- a/lib/tool_shed/util/commit_util.py
+++ b/lib/tool_shed/util/commit_util.py
@@ -1,7 +1,9 @@
+import cStringIO
 import logging
 import os
 import pkg_resources
 import shutil
+import struct
 import tempfile
 from galaxy import util
 from galaxy.datatypes import checkers
@@ -10,19 +12,29 @@
 import tool_shed.util.shed_util_common as suc
 from tool_shed.util import tool_util
 from tool_shed.util import xml_util
-from galaxy import eggs
 import tool_shed.repository_types.util as rt_util

+from galaxy import eggs
 eggs.require( 'mercurial' )
 from mercurial import commands
 from mercurial import hg
 from mercurial import ui
+from mercurial.changegroup import readbundle
+from mercurial.changegroup import readexactly
+from mercurial.changegroup import writebundle

 log = logging.getLogger( __name__ )

 UNDESIRABLE_DIRS = [ '.hg', '.svn', '.git', '.cvs' ]
 UNDESIRABLE_FILES = [ '.hg_archival.txt', 'hgrc', '.DS_Store' ]

+def bundle_to_json( fh ):
+    """Convert the received HG10xx data stream (a mercurial 1.0 bundle created using hg push from the command line) to a json object."""
+    # See http://www.wstein.org/home/wstein/www/home/was/patches/hg_json
+    hg_unbundle10_obj = readbundle( fh, None )
+    groups = [ group for group in unpack_groups( hg_unbundle10_obj ) ]
+    return json.to_json_string( groups, indent=4 )
+
 def check_archive( repository, archive ):
     for member in archive.getmembers():
         # Allow regular files and directories only
@@ -63,6 +75,23 @@
         message = 'The file "%s" contains image content.\n' % str( file_path )
     return message

+def get_change_lines_in_file_for_tag( tag, change_dict ):
+    """
+    The received change_dict is the jsonified version of the changes to a file in a changeset being pushed to the tool shed from the command line.
+    This method cleans and returns appropriate lines for inspection.
+    """
+    cleaned_lines = []
+    data_list = change_dict.get( 'data', [] )
+    for data_dict in data_list:
+        block = data_dict.get( 'block', '' )
+        lines = block.split( '\\n' )
+        for line in lines:
+            index = line.find( tag )
+            if index > -1:
+                line = line[ index: ]
+                cleaned_lines.append( line )
+    return cleaned_lines
+
 def get_upload_point( repository, **kwd ):
     upload_point = kwd.get( 'upload_point', None )
     if upload_point is not None:
@@ -149,8 +178,8 @@
                 except OSError, e:
                     # The directory is not empty.
                     pass
-    # See if any admin users have chosen to receive email alerts when a repository is
-    # updated. If so, check every uploaded file to ensure content is appropriate.
+    # See if any admin users have chosen to receive email alerts when a repository is updated. If so, check every uploaded file to ensure
+    # content is appropriate.
     check_contents = check_file_contents_for_email_alerts( trans )
     for filename_in_archive in filenames_in_archive:
         # Check file content to ensure it is appropriate.
@@ -307,6 +336,39 @@
         return altered, root
     return False, None

+def repository_tag_is_valid( filename, line ):
+    """
+    Checks changes made to <repository> tags in a dependency definition file being pushed to the tool shed from the command line to ensure that
+    all required attributes exist.
+    """
+    required_attributes = [ 'toolshed', 'name', 'owner', 'changeset_revision' ]
+    defined_attributes = line.split()
+    for required_attribute in required_attributes:
+        defined = False
+        for defined_attribute in defined_attributes:
+            if defined_attribute.startswith( required_attribute ):
+                defined = True
+                break
+        if not defined:
+            error_msg = 'The %s file contains a <repository> tag that is missing the required attribute %s. ' % ( filename, required_attribute )
+            error_msg += 'Automatically populating dependency definition attributes occurs only when using the tool shed upload utility. '
+            return False, error_msg
+    return True, ''
+
+def repository_tags_are_valid( filename, change_list ):
+    """
+    Make sure the any complex repository dependency definitions contain valid <repository> tags when pushing changes to the tool shed on the command
+    line.
+    """
+    tag = '<repository'
+    for change_dict in change_list:
+        lines = get_change_lines_in_file_for_tag( tag, change_dict )
+        for line in lines:
+            is_valid, error_msg = repository_tag_is_valid( filename, line )
+            if not is_valid:
+                return False, error_msg
+    return True, ''
+
 def uncompress( repository, uploaded_file_name, uploaded_file_filename, isgzip, isbz2 ):
     if isgzip:
         handle_gzip( repository, uploaded_file_name )
@@ -314,3 +376,60 @@
     if isbz2:
         handle_bz2( repository, uploaded_file_name )
         return uploaded_file_filename.rstrip( '.bz2' )
+
+def unpack_chunks( hg_unbundle10_obj ):
+    """
+    This method provides a generator of parsed chunks of a "group" in a mercurial unbundle10 object which is created when a changeset that is pushed
+    to a tool shed repository using hg push from the command line is read using readbundle.
+    """
+    while True:
+        length, = struct.unpack( '>l', readexactly( hg_unbundle10_obj, 4 ) )
+        if length <= 4:
+            # We found a "null chunk", which ends the group.
+            break
+        if length < 84:
+            raise Exception( "negative data length" )
+        node, p1, p2, cs = struct.unpack( '20s20s20s20s', readexactly( hg_unbundle10_obj, 80 ) )
+        yield { 'node': node.encode( 'hex' ),
+                'p1': p1.encode( 'hex' ),
+                'p2': p2.encode( 'hex' ),
+                'cs': cs.encode( 'hex' ),
+                'data': [ patch for patch in unpack_patches( hg_unbundle10_obj, length - 84 ) ] }
+
+def unpack_groups( hg_unbundle10_obj ):
+    """
+    This method provides a generator of parsed groups from a mercurial unbundle10 object which is created when a changeset that is pushed
+    to a tool shed repository using hg push from the command line is read using readbundle.
+    """
+    # Process the changelog group.
+    yield [ chunk for chunk in unpack_chunks( hg_unbundle10_obj ) ]
+    # Process the manifest group.
+    yield [ chunk for chunk in unpack_chunks( hg_unbundle10_obj ) ]
+    while True:
+        length, = struct.unpack( '>l', readexactly( hg_unbundle10_obj, 4 ) )
+        if length <= 4:
+            # We found a "null meta chunk", which ends the changegroup.
+            break
+        filename = readexactly( hg_unbundle10_obj, length-4 ).encode( 'string_escape' )
+        # Process the file group.
+        yield ( filename, [ chunk for chunk in unpack_chunks( hg_unbundle10_obj ) ] )
+
+def unpack_patches( hg_unbundle10_obj, remaining ):
+    """
+    This method provides a generator of patches from the data field in a chunk. As there is no delimiter for this data field, a length argument is
+    required.
+    """
+    while remaining >= 12:
+        start, end, blocklen = struct.unpack( '>lll', readexactly( hg_unbundle10_obj, 12 ) )
+        remaining -= 12
+        if blocklen > remaining:
+            raise Exception( "unexpected end of patch stream" )
+        block = readexactly( hg_unbundle10_obj, blocklen )
+        remaining -= blocklen
+        yield { 'start': start,
+                'end': end,
+                'blocklen': blocklen,
+                'block': block.encode( 'string_escape' ) }
+    if remaining > 0:
+        print remaining
+        raise Exception( "unexpected end of patch stream" )

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
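For readers tracing the middleware logic above, the following is a rough, hypothetical sketch of the jsonified changeset_groups value that commit_util.bundle_to_json() produces for a push touching only tool_dependencies.xml. The shape follows the new unpack_groups/unpack_chunks/unpack_patches code: two leading chunk lists for the changelog and manifest groups, then a two-element ( filename, chunk list ) entry per pushed file. All hashes, offsets, and patch text below are invented.

# Hypothetical parsed bundle; node/p1/p2/cs hashes and patch data are made up.
changeset_groups = [
    # Changelog group.
    [ { "node": "aa" * 20, "p1": "bb" * 20, "p2": "00" * 20, "cs": "aa" * 20, "data": [] } ],
    # Manifest group.
    [ { "node": "cc" * 20, "p1": "dd" * 20, "p2": "00" * 20, "cs": "aa" * 20, "data": [] } ],
    # File group: a two-element ( filename, chunk list ) entry.
    [ "tool_dependencies.xml",
      [ { "node": "ee" * 20, "p1": "ff" * 20, "p2": "00" * 20, "cs": "aa" * 20,
          "data": [ { "start": 0, "end": 0, "blocklen": 79,
                      "block": '+<repository toolshed="..." name="..." owner="..." changeset_revision="..."/>\\n' } ] } ] ],
]

# The hg middleware only inspects two-element entries, unpacking each as
# ( filename, change_list ) before running the <repository> tag validation.
for entry in changeset_groups:
    if len( entry ) == 2:
        filename, change_list = entry
        print filename  # tool_dependencies.xml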
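And a minimal usage sketch of the new validator itself, assuming it is run with Galaxy's lib directory on sys.path in a Python 2 tool shed environment; the repository coordinates in the tag strings are hypothetical:

# Hypothetical <repository> tag lines as they might appear in a pushed tool_dependencies.xml.
from tool_shed.util import commit_util

complete_tag = '<repository toolshed="http://testtoolshed.g2.bx.psu.edu" name="package_example_1_0" owner="test" changeset_revision="0123456789ab"/>'
incomplete_tag = '<repository name="package_example_1_0" owner="test"/>'

# All four required attributes are present, so the tag is accepted.
print commit_util.repository_tag_is_valid( 'tool_dependencies.xml', complete_tag )
# ( True, '' )

# "toolshed" and "changeset_revision" are missing; a push containing this tag is
# rejected because only the tool shed upload utility auto-populates those attributes.
print commit_util.repository_tag_is_valid( 'tool_dependencies.xml', incomplete_tag )
# ( False, 'The tool_dependencies.xml file contains a <repository> tag that is missing the required attribute toolshed. ...' )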