commit/galaxy-central: greg: Fixes for handling large numbers of tool shed repositories during installation into Galaxy.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/604693b3ebe0/ Changeset: 604693b3ebe0 User: greg Date: 2013-05-13 17:10:50 Summary: Fixes for handling large numbers of tool shed repositories during installation into Galaxy. Affected #: 5 files diff -r 65a81aead95e147f709ee3969d49766f35d6a2e2 -r 604693b3ebe0283804225eac10adab2f858ec380 lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py --- a/lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py +++ b/lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py @@ -313,6 +313,11 @@ """Return an svg image representation of a workflow dictionary created when the workflow was exported.""" return workflow_util.generate_workflow_image( trans, workflow_name, repository_metadata_id=None, repository_id=repository_id ) + @web.expose + def get_contents_of_file( self, trans, encoded_file_path ): + file_path = encoding_util.tool_shed_decode( encoded_file_path ) + return suc.get_file_contents( file_path ) + @web.json @web.require_admin def get_file_contents( self, trans, file_path ): @@ -383,6 +388,34 @@ tool_version = tool_util.get_tool_version( app, guid ) return tool_version.get_version_ids( app, reverse=True ) + @web.json + def handle_large_repo_info_dict( self, trans, **kwd ): + """ + In some cases the required encoded_str will be long. With apache, the default limit for the length of the request line is 8190 bytes + (http://httpd.apache.org/docs/2.2/mod/core.html#limitrequestline). And if we subtract three bytes for the request method (i.e. GET), + eight bytes for the version information (i.e. HTTP/1.0/HTTP/1.1) and two bytes for the separating space, we end up with 8177 bytes for + the URI path plus query. The referer handles requests longer than this by persisting the repo_info_dict to a temporary file which we can read. + """ + required_repo_info_dict = {} + encoded_tmp_file_name = kwd.get( 'encoded_tmp_file_name', None ) + # The request would have been longer than 8190 bytes if it included the encoded_str, so we'll send a request to the Galaxy instance to get it. + tool_shed_url = kwd.get( 'tool_shed_url', None ) + if tool_shed_url and encoded_tmp_file_name: + url = suc.url_join( tool_shed_url, + '/repository/get_contents_of_file?encoded_tmp_file_name=%s' % encoded_tmp_file_name ) + response = urllib2.urlopen( url ) + text = response.read() + required_repo_info_dict = json.from_json_string( text ) + else: + log.debug( "Invalid tool_shed_url '%s' or encoded_tmp_file_name '%s'." % ( str( tool_shed_url ), str( encoded_tmp_file_name ) ) ) + tmp_file_name = encoding_util.tool_shed_decode( encoded_tmp_file_name ) + if os.path.exists( tmp_file_name ): + try: + os.unkink( tmp_file_name ) + except: + pass + return common_install_util.process_repo_info_dict( trans, required_repo_info_dict ) + @web.expose @web.require_admin def import_workflow( self, trans, workflow_name, repository_id, **kwd ): @@ -1406,15 +1439,6 @@ message=message, status=status ) - @web.expose - def stream_file_contents( self, trans, encoded_tmp_file_name ): - tmp_file_name = encoding_util.tool_shed_decode( encoded_tmp_file_name ) - if os.path.exists( tmp_file_name ): - return open( tmp_file_name, 'r' ) - else: - log.debug( "The required temporary file '%s' cannot be located." % str( tmp_file_name ) ) - return '' - @web.json def tool_dependency_status_updates( self, trans, ids=None, status_list=None ): # Avoid caching diff -r 65a81aead95e147f709ee3969d49766f35d6a2e2 -r 604693b3ebe0283804225eac10adab2f858ec380 lib/galaxy/webapps/tool_shed/controllers/repository.py --- a/lib/galaxy/webapps/tool_shed/controllers/repository.py +++ b/lib/galaxy/webapps/tool_shed/controllers/repository.py @@ -1285,6 +1285,11 @@ return encoding_util.tool_shed_encode( update_dict ) @web.expose + def get_contents_of_file( self, trans, encoded_file_path ): + file_path = encoding_util.tool_shed_decode( encoded_file_path ) + return suc.get_file_contents( file_path ) + + @web.expose def get_ctx_rev( self, trans, **kwd ): """Given a repository and changeset_revision, return the correct ctx.rev() value.""" repository_name = kwd[ 'name' ] @@ -1473,32 +1478,12 @@ repo_info_dicts=repo_info_dicts ) @web.json - def get_required_repo_info_dict( self, trans, encoded_str=None, **kwd ): + def get_required_repo_info_dict( self, trans, encoded_str, **kwd ): """ Retrieve and return a dictionary that includes a list of dictionaries that each contain all of the information needed to install the list of repositories defined by the received encoded_str. """ - # In some cases the received encoded_str will be long. With apache, the default limit for the length of the request line is 8190 bytes - # (http://httpd.apache.org/docs/2.2/mod/core.html#limitrequestline). And if we subtract three bytes for the request method (i.e. GET), - # eight bytes for the version information (i.e. HTTP/1.0/HTTP/1.1) and two bytes for the separating space, we end up with 8177 bytes for - # the URI path plus query. The referer handles requests longer than this by persisting the encoded_str to a temporary file which we - # can read. - encoded_tmp_file_name = kwd.get( 'encoded_tmp_file_name', None ) - if encoded_str: - encoded_required_repository_str = encoding_util.tool_shed_decode( encoded_str ) - else: - # The request would have been longer than 8190 bytes if it included the encoded_str, so we'll send a request to the Galaxy instance to - # stream the string to us. - galaxy_url = suc.handle_galaxy_url( trans, **kwd ) - if galaxy_url and encoded_tmp_file_name: - url = suc.url_join( galaxy_url, - 'admin_toolshed/stream_file_contents?encoded_tmp_file_name=%s' % encoded_tmp_file_name ) - response = urllib2.urlopen( url ) - encoded_required_repository_str = response.read() - else: - log.debug( "Invalid galaxy_url '%s' or encoded_tmp_file_name '%s'." % ( str( galaxy_url ), str( encoded_tmp_file_name ) ) ) - repo_info_dict = {} - return repo_info_dict + encoded_required_repository_str = encoding_util.tool_shed_decode( encoded_str ) encoded_required_repository_tups = encoded_required_repository_str.split( encoding_util.encoding_sep2 ) decoded_required_repository_tups = [] for encoded_required_repository_tup in encoded_required_repository_tups: @@ -1675,6 +1660,41 @@ return tool_guid_lineage @web.expose + def handle_large_repo_info_dict( self, trans, **kwd ): + """ + In some cases the required encoded_str will be long. With apache, the default limit for the length of the request line is 8190 bytes + (http://httpd.apache.org/docs/2.2/mod/core.html#limitrequestline). And if we subtract three bytes for the request method (i.e. GET), + eight bytes for the version information (i.e. HTTP/1.0/HTTP/1.1) and two bytes for the separating space, we end up with 8177 bytes for + the URI path plus query. The referer handles requests longer than this by persisting the encoded_str to a temporary file which we can read. + """ + repo_info_dict = {} + encoded_tmp_file_name = kwd.get( 'encoded_tmp_file_name', None ) + # The request would have been longer than 8190 bytes if it included the encoded_str, so we'll send a request to the Galaxy instance to get it. + galaxy_url = suc.handle_galaxy_url( trans, **kwd ) + if galaxy_url and encoded_tmp_file_name: + url = suc.url_join( galaxy_url, + '/admin_toolshed/get_contents_of_file?encoded_tmp_file_name=%s' % encoded_tmp_file_name ) + response = urllib2.urlopen( url ) + encoded_required_repository_str = response.read() + repo_info_dict = self.get_required_repo_info_dict( trans, encoded_str ) + repo_info_dict[ 'encoded_tmp_file_name' ] = encoded_tmp_file_name + else: + log.debug( "Invalid galaxy_url '%s' or encoded_tmp_file_name '%s'." % ( str( galaxy_url ), str( encoded_tmp_file_name ) ) ) + # Persist the large repo_info_dict to a temporary file. + fh = tempfile.NamedTemporaryFile( 'wb' ) + tmp_file_name = fh.name + fh.close() + fh = open( tmp_file_name, 'wb' ) + fh.write( encoded_required_repository_str ) + fh.close() + encoded_tmp_file_name = encoding_util.tool_shed_encode( os.path.abspath( tmp_file_name ) ) + tool_shed_url = web.url_for( '/', qualified=True ) + # Redirect to the tool shed to enable it to read the persisted encoded_required_repository_str. + url = suc.url_join( galaxy_url, + '/admin_toolshed/handle_large_repo_info_dict?encoded_tmp_file_name=%s&tool_shed_url=%s' % ( str( encoded_tmp_file_name ), str( tool_shed_url ) ) ) + return trans.response.send_redirect( url ) + + @web.expose def help( self, trans, **kwd ): params = util.Params( kwd ) message = util.restore_text( params.get( 'message', '' ) ) diff -r 65a81aead95e147f709ee3969d49766f35d6a2e2 -r 604693b3ebe0283804225eac10adab2f858ec380 lib/tool_shed/util/common_install_util.py --- a/lib/tool_shed/util/common_install_util.py +++ b/lib/tool_shed/util/common_install_util.py @@ -17,6 +17,9 @@ import pkg_resources +pkg_resources.require( "simplejson" ) +import simplejson + pkg_resources.require( 'elementtree' ) from elementtree import ElementTree from elementtree import ElementInclude @@ -253,7 +256,6 @@ repository_dependencies entries in each of the received repo_info_dicts includes all required repositories, so only one pass through this method is required to retrieve all repository dependencies. """ - all_repo_info_dicts = [] if repo_info_dicts: # We'll send tuples of ( tool_shed, repository_name, repository_owner, changeset_revision ) to the tool shed to discover repository ids. required_repository_tups = [] @@ -287,59 +289,47 @@ encoded_required_repository_str = encoding_util.encoding_sep2.join( encoded_required_repository_tups ) encoded_required_repository_str = encoding_util.tool_shed_encode( encoded_required_repository_str ) url = suc.url_join( tool_shed_url, '/repository/get_required_repo_info_dict?encoded_str=%s' % encoded_required_repository_str ) - # In some cases the above URL will be long. With apache, the default limit for the length of the request line is 8190 bytes - # (http://httpd.apache.org/docs/2.2/mod/core.html#limitrequestline). And if we subtract three bytes for the request method - # (i.e. GET), eight bytes for the version information (i.e. HTTP/1.0/HTTP/1.1) and two bytes for the separating space, we end - # up with 8177 bytes for the URI path plus query. - if len( url ) >= 8177: - # Persist the encoded string to a temporary file. - fh = tempfile.NamedTemporaryFile( 'wb' ) - tmp_file_name = fh.name - fh.close() - fh = open( tmp_file_name, 'wb' ) - fh.write( encoded_required_repository_str ) - fh.close() - encoded_tmp_file_name = encoding_util.tool_shed_encode( os.path.abspath( tmp_file_name ) ) - galaxy_url = web.url_for( '/', qualified=True ) - # Send a request to the tool shed to enable it to read the temporary file. - url = suc.url_join( tool_shed_url, - '/repository/get_required_repo_info_dict?encoded_tmp_file_name=%s&galaxy_url=%s' % \ - ( encoded_tmp_file_name, galaxy_url ) ) - else: - encoded_tmp_file_name = None - tmp_file_name = None - try: - text = common_util.tool_shed_get( trans.app, tool_shed_url, url ) - except Exception, e: - if encoded_tmp_file_name: - message = 'The selected tool shed repositories cannot be installed until the tool shed at ' - message += '%s and the Galaxy instance at %s ' % ( str( tool_shed_url ), str( trans.request.base ) ) - message += 'are both updated to at least the June 3, 2013 Galaxy release. These upgrades ' - message += 'are necessary because the number of repositories you are attempting to install ' - message += 'generates an HTTP request that is longer than 8177 bytes which cannot be handled ' - message += 'by tool shed or Galaxy instances older than this release.' - log.debug( message ) - else: + text = common_util.tool_shed_get( trans.app, tool_shed_url, url ) + if text: + try: + required_repo_info_dict = json.from_json_string( text ) + except simplejson.decoder.JSONDecodeError, e: + if len( url ) >= 8177: + message = '\n\nThe selected tool shed repositories cannot be installed until the tool shed at %s and the Galaxy ' % str( tool_shed_url ) + message += 'instance at %s are both updated to at least the June 3, 2013 Galaxy release. These upgrades ' % str( trans.request.base ) + message += 'are necessary because the number of repositories you are attempting to install generates an HTTP request that is longer than ' + message += '8177 bytes which cannot be handled by tool shed or Galaxy instances older than the June 3, 2013 release.\n\n' + log.exception( message ) + else: + log.exception() + return [] + except Exception, e: log.exception() - text = None - if tmp_file_name: - try: - os.unlink( tmp_file_name ) - except: - pass - if text: - required_repo_info_dict = json.from_json_string( text ) - required_repo_info_dicts = [] - encoded_dict_strings = required_repo_info_dict[ 'repo_info_dicts' ] - for encoded_dict_str in encoded_dict_strings: - decoded_dict = encoding_util.tool_shed_decode( encoded_dict_str ) - required_repo_info_dicts.append( decoded_dict ) - if required_repo_info_dicts: - for required_repo_info_dict in required_repo_info_dicts: - if required_repo_info_dict not in all_repo_info_dicts: - all_repo_info_dicts.append( required_repo_info_dict ) - return all_repo_info_dicts + return [] + return process_repo_info_dict( trans, required_repo_info_dict ) + return [] +def handle_large_repo_info_dict( trans, tool_shed_url, encoded_required_repository_str ): + """ + Handle the cases where the received encoded_required_repository_str is long. With apache, the default limit for the length of the request line is 8190 bytes + (http://httpd.apache.org/docs/2.2/mod/core.html#limitrequestline). And if we subtract three bytes for the request method (i.e. GET), eight bytes for the version + information (i.e. HTTP/1.0/HTTP/1.1) and two bytes for the separating space, we end up with 8177 bytes for the URI path plus query. + """ + # Persist the encoded string to a temporary file. + fh = tempfile.NamedTemporaryFile( 'wb' ) + tmp_file_name = fh.name + fh.close() + fh = open( tmp_file_name, 'wb' ) + fh.write( encoded_required_repository_str ) + fh.close() + encoded_tmp_file_name = encoding_util.tool_shed_encode( os.path.abspath( tmp_file_name ) ) + galaxy_url = web.url_for( '/', qualified=True ) + # Redirect to the tool shed to enable it to read the persisted encoded_required_repository_str. + url = suc.url_join( tool_shed_url, + '/repository/handle_large_repo_info_dict?encoded_tmp_file_name=%s&galaxy_url=%s' % \ + ( encoded_tmp_file_name, galaxy_url ) ) + return trans.response.send_redirect( url ) + def handle_tool_dependencies( app, tool_shed_repository, tool_dependencies_config, tool_dependencies ): """ Install and build tool dependencies defined in the tool_dependencies_config. This config's tag sets can currently refer to installation @@ -397,3 +387,24 @@ app.model.ToolDependency.installation_status.ERROR ]: installed_tool_dependencies.append( tool_dependency ) return installed_tool_dependencies + +def process_repo_info_dict( trans, required_repo_info_dict ): + all_repo_info_dicts = [] + required_repo_info_dicts = [] + encoded_dict_strings = required_repo_info_dict[ 'repo_info_dicts' ] + for encoded_dict_str in encoded_dict_strings: + decoded_dict = encoding_util.tool_shed_decode( encoded_dict_str ) + required_repo_info_dicts.append( decoded_dict ) + if required_repo_info_dicts: + for required_repo_info_dict in required_repo_info_dicts: + if required_repo_info_dict not in all_repo_info_dicts: + all_repo_info_dicts.append( required_repo_info_dict ) + # Remove the temporary file that stored the long encoded_required_repository_str if possible. + encoded_tmp_file_name = required_repo_info_dict.get( 'encoded_tmp_file_name', None ) + if encoded_tmp_file_name: + tmp_file_name = encoding_util.tool_shed_decode( encoded_tmp_file_name ) + try: + os.unlink( tmp_file_name ) + except: + pass + return all_repo_info_dicts diff -r 65a81aead95e147f709ee3969d49766f35d6a2e2 -r 604693b3ebe0283804225eac10adab2f858ec380 lib/tool_shed/util/common_util.py --- a/lib/tool_shed/util/common_util.py +++ b/lib/tool_shed/util/common_util.py @@ -27,7 +27,7 @@ url = '%s/repository/get_tool_dependencies?name=%s&owner=%s&changeset_revision=%s&from_install_manager=True' % \ ( tool_shed_url, repository_name, REPOSITORY_OWNER, changeset_revision ) try: - text = tool_shed_get(app, tool_shed_url, url) + text = tool_shed_get( app, tool_shed_url, url ) tool_shed_accessible = True except Exception, e: # Tool shed may be unavailable - we have to set tool_shed_accessible since we're looping. diff -r 65a81aead95e147f709ee3969d49766f35d6a2e2 -r 604693b3ebe0283804225eac10adab2f858ec380 lib/tool_shed/util/shed_util_common.py --- a/lib/tool_shed/util/shed_util_common.py +++ b/lib/tool_shed/util/shed_util_common.py @@ -466,6 +466,14 @@ return manifest_ctx, ctx_file return None, None +def get_file_contents( file_path ): + if os.path.exists( file_path ): + fh = open( file_path ) + contents = fh.read() + fh.close() + return contents + return '' + def get_file_context_from_ctx( ctx, filename ): """Return the mercurial file context for a specified file.""" # We have to be careful in determining if we found the correct file because multiple files with the same name may be in different directories Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
commits-noreply@bitbucket.org