commit/galaxy-central: greg: Restrict diff file size to something reasonable when browsing changesets in the Tool Shed, and add some additional logging and error handling when setting metadata on repositories in the Tool Shed and Galaxy.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/148a93733033/ Changeset: 148a93733033 User: greg Date: 2013-10-18 19:07:37 Summary: Restrict diff file size to something reasonable when browsing changesets in the Tool shed, and add some additional logging and error handling when setting metadata on repositories in the tool shed and Galaxy. Affected #: 7 files diff -r a5d98e64b173e5fc11d0d7234a16fbc80c553317 -r 148a93733033814da72a7e7fc8139054626e9354 lib/galaxy/webapps/galaxy/api/tool_shed_repositories.py --- a/lib/galaxy/webapps/galaxy/api/tool_shed_repositories.py +++ b/lib/galaxy/webapps/galaxy/api/tool_shed_repositories.py @@ -517,10 +517,10 @@ message = tool_util.generate_message_for_invalid_tools( trans, invalid_file_tups, repository, None, as_html=False ) results[ 'unsuccessful_count' ] += 1 else: - message = "Successfully reset metadata on repository %s" % str( repository.name ) + message = "Successfully reset metadata on repository %s owned by %s" % ( str( repository.name ), str( repository.owner ) ) results[ 'successful_count' ] += 1 except Exception, e: - message = "Error resetting metadata on repository %s: %s" % ( str( repository.name ), str( e ) ) + message = "Error resetting metadata on repository %s owned by %s: %s" % ( str( repository.name ), str( repository.owner ), str( e ) ) results[ 'unsuccessful_count' ] += 1 results[ 'repository_status' ].append( message ) stop_time = strftime( "%Y-%m-%d %H:%M:%S" ) diff -r a5d98e64b173e5fc11d0d7234a16fbc80c553317 -r 148a93733033814da72a7e7fc8139054626e9354 lib/galaxy/webapps/tool_shed/api/repositories.py --- a/lib/galaxy/webapps/tool_shed/api/repositories.py +++ b/lib/galaxy/webapps/tool_shed/api/repositories.py @@ -231,8 +231,11 @@ in addition to those repositories of type tool_dependency_definition. This param is ignored if the current user is not an admin user, in which case this same restriction is automatic. 
:param encoded_ids_to_skip (optional): a list of encoded repository ids for repositories that should not be processed. + :param skip_file (optional): A local file name that contains the encoded repository ids associated with repositories to skip. + This param can be used as an alternative to the above encoded_ids_to_skip. """ def handle_repository( trans, repository, results ): + log.debug( "Resetting metadata on repository %s" % str( repository.name ) ) repository_id = trans.security.encode_id( repository.id ) try: invalid_file_tups, metadata_dict = metadata_util.reset_all_metadata_on_repository_in_tool_shed( trans, repository_id ) @@ -240,12 +243,13 @@ message = tool_util.generate_message_for_invalid_tools( trans, invalid_file_tups, repository, None, as_html=False ) results[ 'unsuccessful_count' ] += 1 else: - message = "Successfully reset metadata on repository %s" % str( repository.name ) + message = "Successfully reset metadata on repository %s owned by %s" % ( str( repository.name ), str( repository.user.username ) ) results[ 'successful_count' ] += 1 except Exception, e: - message = "Error resetting metadata on repository %s: %s" % ( str( repository.name ), str( e ) ) + message = "Error resetting metadata on repository %s owned by %s: %s" % ( str( repository.name ), str( repository.user.username ), str( e ) ) results[ 'unsuccessful_count' ] += 1 - results[ 'repository_status' ].append( message ) + status = '%s : %s' % ( str( repository.name ), message ) + results[ 'repository_status' ].append( status ) return results try: start_time = strftime( "%Y-%m-%d %H:%M:%S" ) @@ -255,6 +259,16 @@ unsuccessful_count=0 ) handled_repository_ids = [] encoded_ids_to_skip = payload.get( 'encoded_ids_to_skip', [] ) + skip_file = payload.get( 'skip_file', None ) + if skip_file and os.path.exists( skip_file ) and not encoded_ids_to_skip: + # Load the list of encoded_ids_to_skip from the skip_file. + # Contents of file must be 1 encoded repository id per line. 
+ lines = open( skip_file, 'rb' ).readlines() + for line in lines: + if line.startswith( '#' ): + # Skip comments. + continue + encoded_ids_to_skip.append( line.rstrip( '\n' ) ) if trans.user_is_admin(): my_writable = util.asbool( payload.get( 'my_writable', False ) ) else: @@ -306,17 +320,18 @@ if invalid_file_tups: message = tool_util.generate_message_for_invalid_tools( trans, invalid_file_tups, repository, None, as_html=False ) else: - message = "Successfully reset metadata on repository %s" % str( repository.name ) + message = "Successfully reset metadata on repository %s owned by %s" % ( str( repository.name ), str( repository.user.username ) ) except Exception, e: - message = "Error resetting metadata on repository %s: %s" % ( str( repository.name ), str( e ) ) - - results[ 'repository_status' ].append( message ) + message = "Error resetting metadata on repository %s owned by %s: %s" % ( str( repository.name ), str( repository.user.username ), str( e ) ) + status = '%s : %s' % ( str( repository.name ), message ) + results[ 'repository_status' ].append( status ) return results try: repository_id = payload.get( 'repository_id', None ) if repository_id is not None: repository = suc.get_repository_in_tool_shed( trans, repository_id ) start_time = strftime( "%Y-%m-%d %H:%M:%S" ) + log.debug( "%s...resetting metadata on repository %s" % ( start_time, str( repository.name ) ) ) results = handle_repository( trans, start_time, repository ) stop_time = strftime( "%Y-%m-%d %H:%M:%S" ) results[ 'stop_time' ] = stop_time diff -r a5d98e64b173e5fc11d0d7234a16fbc80c553317 -r 148a93733033814da72a7e7fc8139054626e9354 lib/galaxy/webapps/tool_shed/controllers/hg.py --- a/lib/galaxy/webapps/tool_shed/controllers/hg.py +++ b/lib/galaxy/webapps/tool_shed/controllers/hg.py @@ -39,7 +39,9 @@ # Set metadata using the repository files on disk. 
error_message, status = set_repository_metadata( trans, repository ) if status == 'ok' and error_message: - log.debug( "Successfully reset metadata on repository %s, but encountered problem: %s" % ( repository.name, error_message ) ) + log.debug( "Successfully reset metadata on repository %s owned by %s, but encountered problem: %s" % \ + ( str( repository.name ), str( repository.user.username ), error_message ) ) elif status != 'ok' and error_message: - log.debug( "Error resetting metadata on repository %s: %s" % ( repository.name, error_message ) ) + log.debug( "Error resetting metadata on repository %s owned by %s: %s" % \ + ( str( repository.name ), str( repository.user.username ), error_message ) ) return wsgi_app diff -r a5d98e64b173e5fc11d0d7234a16fbc80c553317 -r 148a93733033814da72a7e7fc8139054626e9354 lib/galaxy/webapps/tool_shed/controllers/repository.py --- a/lib/galaxy/webapps/tool_shed/controllers/repository.py +++ b/lib/galaxy/webapps/tool_shed/controllers/repository.py @@ -42,6 +42,7 @@ from mercurial import commands from mercurial import hg +from mercurial import mdiff from mercurial import patch from mercurial import ui @@ -2856,11 +2857,19 @@ ctx_child = ctx.children()[ 0 ] else: ctx_child = None + diffs = [] + options_dict = suc.get_mercurial_default_options_dict( 'diff' ) + # Not quite sure if the following settings make any difference, but with a combination of them and the size check on each + # diff, we don't run out of memory when viewing the changelog of the cisortho2 repository on the test tool shed. 
+ options_dict[ 'maxfile' ] = suc.MAXDIFFSIZE + options_dict[ 'maxtotal' ] = suc.MAXDIFFSIZE + diffopts = mdiff.diffopts( **options_dict ) + for diff in patch.diff( repo, node1=ctx_parent.node(), node2=ctx.node(), opts=diffopts ): + if len( diff ) > suc.MAXDIFFSIZE: + diff = util.shrink_string_by_size( diff, suc.MAXDIFFSIZE ) + diffs.append( suc.to_html_string( diff ) ) modified, added, removed, deleted, unknown, ignored, clean = repo.status( node1=ctx_parent.node(), node2=ctx.node() ) anchors = modified + added + removed + deleted + unknown + ignored + clean - diffs = [] - for diff in patch.diff( repo, node1=ctx_parent.node(), node2=ctx.node() ): - diffs.append( suc.to_html_string( diff ) ) metadata = metadata_util.get_repository_metadata_by_repository_id_changeset_revision( trans, id, ctx_str, metadata_only=True ) # For rendering the prev button. if ctx_parent: diff -r a5d98e64b173e5fc11d0d7234a16fbc80c553317 -r 148a93733033814da72a7e7fc8139054626e9354 lib/tool_shed/scripts/api/reset_metadata_on_repositories.py --- a/lib/tool_shed/scripts/api/reset_metadata_on_repositories.py +++ b/lib/tool_shed/scripts/api/reset_metadata_on_repositories.py @@ -25,33 +25,55 @@ else: return False +def read_skip_file( skip_file ): + encoded_ids_to_skip = [] + if os.path.exists( skip_file ): + # Contents of file must be 1 encoded repository id per line. + lines = open( skip_file, 'rb' ).readlines() + for line in lines: + if line.startswith( '#' ): + # Skip comments. + continue + encoded_ids_to_skip.append( line.rstrip( '\n' ) ) + return encoded_ids_to_skip + def main( options ): api_key = options.api base_tool_shed_url = options.tool_shed_url.rstrip( '/' ) my_writable = options.my_writable one_per_request = options.one_per_request skip_file = options.skip_file - encoded_ids_to_skip = [] - if skip_file and os.path.exists( skip_file ): - # Contents of file must be 1 encoded repository id per line. 
- contents = open( skip_file, 'rb' ).read() - if contents: - encoded_ids_to_skip = contents.split( '\n' ) + if skip_file: + encoded_ids_to_skip = read_skip_file( skip_file ) + else: + encoded_ids_to_skip = [] if string_as_bool( one_per_request ): url = '%s/api/repositories/repository_ids_for_setting_metadata?key=%s&my_writable=%s' % ( base_tool_shed_url, api_key, str( my_writable ) ) repository_ids = get( url, api_key ) for repository_id in repository_ids: if repository_id in encoded_ids_to_skip: + print "--------" print "Skipping repository with id %s because it is in skip file %s" % ( str( repository_id ), str( skip_file ) ) + print "--------" else: data = dict( repository_id=repository_id ) url = '%s/api/repositories/reset_metadata_on_repository' % base_tool_shed_url - submit( url, data, options.api ) + try: + submit( url, data, options.api ) + except Exception, e: + log.exception( ">>>>>>>>>>>>>>>Blew up on data: %s, exception: %s" % ( str( data ), str( e ) ) ) + # An nginx timeout undoubtedly occurred. + sys.exit( 1 ) else: data = dict( encoded_ids_to_skip=encoded_ids_to_skip, my_writable=my_writable ) url = '%s/api/repositories/reset_metadata_on_repositories' % base_tool_shed_url - submit( url, data, options.api ) + try: + submit( url, data, options.api ) + except Exception, e: + log.exception( str( e ) ) + # An nginx timeout undoubtedly occurred. + sys.exit( 1 ) if __name__ == '__main__': parser = argparse.ArgumentParser( description='Reset metadata on certain repositories in the Tool Shed via the Tool Shed API.' ) diff -r a5d98e64b173e5fc11d0d7234a16fbc80c553317 -r 148a93733033814da72a7e7fc8139054626e9354 lib/tool_shed/util/metadata_util.py --- a/lib/tool_shed/util/metadata_util.py +++ b/lib/tool_shed/util/metadata_util.py @@ -1772,17 +1772,19 @@ if trans.webapp.name == 'tool_shed': # We're in the tool shed. 
repository = suc.get_repository_in_tool_shed( trans, repository_id ) + owner = str( repository.user.username ) invalid_file_tups, metadata_dict = reset_all_metadata_on_repository_in_tool_shed( trans, repository_id ) else: # We're in Galaxy. repository = suc.get_installed_tool_shed_repository( trans, repository_id ) + owner = str( repository.owner ) invalid_file_tups, metadata_dict = reset_all_metadata_on_installed_repository( trans, repository_id ) if invalid_file_tups: message = tool_util.generate_message_for_invalid_tools( trans, invalid_file_tups, repository, None, as_html=False ) log.debug( message ) unsuccessful_count += 1 else: - log.debug( "Successfully reset metadata on repository %s" % str( repository.name ) ) + log.debug( "Successfully reset metadata on repository %s owned by %s" % ( str( repository.name ), owner ) ) successful_count += 1 except: log.exception( "Error attempting to reset metadata on repository %s", str( repository.name ) ) diff -r a5d98e64b173e5fc11d0d7234a16fbc80c553317 -r 148a93733033814da72a7e7fc8139054626e9354 lib/tool_shed/util/shed_util_common.py --- a/lib/tool_shed/util/shed_util_common.py +++ b/lib/tool_shed/util/shed_util_common.py @@ -40,6 +40,7 @@ CHUNK_SIZE = 2**20 # 1Mb INITIAL_CHANGELOG_HASH = '000000000000' MAX_CONTENT_SIZE = 1048576 +MAXDIFFSIZE = 8000 MAX_DISPLAY_SIZE = 32768 DATATYPES_CONFIG_FILENAME = 'datatypes_conf.xml' REPOSITORY_DATA_MANAGER_CONFIG_FILENAME = 'data_manager_conf.xml' Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
commits-noreply@bitbucket.org