commit/galaxy-central: greg: More fixes for setting metadata on repositories in the tool shed. Change set includes other miscellaneous fixes, including the elimination of some problematic historical code that managed temporary working directories and caused race conditions.
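The race conditions mentioned above came from the removed make_tmp_directory() helper, which handed the same 'work_tmp' path to every caller, so concurrent installs and metadata resets could read, overwrite, or delete each other's working files; its os.path.exists()/os.makedirs() pair was also a classic check-then-create race. The sketch below is a minimal reconstruction of the old helper from the diff that follows, contrasted with the tempfile.mkdtemp() call that replaces it throughout this changeset; only the final assignment outside the function is illustrative.

import os, tempfile

def make_tmp_directory():
    # Removed helper: every caller received the SAME directory, so two
    # concurrent operations shared (and could clobber) one another's files.
    tmp_dir = os.getenv( 'TMPDIR', '' )
    if tmp_dir:
        tmp_dir = tmp_dir.strip()
    else:
        tmp_dir = os.path.join( os.getenv( 'HOME' ), 'tmp' )
    work_dir = os.path.join( tmp_dir, 'work_tmp' )
    if not os.path.exists( work_dir ):  # check-then-create: racy in itself
        os.makedirs( work_dir )
    return work_dir

# Replacement used throughout this changeset: mkdtemp() atomically creates
# a fresh, uniquely named directory (accessible only to the creating user)
# on every call, so concurrent operations never share a working directory.
work_dir = tempfile.mkdtemp()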
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/9f790bc90769/ changeset: 9f790bc90769 user: greg date: 2012-08-08 20:31:46 summary: More fixes for setting metadata on repositories in the toool shed. Change set includes other miscellaneous fixes, including the elimination of some problematice historical code that managed temporary working directories which resulted in problematic race conditions. affected #: 7 files diff -r 5f2db4a18d3d3bef78f0a330cc6073f34db7c88b -r 9f790bc90769df7a4f84f103707bdd8ceaf1115d lib/galaxy/tool_shed/install_manager.py --- a/lib/galaxy/tool_shed/install_manager.py +++ b/lib/galaxy/tool_shed/install_manager.py @@ -2,7 +2,7 @@ Manage automatic installation of tools configured in the xxx.xml files in ~/scripts/migrate_tools (e.g., 0002_tools.xml). All of the tools were at some point included in the Galaxy distribution, but are now hosted in the main Galaxy tool shed. """ -import urllib2 +import urllib2, tempfile from galaxy.tools import ToolSection from galaxy.util.json import from_json_string, to_json_string from galaxy.util.shed_util import * @@ -132,7 +132,7 @@ tool_panel_dict_for_tool_config = generate_tool_panel_dict_for_tool_config( guid, tool_config, tool_sections=tool_sections ) for k, v in tool_panel_dict_for_tool_config.items(): tool_panel_dict_for_display[ k ] = v - metadata_dict = generate_metadata_using_disk_files( self.toolbox, relative_install_dir, repository_clone_url ) + metadata_dict = generate_metadata_for_changeset_revision( self.app, relative_install_dir, repository_clone_url ) tool_shed_repository.metadata = metadata_dict self.app.sa_session.add( tool_shed_repository ) self.app.sa_session.flush() @@ -142,7 +142,7 @@ else: tool_dependencies = None if 'tools' in metadata_dict: - work_dir = make_tmp_directory() + work_dir = tempfile.mkdtemp() repository_tools_tups = get_repository_tools_tups( self.app, metadata_dict ) if repository_tools_tups: sample_files = metadata_dict.get( 'sample_files', [] ) @@ -195,7 +195,7 @@ tool_shed_repository.includes_datatypes = True self.app.sa_session.add( tool_shed_repository ) self.app.sa_session.flush() - work_dir = make_tmp_directory() + work_dir = tempfile.mkdtemp() datatypes_config = get_config_from_repository( self.app, 'datatypes_conf.xml', tool_shed_repository, diff -r 5f2db4a18d3d3bef78f0a330cc6073f34db7c88b -r 9f790bc90769df7a4f84f103707bdd8ceaf1115d lib/galaxy/util/shed_util.py --- a/lib/galaxy/util/shed_util.py +++ b/lib/galaxy/util/shed_util.py @@ -2,7 +2,7 @@ import galaxy.tools.data from datetime import date, datetime, timedelta from time import strftime, gmtime -from galaxy import util +from galaxy import tools, util from galaxy.datatypes.checkers import * from galaxy.util.json import * from galaxy.tools.search import ToolBoxSearch @@ -247,6 +247,52 @@ except: pass return converter_path, display_path +def check_tool_input_params( app, repo_dir, tool_config_name, tool, sample_files ): + """ + Check all of the tool's input parameters, looking for any that are dynamically generated using external data files to make + sure the files exist. + """ + invalid_files_and_errors_tups = [] + correction_msg = '' + for input_param in tool.input_params: + if isinstance( input_param, tools.parameters.basic.SelectToolParameter ) and input_param.is_dynamic: + # If the tool refers to .loc files or requires an entry in the tool_data_table_conf.xml, make sure all requirements exist. 
+ options = input_param.dynamic_options or input_param.options + if options: + if options.tool_data_table or options.missing_tool_data_table_name: + # Make sure the repository contains a tool_data_table_conf.xml.sample file. + sample_tool_data_table_conf = get_config_from_disk( 'tool_data_table_conf.xml.sample', repo_dir ) + if sample_tool_data_table_conf: + error, correction_msg = handle_sample_tool_data_table_conf_file( app, sample_tool_data_table_conf ) + if error: + invalid_files_and_errors_tups.append( ( 'tool_data_table_conf.xml.sample', correction_msg ) ) + else: + options.missing_tool_data_table_name = None + else: + correction_msg = "This file requires an entry in the tool_data_table_conf.xml file. Upload a file named tool_data_table_conf.xml.sample " + correction_msg += "to the repository that includes the required entry to correct this error.<br/>" + invalid_files_and_errors_tups.append( ( tool_config_name, correction_msg ) ) + if options.index_file or options.missing_index_file: + # Make sure the repository contains the required xxx.loc.sample file. + index_file = options.index_file or options.missing_index_file + index_file_name = strip_path( index_file ) + sample_found = False + for sample_file in sample_files: + sample_file_name = strip_path( sample_file ) + if sample_file_name == '%s.sample' % index_file_name: + options.index_file = index_file_name + options.missing_index_file = None + if options.tool_data_table: + options.tool_data_table.missing_index_file = None + sample_found = True + break + if not sample_found: + correction_msg = "This file refers to a file named <b>%s</b>. " % str( index_file ) + correction_msg += "Upload a file named <b>%s.sample</b> to the repository to correct this error." % str( index_file_name ) + invalid_files_and_errors_tups.append( ( tool_config_name, correction_msg ) ) + # Reset the tool_data_tables by loading the empty tool_data_table_conf.xml file. + reset_tool_data_tables( app ) + return invalid_files_and_errors_tups def config_elems_to_xml_file( app, config_elems, config_filename, tool_path ): # Persist the current in-memory list of config_elems to a file named by the value of config_filename. fd, filename = tempfile.mkstemp() @@ -383,7 +429,7 @@ def create_tool_dependency_objects( app, tool_shed_repository, current_changeset_revision, set_status=True ): # Create or update a ToolDependency for each entry in tool_dependencies_config. This method is called when installing a new tool_shed_repository. tool_dependency_objects = [] - work_dir = make_tmp_directory() + work_dir = tempfile.mkdtemp() # Get the tool_dependencies.xml file from the repository. tool_dependencies_config = get_config_from_repository( app, 'tool_dependencies.xml', @@ -501,6 +547,76 @@ if not can_generate_dependency_metadata: break return can_generate_dependency_metadata +def generate_metadata_for_changeset_revision( app, repository_files_dir, repository_clone_url ): + """ + Generate metadata for a repository using it's files on disk. To generate metadata for changeset revisions older than the repository tip, + the repository will have been cloned to a temporary location and updated to a specified changeset revision to access that changeset revision's + disk files, so the value of repository_files_dir will not always be repository.repo_path (it could be a temporary directory containing a clone). 
+ """ + metadata_dict = {} + invalid_file_tups = [] + invalid_tool_configs = [] + tool_dependencies_config = None + datatypes_config = get_config_from_disk( 'datatypes_conf.xml', repository_files_dir ) + if datatypes_config: + metadata_dict = generate_datatypes_metadata( datatypes_config, metadata_dict ) + sample_files = get_sample_files_from_disk( repository_files_dir ) + if sample_files: + metadata_dict[ 'sample_files' ] = sample_files + # Find all tool configs and exported workflows. + for root, dirs, files in os.walk( repository_files_dir ): + if root.find( '.hg' ) < 0 and root.find( 'hgrc' ) < 0: + if '.hg' in dirs: + dirs.remove( '.hg' ) + for name in files: + # Find all tool configs. + if name not in NOT_TOOL_CONFIGS and name.endswith( '.xml' ): + full_path = os.path.abspath( os.path.join( root, name ) ) + if not ( check_binary( full_path ) or check_image( full_path ) or check_gzip( full_path )[ 0 ] + or check_bz2( full_path )[ 0 ] or check_zip( full_path ) ): + try: + # Make sure we're looking at a tool config and not a display application config or something else. + element_tree = util.parse_xml( full_path ) + element_tree_root = element_tree.getroot() + is_tool = element_tree_root.tag == 'tool' + except Exception, e: + print "Error parsing %s", full_path, ", exception: ", str( e ) + is_tool = False + if is_tool: + try: + tool = app.toolbox.load_tool( full_path ) + except Exception, e: + tool = None + invalid_tool_configs.append( name ) + if tool is not None: + invalid_files_and_errors_tups = check_tool_input_params( app, repository_files_dir, name, tool, sample_files ) + can_set_metadata = True + for tup in invalid_files_and_errors_tups: + if name in tup: + can_set_metadata = False + invalid_tool_configs.append( name ) + break + if can_set_metadata: + metadata_dict = generate_tool_metadata( name, tool, repository_clone_url, metadata_dict ) + else: + invalid_file_tups.extend( invalid_files_and_errors_tups ) + # Find all exported workflows + elif name.endswith( '.ga' ): + relative_path = os.path.join( root, name ) + fp = open( relative_path, 'rb' ) + workflow_text = fp.read() + fp.close() + exported_workflow_dict = from_json_string( workflow_text ) + if 'a_galaxy_workflow' in exported_workflow_dict and exported_workflow_dict[ 'a_galaxy_workflow' ] == 'true': + metadata_dict = generate_workflow_metadata( relative_path, exported_workflow_dict, metadata_dict ) + if 'tools' in metadata_dict: + # This step must be done after metadata for tools has been defined. 
+ tool_dependencies_config = get_config_from_disk( 'tool_dependencies.xml', repository_files_dir ) + if tool_dependencies_config: + metadata_dict = generate_tool_dependency_metadata( tool_dependencies_config, metadata_dict ) + if invalid_tool_configs: + metadata_dict [ 'invalid_tools' ] = invalid_tool_configs + return metadata_dict, invalid_file_tups def generate_package_dependency_metadata( elem, tool_dependencies_dict ): """The value of package_name must match the value of the "package" type in the tool config's <requirements> tag set.""" requirements_dict = {} @@ -517,58 +633,6 @@ if requirements_dict: tool_dependencies_dict[ dependency_key ] = requirements_dict return tool_dependencies_dict -def generate_metadata_using_disk_files( toolbox, relative_install_dir, repository_clone_url ): - """Generate metadata using only the repository files on disk - files are not retrieved from the repository manifest.""" - metadata_dict = {} - tool_dependencies_config = None - datatypes_config = get_config_from_disk( 'datatypes_conf.xml', relative_install_dir ) - if datatypes_config: - metadata_dict = generate_datatypes_metadata( datatypes_config, metadata_dict ) - sample_files = get_sample_files_from_disk( relative_install_dir ) - if sample_files: - metadata_dict[ 'sample_files' ] = sample_files - # Find all tool configs and exported workflows. - for root, dirs, files in os.walk( relative_install_dir ): - if root.find( '.hg' ) < 0 and root.find( 'hgrc' ) < 0: - if '.hg' in dirs: - dirs.remove( '.hg' ) - for name in files: - # Find all tool configs. - if name not in NOT_TOOL_CONFIGS and name.endswith( '.xml' ): - full_path = os.path.abspath( os.path.join( root, name ) ) - if not ( check_binary( full_path ) or check_image( full_path ) or check_gzip( full_path )[ 0 ] - or check_bz2( full_path )[ 0 ] or check_zip( full_path ) ): - try: - # Make sure we're looking at a tool config and not a display application config or something else. - element_tree = util.parse_xml( full_path ) - element_tree_root = element_tree.getroot() - is_tool = element_tree_root.tag == 'tool' - except Exception, e: - log.debug( "Error parsing %s, exception: %s" % ( full_path, str( e ) ) ) - is_tool = False - if is_tool: - try: - tool = toolbox.load_tool( full_path ) - except Exception, e: - tool = None - if tool is not None: - tool_config = os.path.join( root, name ) - metadata_dict = generate_tool_metadata( tool_config, tool, repository_clone_url, metadata_dict ) - # Find all exported workflows - elif name.endswith( '.ga' ): - relative_path = os.path.join( root, name ) - fp = open( relative_path, 'rb' ) - workflow_text = fp.read() - fp.close() - exported_workflow_dict = from_json_string( workflow_text ) - if 'a_galaxy_workflow' in exported_workflow_dict and exported_workflow_dict[ 'a_galaxy_workflow' ] == 'true': - metadata_dict = generate_workflow_metadata( relative_path, exported_workflow_dict, metadata_dict ) - if 'tools' in metadata_dict: - # This step must be done after metadata for tools has been defined. - tool_dependencies_config = get_config_from_disk( 'tool_dependencies.xml', relative_install_dir ) - if tool_dependencies_config: - metadata_dict = generate_tool_dependency_metadata( tool_dependencies_config, metadata_dict ) - return metadata_dict def generate_tool_guid( repository_clone_url, tool ): """ Generate a guid for the installed tool. 
It is critical that this guid matches the guid for @@ -1266,7 +1330,7 @@ def load_installed_datatypes( app, repository, relative_install_dir, deactivate=False ): # Load proprietary datatypes and return information needed for loading proprietary datatypes converters and display applications later. metadata = repository.metadata - work_dir = make_tmp_directory() + work_dir = tempfile.mkdtemp() repository_dict = None datatypes_config = get_config_from_repository( app, 'datatypes_conf.xml', @@ -1293,17 +1357,6 @@ def load_installed_display_applications( app, installed_repository_dict, deactivate=False ): # Load or deactivate proprietary datatype display applications app.datatypes_registry.load_display_applications( installed_repository_dict=installed_repository_dict, deactivate=deactivate ) -def make_tmp_directory(): - tmp_dir = os.getenv( 'TMPDIR', '' ) - if tmp_dir: - tmp_dir = tmp_dir.strip() - else: - home_dir = os.getenv( 'HOME' ) - tmp_dir = os.path.join( home_dir, 'tmp' ) - work_dir = os.path.join( tmp_dir, 'work_tmp' ) - if not os.path.exists( work_dir ): - os.makedirs( work_dir ) - return work_dir def open_repository_files_folder( trans, folder_path ): try: files_list = get_repository_files( trans, folder_path ) diff -r 5f2db4a18d3d3bef78f0a330cc6073f34db7c88b -r 9f790bc90769df7a4f84f103707bdd8ceaf1115d lib/galaxy/web/controllers/admin_toolshed.py --- a/lib/galaxy/web/controllers/admin_toolshed.py +++ b/lib/galaxy/web/controllers/admin_toolshed.py @@ -1,4 +1,4 @@ -import urllib2 +import urllib2, tempfile from galaxy.web.controllers.admin import * from galaxy.util.json import from_json_string, to_json_string from galaxy.util.shed_util import * @@ -522,7 +522,7 @@ # Get the tool_shed_repository from one of the tool_dependencies. message = '' tool_shed_repository = tool_dependencies[ 0 ].tool_shed_repository - work_dir = make_tmp_directory() + work_dir = tempfile.mkdtemp() # Get the tool_dependencies.xml file from the repository. tool_dependencies_config = get_config_from_repository( trans.app, 'tool_dependencies.xml', @@ -654,7 +654,7 @@ message += "from the installed repository's <b>Repository Actions</b> menu. " status = 'error' if install_tool_dependencies and tool_shed_repository.tool_dependencies and 'tool_dependencies' in metadata: - work_dir = make_tmp_directory() + work_dir = tempfile.mkdtemp() # Install tool dependencies. update_tool_shed_repository_status( trans.app, tool_shed_repository, @@ -684,7 +684,7 @@ Generate the metadata for the installed tool shed repository, among other things. This method is called from Galaxy (never the tool shed) when an admin is installing a new repository or reinstalling an uninstalled repository. """ - metadata_dict = generate_metadata_using_disk_files( trans.app.toolbox, relative_install_dir, repository_clone_url ) + metadata_dict = generate_metadata_for_changeset_revision( trans.app, relative_install_dir, repository_clone_url ) tool_shed_repository.metadata = metadata_dict trans.sa_session.add( tool_shed_repository ) trans.sa_session.flush() @@ -695,7 +695,7 @@ repository_tools_tups = get_repository_tools_tups( trans.app, metadata_dict ) if repository_tools_tups: # Handle missing data table entries for tool parameters that are dynamically generated select lists. 
- work_dir = make_tmp_directory() + work_dir = tempfile.mkdtemp() repository_tools_tups = handle_missing_data_table_entry( trans.app, tool_shed_repository, tool_shed_repository.changeset_revision, @@ -726,7 +726,7 @@ tool_shed_repository.includes_datatypes = True trans.sa_session.add( tool_shed_repository ) trans.sa_session.flush() - work_dir = make_tmp_directory() + work_dir = tempfile.mkdtemp() datatypes_config = get_config_from_repository( trans.app, 'datatypes_conf.xml', tool_shed_repository, @@ -779,7 +779,7 @@ message = "The repository information has been updated." elif params.get( 'set_metadata_button', False ): repository_clone_url = generate_clone_url( trans, repository ) - metadata_dict = generate_metadata_using_disk_files( trans.app.toolbox, relative_install_dir, repository_clone_url ) + metadata_dict = generate_metadata_for_changeset_revision( trans.app, relative_install_dir, repository_clone_url ) if metadata_dict: repository.metadata = metadata_dict trans.sa_session.add( repository ) @@ -1479,7 +1479,7 @@ update_repository( repo, latest_ctx_rev ) # Update the repository metadata. tool_shed = clean_tool_shed_url( tool_shed_url ) - metadata_dict = generate_metadata_using_disk_files( trans.app.toolbox, relative_install_dir, repository_clone_url ) + metadata_dict = generate_metadata_for_changeset_revision( trans.app, relative_install_dir, repository_clone_url ) repository.metadata = metadata_dict # Update the repository changeset_revision in the database. repository.changeset_revision = latest_changeset_revision diff -r 5f2db4a18d3d3bef78f0a330cc6073f34db7c88b -r 9f790bc90769df7a4f84f103707bdd8ceaf1115d lib/galaxy/webapps/community/controllers/common.py --- a/lib/galaxy/webapps/community/controllers/common.py +++ b/lib/galaxy/webapps/community/controllers/common.py @@ -1,15 +1,13 @@ -import os, string, socket, logging, simplejson, binascii +import os, string, socket, logging, simplejson, binascii, tempfile from time import strftime from datetime import * from galaxy.datatypes.checkers import * from galaxy.tools import * from galaxy.util.json import from_json_string, to_json_string from galaxy.util.hash_util import * -from galaxy.util.shed_util import clone_repository, copy_sample_file, generate_datatypes_metadata, generate_tool_dependency_metadata, generate_tool_metadata -from galaxy.util.shed_util import generate_workflow_metadata, get_changectx_for_changeset, get_config, get_config_from_disk, get_configured_ui -from galaxy.util.shed_util import get_named_tmpfile_from_ctx, get_sample_files_from_disk, handle_sample_tool_data_table_conf_file, INITIAL_CHANGELOG_HASH -from galaxy.util.shed_util import make_tmp_directory, NOT_TOOL_CONFIGS, reset_tool_data_tables, reversed_upper_bounded_changelog, strip_path, to_html_escaped -from galaxy.util.shed_util import to_html_str, update_repository +from galaxy.util.shed_util import clone_repository, generate_metadata_for_changeset_revision, get_changectx_for_changeset, get_config_from_disk +from galaxy.util.shed_util import get_configured_ui, get_named_tmpfile_from_ctx, handle_sample_tool_data_table_conf_file, INITIAL_CHANGELOG_HASH +from galaxy.util.shed_util import reset_tool_data_tables, reversed_upper_bounded_changelog, strip_path from galaxy.web.base.controller import * from galaxy.webapps.community import model from galaxy.model.orm import * @@ -107,11 +105,8 @@ trans.sa_session.flush() return item_rating -## ---- Utility methods ------------------------------------------------------- - def add_repository_metadata_tool_versions( trans, 
id, changeset_revisions ): - # If a repository includes tools, build a dictionary of { 'tool id' : 'parent tool id' } - # pairs for each tool in each changeset revision. + # If a repository includes tools, build a dictionary of { 'tool id' : 'parent tool id' } pairs for each tool in each changeset revision. for index, changeset_revision in enumerate( changeset_revisions ): tool_versions_dict = {} repository_metadata = get_repository_metadata_by_changeset_revision( trans, id, changeset_revision ) @@ -120,9 +115,8 @@ if metadata: tool_dicts = metadata.get( 'tools', [] ) if index == 0: - # The first changset_revision is a special case because it will have no ancestor - # changeset_revisions in which to match tools. The parent tool id for tools in - # the first changeset_revision will be the "old_id" in the tool config. + # The first changset_revision is a special case because it will have no ancestor changeset_revisions in which to match tools. + # The parent tool id for tools in the first changeset_revision will be the "old_id" in the tool config. for tool_dict in tool_dicts: tool_versions_dict[ tool_dict[ 'guid' ] ] = tool_dict[ 'id' ] else: @@ -134,43 +128,6 @@ repository_metadata.tool_versions = tool_versions_dict trans.sa_session.add( repository_metadata ) trans.sa_session.flush() -def build_changeset_revision_select_field( trans, repository, selected_value=None, add_id_to_name=True ): - """Build a SelectField whose options are the changeset_rev strings of all downloadable revisions of the received repository.""" - repo = hg.repository( get_configured_ui(), repository.repo_path ) - options = [] - changeset_tups = [] - refresh_on_change_values = [] - for repository_metadata in repository.downloadable_revisions: - changeset_revision = repository_metadata.changeset_revision - ctx = get_changectx_for_changeset( repo, changeset_revision ) - if ctx: - rev = '%04d' % ctx.rev() - label = "%s:%s" % ( str( ctx.rev() ), changeset_revision ) - else: - rev = '-1' - label = "-1:%s" % changeset_revision - changeset_tups.append( ( rev, label, changeset_revision ) ) - refresh_on_change_values.append( changeset_revision ) - # Sort options by the revision label. Even though the downloadable_revisions query sorts by update_time, - # the changeset revisions may not be sorted correctly because setting metadata over time will reset update_time. - for changeset_tup in sorted( changeset_tups ): - # Display the latest revision first. - options.insert( 0, ( changeset_tup[1], changeset_tup[2] ) ) - if add_id_to_name: - name = 'changeset_revision_%d' % repository.id - else: - name = 'changeset_revision' - select_field = SelectField( name=name, - refresh_on_change=True, - refresh_on_change_values=refresh_on_change_values ) - for option_tup in options: - selected = selected_value and option_tup[1] == selected_value - select_field.add_option( option_tup[0], option_tup[1], selected=selected ) - return select_field -def changeset_is_downloadable( metadata_dict ): - # A RepositoryMetadata record will be created if metadata_dict includes only invalid stuff like 'invalid_tools', but in this case - # it won't be downloadable. 
- return 'datatypes' in metadata_dict or 'tools' in metadata_dict or 'workflows' in metadata_dict def changeset_is_malicious( trans, id, changeset_revision, **kwd ): """Check the malicious flag in repository metadata for a specified change set""" repository_metadata = get_repository_metadata_by_changeset_revision( trans, id, changeset_revision ) @@ -188,55 +145,6 @@ if user_email in admin_users: return True return False -def check_tool_input_params( trans, repo_dir, tool_config, tool, sample_files, invalid_files ): - """ - Check all of the tool's input parameters, looking for any that are dynamically generated using external data files to make - sure the files exist. - """ - can_set_metadata = True - correction_msg = '' - for input_param in tool.input_params: - if isinstance( input_param, tools.parameters.basic.SelectToolParameter ) and input_param.is_dynamic: - # If the tool refers to .loc files or requires an entry in the tool_data_table_conf.xml, make sure all requirements exist. - options = input_param.dynamic_options or input_param.options - if options: - if options.tool_data_table or options.missing_tool_data_table_name: - # Make sure the repository contains a tool_data_table_conf.xml.sample file. - sample_tool_data_table_conf = get_config_from_disk( 'tool_data_table_conf.xml.sample', repo_dir ) - if sample_tool_data_table_conf: - error, correction_msg = handle_sample_tool_data_table_conf_file( trans, sample_tool_data_table_conf ) - if error: - can_set_metadata = False - invalid_files.append( ( 'tool_data_table_conf.xml.sample', correction_msg ) ) - else: - options.missing_tool_data_table_name = None - else: - can_set_metadata = False - correction_msg = "This file requires an entry in the tool_data_table_conf.xml file. Upload a file named tool_data_table_conf.xml.sample " - correction_msg += "to the repository that includes the required entry to correct this error.<br/>" - invalid_files.append( ( tool_config, correction_msg ) ) - if options.index_file or options.missing_index_file: - # Make sure the repository contains the required xxx.loc.sample file. - index_file = options.index_file or options.missing_index_file - index_file_name = strip_path( index_file ) - sample_found = False - for sample_file in sample_files: - sample_file_name = strip_path( sample_file ) - if sample_file_name == '%s.sample' % index_file_name: - options.index_file = index_file_name - options.missing_index_file = None - if options.tool_data_table: - options.tool_data_table.missing_index_file = None - sample_found = True - break - if not sample_found: - can_set_metadata = False - correction_msg = "This file refers to a file named <b>%s</b>. " % str( index_file ) - correction_msg += "Upload a file named <b>%s.sample</b> to the repository to correct this error." % str( index_file_name ) - invalid_files.append( ( tool_config, correction_msg ) ) - # Reset the tool_data_tables by loading the empty tool_data_table_conf.xml file. - reset_tool_data_tables( trans.app ) - return can_set_metadata, invalid_files def clean_repository_metadata( trans, id, changeset_revisions ): # Delete all repository_metadata records associated with the repository that have a changeset_revision that is not in changeset_revisions. # We sometimes see multiple records with the same changeset revision value - no idea how this happens. 
We'll assume we can delete the older @@ -369,12 +277,16 @@ return file_path return None def create_or_update_repository_metadata( trans, id, repository, changeset_revision, metadata_dict ): + downloadable = 'datatypes' in metadata_dict or 'tools' in metadata_dict or 'workflows' in metadata_dict repository_metadata = get_repository_metadata_by_changeset_revision( trans, id, changeset_revision ) if repository_metadata: repository_metadata.metadata = metadata_dict + repository_metadata.downloadable = downloadable else: - repository_metadata = trans.model.RepositoryMetadata( repository.id, changeset_revision, metadata_dict ) - repository_metadata.downloadable = changeset_is_downloadable( metadata_dict ) + repository_metadata = trans.model.RepositoryMetadata( repository_id=repository.id, + changeset_revision=changeset_revision, + metadata=metadata_dict, + downloadable=downloadable ) trans.sa_session.add( repository_metadata ) trans.sa_session.flush() def generate_clone_url( trans, repository_id ): @@ -387,69 +299,6 @@ return '%s://%s%s/repos/%s/%s' % ( protocol, username, base, repository.user.username, repository.name ) else: return '%s/repos/%s/%s' % ( base_url, repository.user.username, repository.name ) -def generate_metadata_for_changeset_revision( trans, repository_files_dir, repository_clone_url ): - """ - Generate metadata for a repository using it's files on disk. To generate metadata for changeset revisions older than the repository tip, - the repository will have been cloned to a temporary location and updated to a specified changeset revision to access that changeset revision's - disk files, so the value of repository_files_dir will not always be repository.repo_path (it could be a temporary directory containing a clone). - """ - metadata_dict = {} - invalid_files = [] - invalid_tool_configs = [] - tool_dependencies_config = None - datatypes_config = get_config_from_disk( 'datatypes_conf.xml', repository_files_dir ) - if datatypes_config: - metadata_dict = generate_datatypes_metadata( datatypes_config, metadata_dict ) - sample_files = get_sample_files_from_disk( repository_files_dir ) - if sample_files: - metadata_dict[ 'sample_files' ] = sample_files - # Find all tool configs and exported workflows. - for root, dirs, files in os.walk( repository_files_dir ): - if root.find( '.hg' ) < 0 and root.find( 'hgrc' ) < 0: - if '.hg' in dirs: - dirs.remove( '.hg' ) - for name in files: - # Find all tool configs. - if name not in NOT_TOOL_CONFIGS and name.endswith( '.xml' ): - full_path = os.path.abspath( os.path.join( root, name ) ) - if not ( check_binary( full_path ) or check_image( full_path ) or check_gzip( full_path )[ 0 ] - or check_bz2( full_path )[ 0 ] or check_zip( full_path ) ): - try: - # Make sure we're looking at a tool config and not a display application config or something else. 
- element_tree = util.parse_xml( full_path ) - element_tree_root = element_tree.getroot() - is_tool = element_tree_root.tag == 'tool' - except Exception, e: - print "Error parsing %s", full_path, ", exception: ", str( e ) - is_tool = False - if is_tool: - try: - tool = trans.app.toolbox.load_tool( full_path ) - tool_config = os.path.join( root, name ) - except Exception, e: - tool = None - invalid_tool_configs.append( name ) - if tool is not None: - can_set_metadata, invalid_files = check_tool_input_params( trans, repository_files_dir, tool_config, tool, sample_files, invalid_files ) - if can_set_metadata: - metadata_dict = generate_tool_metadata( tool_config, tool, repository_clone_url, metadata_dict ) - # Find all exported workflows - elif name.endswith( '.ga' ): - relative_path = os.path.join( root, name ) - fp = open( relative_path, 'rb' ) - workflow_text = fp.read() - fp.close() - exported_workflow_dict = from_json_string( workflow_text ) - if 'a_galaxy_workflow' in exported_workflow_dict and exported_workflow_dict[ 'a_galaxy_workflow' ] == 'true': - metadata_dict = generate_workflow_metadata( relative_path, exported_workflow_dict, metadata_dict ) - if 'tools' in metadata_dict: - # This step must be done after metadata for tools has been defined. - tool_dependencies_config = get_config_from_disk( 'tool_dependencies.xml', repository_files_dir ) - if tool_dependencies_config: - metadata_dict = generate_tool_dependency_metadata( tool_dependencies_config, metadata_dict ) - if invalid_tool_configs: - metadata_dict [ 'invalid_tools' ] = invalid_tool_configs - return metadata_dict, invalid_files def generate_tool_guid( trans, repository, tool ): """ Generate a guid for the received tool. The form of the guid is @@ -588,10 +437,23 @@ .first() def get_repository_metadata_by_changeset_revision( trans, id, changeset_revision ): """Get metadata for a specified repository change set from the database""" - return trans.sa_session.query( trans.model.RepositoryMetadata ) \ - .filter( and_( trans.model.RepositoryMetadata.table.c.repository_id == trans.security.decode_id( id ), - trans.model.RepositoryMetadata.table.c.changeset_revision == changeset_revision ) ) \ - .first() + # Make sure there are no duplicate records, and return the single unique record for the changeset_revision. Duplicate records were somehow + # creatd in the past. This may or may not be resolved, so when it is confirmed that the cause of duplicate records has been corrected, tweak + # this method accordingly. + all_metadata_records = trans.sa_session.query( trans.model.RepositoryMetadata ) \ + .filter( and_( trans.model.RepositoryMetadata.table.c.repository_id == trans.security.decode_id( id ), + trans.model.RepositoryMetadata.table.c.changeset_revision == changeset_revision ) ) \ + .order_by( trans.model.RepositoryMetadata.table.c.update_time.desc() ) \ + .all() + if len( all_metadata_records ) > 1: + # Delete all recrds older than the last one updated. 
+ for repository_metadata in all_metadata_records[ 1: ]: + trans.sa_session.delete( repository_metadata ) + trans.sa_session.flush() + return all_metadata_records[ 0 ] + elif all_metadata_records: + return all_metadata_records[ 0 ] + return None def get_repository_metadata_by_id( trans, id ): """Get repository metadata from the database""" return trans.sa_session.query( trans.model.RepositoryMetadata ).get( trans.security.decode_id( id ) ) @@ -762,7 +624,7 @@ ctx = get_changectx_for_changeset( repo, changeset_revision ) tool = None message = '' - work_dir = make_tmp_directory() + work_dir = tempfile.mkdtemp() sample_files, deleted_sample_files = get_list_of_copied_sample_files( repo, ctx, dir=work_dir ) if sample_files: trans.app.config.tool_data_path = work_dir @@ -913,7 +775,7 @@ print "Cloning repository revision: ", str( ctx.rev() ) clone_repository( repository_clone_url, work_dir, str( ctx.rev() ) ) print "Generating metadata for changset revision: ", str( ctx.rev() ) - current_metadata_dict, invalid_files = generate_metadata_for_changeset_revision( trans, work_dir, repository_clone_url ) + current_metadata_dict, invalid_file_tups = generate_metadata_for_changeset_revision( trans.app, work_dir, repository_clone_url ) if current_metadata_dict: if not metadata_changeset_revision and not metadata_dict: # We're at the first change set in the change log. @@ -969,7 +831,9 @@ pass # Delete all repository_metadata records for this repository that do not have a changeset_revision value in changeset_revisions. clean_repository_metadata( trans, id, changeset_revisions ) - add_repository_metadata_tool_versions( trans, id, changeset_revisions ) + # Set tool version information for all downloadable changeset revisions. + downloadable_changeset_revisions = [ rm.changeset_revision for rm in repository.downloadable_revisions ] + add_repository_metadata_tool_versions( trans, id, downloadable_changeset_revisions ) def set_repository_metadata( trans, repository, content_alert_str='', **kwd ): """ Set metadata using the repository's current disk files, returning specific error messages (if any) to alert the repository owner that the changeset @@ -980,8 +844,9 @@ repository_clone_url = generate_clone_url( trans, trans.security.encode_id( repository.id ) ) repo_dir = repository.repo_path repo = hg.repository( get_configured_ui(), repo_dir ) - metadata_dict, invalid_files = generate_metadata_for_changeset_revision( trans, repo_dir, repository_clone_url ) + metadata_dict, invalid_file_tups = generate_metadata_for_changeset_revision( trans.app, repo_dir, repository_clone_url ) if metadata_dict: + repository_metadata = None if new_tool_metadata_required( trans, repository, metadata_dict ) or new_workflow_metadata_required( trans, repository, metadata_dict ): # Create a new repository_metadata table row. repository_metadata = trans.model.RepositoryMetadata( repository.id, repository.tip, metadata_dict ) @@ -997,10 +862,11 @@ else: repository_metadata = get_latest_repository_metadata( trans, repository.id ) if repository_metadata: + downloadable = 'datatypes' in metadata_dict or 'tools' in metadata_dict or 'workflows' in metadata_dict # Update the last saved repository_metadata table row. 
repository_metadata.changeset_revision = repository.tip repository_metadata.metadata = metadata_dict - repository_metadata.downloadable = changeset_is_downloadable( metadata_dict ) + repository_metadata.downloadable = downloadable trans.sa_session.add( repository_metadata ) trans.sa_session.flush() else: @@ -1008,18 +874,20 @@ repository_metadata = trans.model.RepositoryMetadata( repository.id, repository.tip, metadata_dict ) trans.sa_session.add( repository_metadata ) trans.sa_session.flush() - elif len( repo ) == 1 and not invalid_files: + if 'tools' in metadata_dict and repository_metadata and status != 'error': + add_repository_metadata_tool_versions( trans, trans.security.encode_id( repository.id ), [ repository_metadata.changeset_revision ] ) + elif len( repo ) == 1 and not invalid_file_tups: message = "Revision '%s' includes no tools, datatypes or exported workflows for which metadata can " % str( repository.tip ) message += "be defined so this revision cannot be automatically installed into a local Galaxy instance." status = "error" - if invalid_files: + if invalid_file_tups: if metadata_dict: message += "Metadata was defined for some items in revision '%s'. " % str( repository.tip ) message += "Correct the following problems if necessary and reset metadata.<br/>" else: message += "Metadata cannot be defined for revision '%s' so this revision cannot be automatically " % str( repository.tip ) message += "installed into a local Galaxy instance. Correct the following problems and reset metadata.<br/>" - for itc_tup in invalid_files: + for itc_tup in invalid_file_tups: tool_file, exception_msg = itc_tup if exception_msg.find( 'No such file or directory' ) >= 0: exception_items = exception_msg.split() diff -r 5f2db4a18d3d3bef78f0a330cc6073f34db7c88b -r 9f790bc90769df7a4f84f103707bdd8ceaf1115d lib/galaxy/webapps/community/controllers/repository.py --- a/lib/galaxy/webapps/community/controllers/repository.py +++ b/lib/galaxy/webapps/community/controllers/repository.py @@ -9,8 +9,8 @@ from galaxy.web.framework.helpers import time_ago, iff, grids from galaxy.util.json import from_json_string, to_json_string from galaxy.model.orm import * -from galaxy.util.shed_util import create_repo_info_dict, get_changectx_for_changeset, get_configured_ui, get_repository_file_contents, make_tmp_directory, NOT_TOOL_CONFIGS -from galaxy.util.shed_util import open_repository_files_folder, reversed_lower_upper_bounded_changelog, strip_path +from galaxy.util.shed_util import create_repo_info_dict, get_changectx_for_changeset, get_configured_ui, get_repository_file_contents, NOT_TOOL_CONFIGS +from galaxy.util.shed_util import open_repository_files_folder, reversed_lower_upper_bounded_changelog, strip_path, to_html_escaped, update_repository from galaxy.tool_shed.encoding_util import * from common import * @@ -113,7 +113,7 @@ grids.GridColumn.__init__( self, col_name ) def get_value( self, trans, grid, repository ): """Display a SelectField whose options are the changeset_revision strings of all downloadable_revisions of this repository.""" - select_field = build_changeset_revision_select_field( trans, repository ) + select_field = build_changeset_revision_select_field( trans, repository, downloadable_only=False ) if len( select_field.options ) > 1: return select_field.get_html() return repository.revision @@ -268,7 +268,7 @@ grids.GridColumn.__init__( self, col_name ) def get_value( self, trans, grid, repository ): """Display a SelectField whose options are the changeset_revision strings of all download-able 
revisions of this repository.""" - select_field = build_changeset_revision_select_field( trans, repository ) + select_field = build_changeset_revision_select_field( trans, repository, downloadable_only=True ) if len( select_field.options ) > 1: return select_field.get_html() return repository.revision @@ -1346,19 +1346,14 @@ message = util.restore_text( params.get( 'message', '' ) ) status = params.get( 'status', 'error' ) webapp = get_webapp( trans, **kwd ) + repository_clone_url = generate_clone_url( trans, repository_id ) repository = get_repository( trans, repository_id ) repo_dir = repository.repo_path repo = hg.repository( get_configured_ui(), repo_dir ) ctx = get_changectx_for_changeset( repo, changeset_revision ) invalid_message = '' - metadata_dict, invalid_files, deleted_sample_files = generate_metadata_for_changeset_revision( trans, - repo, - repository_id, - ctx, - changeset_revision, - repo_dir, - updating_tip=changeset_revision==repository.tip ) - for invalid_file_tup in invalid_files: + metadata_dict, invalid_file_tups = generate_metadata_for_changeset_revision( trans.app, repo_dir, repository_clone_url ) + for invalid_file_tup in invalid_file_tups: invalid_tool_config, invalid_msg = invalid_file_tup invalid_tool_config_name = strip_path( invalid_tool_config ) if tool_config == invalid_tool_config_name: @@ -1554,7 +1549,8 @@ changeset_revision_select_field = build_changeset_revision_select_field( trans, repository, selected_value=changeset_revision, - add_id_to_name=False ) + add_id_to_name=False, + downloadable_only=False ) revision_label = get_revision_label( trans, repository, changeset_revision ) repository_metadata = get_repository_metadata_by_changeset_revision( trans, id, changeset_revision ) if repository_metadata: @@ -1657,7 +1653,8 @@ changeset_revision_select_field = build_changeset_revision_select_field( trans, repository, selected_value=changeset_revision, - add_id_to_name=False ) + add_id_to_name=False, + downloadable_only=False ) return trans.fill_template( '/webapps/community/repository/preview_tools_in_changeset.mako', repository=repository, repository_metadata_id=repository_metadata_id, @@ -2128,7 +2125,8 @@ changeset_revision_select_field = build_changeset_revision_select_field( trans, repository, selected_value=changeset_revision, - add_id_to_name=False ) + add_id_to_name=False, + downloadable_only=False ) revision_label = get_revision_label( trans, repository, changeset_revision ) repository_metadata = get_repository_metadata_by_changeset_revision( trans, id, changeset_revision ) if repository_metadata: @@ -2185,7 +2183,8 @@ changeset_revision_select_field = build_changeset_revision_select_field( trans, repository, selected_value=changeset_revision, - add_id_to_name=False ) + add_id_to_name=False, + downloadable_only=False ) return trans.fill_template( "/webapps/community/repository/view_tool_metadata.mako", repository=repository, tool=tool, @@ -2197,3 +2196,42 @@ webapp=webapp, message=message, status=status ) + +# ----- Utility methods ----- +def build_changeset_revision_select_field( trans, repository, selected_value=None, add_id_to_name=True, downloadable_only=False ): + """Build a SelectField whose options are the changeset_rev strings of all downloadable revisions of the received repository.""" + repo = hg.repository( get_configured_ui(), repository.repo_path ) + options = [] + changeset_tups = [] + refresh_on_change_values = [] + if downloadable_only: + repository_metadata_revisions = repository.downloadable_revisions + else: + 
repository_metadata_revisions = repository.metadata_revisions + for repository_metadata in repository_metadata_revisions: + changeset_revision = repository_metadata.changeset_revision + ctx = get_changectx_for_changeset( repo, changeset_revision ) + if ctx: + rev = '%04d' % ctx.rev() + label = "%s:%s" % ( str( ctx.rev() ), changeset_revision ) + else: + rev = '-1' + label = "-1:%s" % changeset_revision + changeset_tups.append( ( rev, label, changeset_revision ) ) + refresh_on_change_values.append( changeset_revision ) + # Sort options by the revision label. Even though the downloadable_revisions query sorts by update_time, + # the changeset revisions may not be sorted correctly because setting metadata over time will reset update_time. + for changeset_tup in sorted( changeset_tups ): + # Display the latest revision first. + options.insert( 0, ( changeset_tup[1], changeset_tup[2] ) ) + if add_id_to_name: + name = 'changeset_revision_%d' % repository.id + else: + name = 'changeset_revision' + select_field = SelectField( name=name, + refresh_on_change=True, + refresh_on_change_values=refresh_on_change_values ) + for option_tup in options: + selected = selected_value and option_tup[1] == selected_value + select_field.add_option( option_tup[0], option_tup[1], selected=selected ) + return select_field diff -r 5f2db4a18d3d3bef78f0a330cc6073f34db7c88b -r 9f790bc90769df7a4f84f103707bdd8ceaf1115d lib/galaxy/webapps/community/model/__init__.py --- a/lib/galaxy/webapps/community/model/__init__.py +++ b/lib/galaxy/webapps/community/model/__init__.py @@ -166,12 +166,13 @@ fp.close() class RepositoryMetadata( object ): - def __init__( self, repository_id=None, changeset_revision=None, metadata=None, tool_versions=None, malicious=False ): + def __init__( self, repository_id=None, changeset_revision=None, metadata=None, tool_versions=None, malicious=False, downloadable=False ): self.repository_id = repository_id self.changeset_revision = changeset_revision self.metadata = metadata or dict() self.tool_versions = tool_versions or dict() self.malicious = malicious + self.downloadable = downloadable class ItemRatingAssociation( object ): def __init__( self, id=None, user=None, item=None, rating=0, comment='' ): diff -r 5f2db4a18d3d3bef78f0a330cc6073f34db7c88b -r 9f790bc90769df7a4f84f103707bdd8ceaf1115d templates/webapps/community/admin/reset_metadata_on_selected_repositories.mako --- a/templates/webapps/community/admin/reset_metadata_on_selected_repositories.mako +++ b/templates/webapps/community/admin/reset_metadata_on_selected_repositories.mako @@ -43,14 +43,17 @@ ${render_msg( message, status )} %endif +<div class="warningmessage"> + Resetting metadata may take a while because this process clones each change set in each selected repository's change log to a temporary location on disk. + Wait until this page redirects after clicking the <b>Reset metadata on selected repositories</b> button, as doing anything else will not be helpful. Watch + the tool shed paster log to pass the time if necessary. +</div> + <div class="toolForm"><div class="toolFormTitle">Reset all metadata on each selected repository</div><form name="reset_metadata_on_selected_repositories" id="reset_metadata_on_selected_repositories" action="${h.url_for( controller='admin', action='reset_metadata_on_selected_repositories' )}" method="post" ><div class="form-row"> - Check each repository for which you want to reset metadata. Repository names are followed by owners in parentheses. 
Resetting metadata - may take a while because this process clones each change set in each selected repository's change log to a temporary location on disk. - Wait until this page redirects after clicking the <b>Reset metadata on selected repositories</b> button, as doing anything else will not - be helpful. Watch the tool shed paster log to pass the time if necessary. + Check each repository for which you want to reset metadata. Repository names are followed by owners in parentheses. </div><div style="clear: both"></div><div class="form-row"> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
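One of the miscellaneous fixes deserves a note: duplicate RepositoryMetadata rows for the same changeset revision were somehow created in the past, and get_repository_metadata_by_changeset_revision() now self-heals by keeping only the most recently updated row. A condensed sketch of that strategy from the diff above, assuming the SQLAlchemy session and the and_ filter already imported in this module:

def get_repository_metadata_by_changeset_revision( trans, id, changeset_revision ):
    # Newest record first; any stale duplicates follow it.
    all_metadata_records = trans.sa_session.query( trans.model.RepositoryMetadata ) \
                                           .filter( and_( trans.model.RepositoryMetadata.table.c.repository_id == trans.security.decode_id( id ),
                                                          trans.model.RepositoryMetadata.table.c.changeset_revision == changeset_revision ) ) \
                                           .order_by( trans.model.RepositoryMetadata.table.c.update_time.desc() ) \
                                           .all()
    # Delete every record older than the most recently updated one.
    for repository_metadata in all_metadata_records[ 1: ]:
        trans.sa_session.delete( repository_metadata )
        trans.sa_session.flush()
    if all_metadata_records:
        return all_metadata_records[ 0 ]
    return None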