1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/138642a8dc55/ changeset: 138642a8dc55 user: dan date: 2013-02-14 21:13:06 summary: First pass at a pluggable Data Manager. Data Managers can be defined locally or installed from a Tool Shed. TODO: Display Data Manager information in the Tool Shed. Do not install Data Managers into Tool Panel. Add tests. Write Docs. affected #: 28 files diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 buildbot_setup.sh --- a/buildbot_setup.sh +++ b/buildbot_setup.sh @@ -70,6 +70,8 @@ tool_data_table_conf.xml.sample shed_tool_data_table_conf.xml.sample migrated_tools_conf.xml.sample +data_manager_conf.xml.sample +shed_data_manager_conf.xml.sample tool-data/shared/ensembl/builds.txt.sample tool-data/shared/igv/igv_build_sites.txt.sample tool-data/shared/ncbi/builds.txt.sample diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 data_manager_conf.xml.sample --- /dev/null +++ b/data_manager_conf.xml.sample @@ -0,0 +1,3 @@ +<?xml version="1.0"?> +<data_managers> +</data_managers> diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample +++ b/datatypes_conf.xml.sample @@ -59,6 +59,7 @@ <datatype extension="bowtie_base_index" type="galaxy.datatypes.ngsindex:BowtieBaseIndex" mimetype="text/html" display_in_upload="False"/><datatype extension="csfasta" type="galaxy.datatypes.sequence:csFasta" display_in_upload="true"/><datatype extension="data" type="galaxy.datatypes.data:Data" mimetype="application/octet-stream" max_optional_metadata_filesize="1048576" /> + <datatype extension="data_manager_json" type="galaxy.datatypes.data:Text" mimetype="application/json" subclass="True" display_in_upload="False"/><datatype extension="fasta" type="galaxy.datatypes.sequence:Fasta" display_in_upload="true"><converter 
file="fasta_to_tabular_converter.xml" target_datatype="tabular"/><converter file="fasta_to_bowtie_base_index_converter.xml" target_datatype="bowtie_base_index"/> diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/app.py --- a/lib/galaxy/app.py +++ b/lib/galaxy/app.py @@ -18,6 +18,7 @@ from galaxy.tools.genome_index import load_genome_index_tools from galaxy.sample_tracking import external_service_types from galaxy.openid.providers import OpenIDProviders +from galaxy.tools.data_manager.manager import DataManagers class UniverseApplication( object ): """Encapsulates the state of a Universe application""" @@ -95,6 +96,8 @@ self.toolbox = tools.ToolBox( tool_configs, self.config.tool_path, self ) # Search support for tools self.toolbox_search = galaxy.tools.search.ToolBoxSearch( self.toolbox ) + # Load Data Manager + self.data_managers = DataManagers( self ) # If enabled, poll respective tool sheds to see if updates are available for any installed tool shed repositories. 
if self.config.get_bool( 'enable_tool_shed_check', False ): from tool_shed import update_manager diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -75,6 +75,10 @@ except: self.hours_between_check = 12 self.update_integrated_tool_panel = kwargs.get( "update_integrated_tool_panel", True ) + self.enable_data_manager_user_view = string_as_bool( kwargs.get( "enable_data_manager_user_view", "False" ) ) + self.data_manager_config_file = resolve_path( kwargs.get('data_manager_config_file', 'data_manager_conf.xml' ), self.root ) + self.shed_data_manager_config_file = resolve_path( kwargs.get('shed_data_manager_config_file', 'shed_data_manager_conf.xml' ), self.root ) + self.galaxy_data_manager_data_path = kwargs.get( 'galaxy_data_manager_data_path', self.tool_data_path ) self.tool_secret = kwargs.get( "tool_secret", "" ) self.id_secret = kwargs.get( "id_secret", "USING THE DEFAULT IS NOT SECURE!" 
) self.set_metadata_externally = string_as_bool( kwargs.get( "set_metadata_externally", "False" ) ) diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -2998,6 +2998,20 @@ def set_item( self, visualization ): self.visualization = visualization +#Data Manager Classes +class DataManagerHistoryAssociation( object ): + def __init__( self, id=None, history=None, user=None ): + self.id = id + self.history = history + self.user = user + +class DataManagerJobAssociation( object ): + def __init__( self, id=None, job=None, data_manager_id=None ): + self.id = id + self.job = job + self.data_manager_id = data_manager_id +#end of Data Manager Classes + class UserPreference ( object ): def __init__( self, name=None, value=None ): self.name = name @@ -3165,6 +3179,11 @@ return 'repository_dependencies' in self.metadata return False @property + def includes_data_managers( self ): + if self.metadata: + return bool( len( self.metadata.get( 'data_manager', {} ).get( 'data_managers', {} ) ) ) + return False + @property def includes_tools( self ): if self.metadata: return 'tools' in self.metadata diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/model/mapping.py --- a/lib/galaxy/model/mapping.py +++ b/lib/galaxy/model/mapping.py @@ -932,6 +932,23 @@ Column( "user_id", Integer, ForeignKey( "galaxy_user.id" ), index=True ) ) +#Data Manager tables +DataManagerHistoryAssociation.table = Table( "data_manager_history_association", metadata, + Column( "id", Integer, primary_key=True), + Column( "create_time", DateTime, default=now ), + Column( "update_time", DateTime, index=True, default=now, onupdate=now ), + Column( "history_id", Integer, ForeignKey( "history.id" ), index=True ), + Column( "user_id", Integer, ForeignKey( "galaxy_user.id" ), index=True ) + ) + +DataManagerJobAssociation.table = 
Table( "data_manager_job_association", metadata, + Column( "id", Integer, primary_key=True), + Column( "create_time", DateTime, default=now ), + Column( "update_time", DateTime, index=True, default=now, onupdate=now ), + Column( "job_id", Integer, ForeignKey( "job.id" ), index=True ), + Column( "data_manager_id", TEXT, index=True ) + ) + # Tagging tables. Tag.table = Table( "tag", metadata, @@ -1901,6 +1918,17 @@ properties=dict( visualization=relation( Visualization ), user=relation( User ) ) ) +#Data Manager tables +assign_mapper( context, DataManagerHistoryAssociation, DataManagerHistoryAssociation.table, + properties=dict( history=relation( History ), + user=relation( User, backref='data_manager_histories' ) + ) + ) + +assign_mapper( context, DataManagerJobAssociation, DataManagerJobAssociation.table, + properties=dict( job=relation( Job, backref=backref('data_manager_association', uselist=False ), uselist=False ) ) + ) + # User tables. assign_mapper( context, UserPreference, UserPreference.table, diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/model/migrate/versions/0112_add_data_manager_history_association_and_data_manager_job_association_tables.py --- /dev/null +++ b/lib/galaxy/model/migrate/versions/0112_add_data_manager_history_association_and_data_manager_job_association_tables.py @@ -0,0 +1,66 @@ +""" +Migration script to add the data_manager_history_association table and data_manager_job_association. 
+""" +from sqlalchemy import * +from sqlalchemy.orm import * +from migrate import * +from migrate.changeset import * +import sys, logging +from galaxy.model.custom_types import * +from sqlalchemy.exc import * +import datetime +now = datetime.datetime.utcnow + +log = logging.getLogger( __name__ ) +log.setLevel( logging.DEBUG ) +handler = logging.StreamHandler( sys.stdout ) +format = "%(name)s %(levelname)s %(asctime)s %(message)s" +formatter = logging.Formatter( format ) +handler.setFormatter( formatter ) +log.addHandler( handler ) + +metadata = MetaData( migrate_engine ) + +DataManagerHistoryAssociation_table = Table( "data_manager_history_association", metadata, + Column( "id", Integer, primary_key=True), + Column( "create_time", DateTime, default=now ), + Column( "update_time", DateTime, index=True, default=now, onupdate=now ), + Column( "history_id", Integer, ForeignKey( "history.id" ), index=True ), + Column( "user_id", Integer, ForeignKey( "galaxy_user.id" ), index=True ) + ) + +DataManagerJobAssociation_table = Table( "data_manager_job_association", metadata, + Column( "id", Integer, primary_key=True), + Column( "create_time", DateTime, default=now ), + Column( "update_time", DateTime, index=True, default=now, onupdate=now ), + Column( "job_id", Integer, ForeignKey( "job.id" ), index=True ), + Column( "data_manager_id", TEXT, index=True ) + ) + +def upgrade(): + print __doc__ + metadata.reflect() + try: + DataManagerHistoryAssociation_table.create() + log.debug( "Created data_manager_history_association table" ) + except Exception, e: + log.debug( "Creating data_manager_history_association table failed: %s" % str( e ) ) + try: + DataManagerJobAssociation_table.create() + log.debug( "Created data_manager_job_association table" ) + except Exception, e: + log.debug( "Creating data_manager_job_association table failed: %s" % str( e ) ) + + +def downgrade(): + metadata.reflect() + try: + DataManagerHistoryAssociation_table.drop() + log.debug( "Dropped 
data_manager_history_association table" ) + except Exception, e: + log.debug( "Dropping data_manager_history_association table failed: %s" % str( e ) ) + try: + DataManagerJobAssociation_table.drop() + log.debug( "Dropped data_manager_job_association table" ) + except Exception, e: + log.debug( "Dropping data_manager_job_association table failed: %s" % str( e ) ) diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -7,7 +7,7 @@ pkg_resources.require( "MarkupSafe" ) #MarkupSafe must load before mako pkg_resources.require( "Mako" ) -import logging, os, string, sys, tempfile, glob, shutil, types, urllib, subprocess, random, math, traceback, re +import logging, os, string, sys, tempfile, glob, shutil, types, urllib, subprocess, random, math, traceback, re, pipes import simplejson import binascii from mako.template import Template @@ -26,6 +26,7 @@ from galaxy.util.expressions import ExpressionContext from galaxy.tools.test import ToolTestBuilder from galaxy.tools.actions import DefaultToolAction +from galaxy.tools.actions.data_manager import DataManagerToolAction from galaxy.tools.deps import DependencyManager from galaxy.model import directory_hash_id from galaxy.model.orm import * @@ -86,6 +87,7 @@ # In-memory dictionary that defines the layout of the tool panel. self.tool_panel = odict() self.index = 0 + self.data_manager_tools = odict() # File that contains the XML section and tool tags from all tool panel config files integrated into a # single file that defines the tool panel layout. This file can be changed by the Galaxy administrator # (in a way similar to the single tool_conf.xml file in the past) to alter the layout of the tool panel. 
@@ -515,7 +517,7 @@ self.integrated_tool_panel[ key ] = integrated_section else: self.integrated_tool_panel.insert( index, key, integrated_section ) - def load_tool( self, config_file, guid=None ): + def load_tool( self, config_file, guid=None, **kwds ): """Load a single tool from the file named by `config_file` and return an instance of `Tool`.""" # Parse XML configuration file and get the root element tree = util.parse_xml( config_file ) @@ -531,7 +533,7 @@ ToolClass = tool_types.get( root.get( 'tool_type' ) ) else: ToolClass = Tool - return ToolClass( config_file, root, self.app, guid=guid ) + return ToolClass( config_file, root, self.app, guid=guid, **kwds ) def reload_tool_by_id( self, tool_id ): """ Attempt to reload the tool identified by 'tool_id', if successful @@ -569,6 +571,34 @@ message += "<b>version:</b> %s" % old_tool.version status = 'done' return message, status + def remove_tool_by_id( self, tool_id ): + """ + Attempt to remove the tool identified by 'tool_id'. + """ + if tool_id not in self.tools_by_id: + message = "No tool with id %s" % tool_id + status = 'error' + else: + tool = self.tools_by_id[ tool_id ] + del self.tools_by_id[ tool_id ] + tool_key = 'tool_' + tool_id + for key, val in self.tool_panel.items(): + if key == tool_key: + del self.tool_panel[ key ] + break + elif key.startswith( 'section' ): + if tool_key in val.elems: + del self.tool_panel[ key ].elems[ tool_key ] + break + if tool_id in self.data_manager_tools: + del self.data_manager_tools[ tool_id ] + #TODO: do we need to manually remove from the integrated panel here? 
+ message = "Removed the tool:<br/>" + message += "<b>name:</b> %s<br/>" % tool.name + message += "<b>id:</b> %s<br/>" % tool.id + message += "<b>version:</b> %s" % tool.version + status = 'done' + return message, status def load_workflow( self, workflow_id ): """ Return an instance of 'Workflow' identified by `id`, @@ -818,6 +848,7 @@ """ tool_type = 'default' + default_tool_action = DefaultToolAction def __init__( self, config_file, root, app, guid=None ): """Load a tool from the config named by `config_file`""" @@ -1070,7 +1101,7 @@ # Action action_elem = root.find( "action" ) if action_elem is None: - self.tool_action = DefaultToolAction() + self.tool_action = self.default_tool_action() else: module = action_elem.get( 'module' ) cls = action_elem.get( 'class' ) @@ -2612,12 +2643,14 @@ extra_dir="dataset_%d_files" % hda.dataset.id, alt_name = f, file_name = os.path.join(temp_file_path, f), - create = True) + create = True, + preserve_symlinks = True ) # Clean up after being handled by object store. # FIXME: If the object (e.g., S3) becomes async, this will # cause issues so add it to the object store functionality? 
shutil.rmtree(temp_file_path) - except: + except Exception, e: + log.debug( "Error in collect_associated_files: %s" % ( e ) ) continue def collect_child_datasets( self, output, job_working_directory ): """ @@ -2843,7 +2876,64 @@ return tool_dict -class DataSourceTool( Tool ): + def get_default_history_by_trans( self, trans, create=False ): + return trans.get_history( create=create ) + + +class OutputParameterJSONTool( Tool ): + """ + Alternate implementation of Tool that provides parameters and other values + JSONified within the contents of an output dataset + """ + tool_type = 'output_parameter_json' + def _prepare_json_list( self, param_list ): + rval = [] + for value in param_list: + if isinstance( value, dict ): + rval.append( self._prepare_json_param_dict( value ) ) + elif isinstance( value, list ): + rval.append( self._prepare_json_list( value ) ) + else: + rval.append( str( value ) ) + return rval + def _prepare_json_param_dict( self, param_dict ): + rval = {} + for key, value in param_dict.iteritems(): + if isinstance( value, dict ): + rval[ key ] = self._prepare_json_param_dict( value ) + elif isinstance( value, list ): + rval[ key ] = self._prepare_json_list( value ) + else: + rval[ key ] = str( value ) + return rval + def exec_before_job( self, app, inp_data, out_data, param_dict=None ): + if param_dict is None: + param_dict = {} + json_params = {} + json_params[ 'param_dict' ] = self._prepare_json_param_dict( param_dict ) #it would probably be better to store the original incoming parameters here, instead of the Galaxy modified ones? 
+ json_params[ 'output_data' ] = [] + json_params[ 'job_config' ] = dict( GALAXY_DATATYPES_CONF_FILE=param_dict.get( 'GALAXY_DATATYPES_CONF_FILE' ), GALAXY_ROOT_DIR=param_dict.get( 'GALAXY_ROOT_DIR' ), TOOL_PROVIDED_JOB_METADATA_FILE=jobs.TOOL_PROVIDED_JOB_METADATA_FILE ) + json_filename = None + for i, ( out_name, data ) in enumerate( out_data.iteritems() ): + #use wrapped dataset to access certain values + wrapped_data = param_dict.get( out_name ) + #allow multiple files to be created + file_name = str( wrapped_data ) + extra_files_path = str( wrapped_data.files_path ) + data_dict = dict( out_data_name = out_name, + ext = data.ext, + dataset_id = data.dataset.id, + hda_id = data.id, + file_name = file_name, + extra_files_path = extra_files_path ) + json_params[ 'output_data' ].append( data_dict ) + if json_filename is None: + json_filename = file_name + out = open( json_filename, 'w' ) + out.write( simplejson.dumps( json_params ) ) + out.close() + +class DataSourceTool( OutputParameterJSONTool ): """ Alternate implementation of Tool for data_source tools -- those that allow the user to query and extract data from another web site. 
@@ -2853,29 +2943,10 @@ def _build_GALAXY_URL_parameter( self ): return ToolParameter.build( self, ElementTree.XML( '<param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=%s" />' % self.id ) ) def parse_inputs( self, root ): - Tool.parse_inputs( self, root ) + super( DataSourceTool, self ).parse_inputs( root ) if 'GALAXY_URL' not in self.inputs: self.inputs[ 'GALAXY_URL' ] = self._build_GALAXY_URL_parameter() - def _prepare_datasource_json_list( self, param_list ): - rval = [] - for value in param_list: - if isinstance( value, dict ): - rval.append( self._prepare_datasource_json_param_dict( value ) ) - elif isinstance( value, list ): - rval.append( self._prepare_datasource_json_list( value ) ) - else: - rval.append( str( value ) ) - return rval - def _prepare_datasource_json_param_dict( self, param_dict ): - rval = {} - for key, value in param_dict.iteritems(): - if isinstance( value, dict ): - rval[ key ] = self._prepare_datasource_json_param_dict( value ) - elif isinstance( value, list ): - rval[ key ] = self._prepare_datasource_json_list( value ) - else: - rval[ key ] = str( value ) - return rval + self.inputs_by_page[0][ 'GALAXY_URL' ] = self.inputs[ 'GALAXY_URL' ] def exec_before_job( self, app, inp_data, out_data, param_dict=None ): if param_dict is None: param_dict = {} @@ -2885,7 +2956,7 @@ name = param_dict.get( 'name' ) json_params = {} - json_params[ 'param_dict' ] = self._prepare_datasource_json_param_dict( param_dict ) #it would probably be better to store the original incoming parameters here, instead of the Galaxy modified ones? + json_params[ 'param_dict' ] = self._prepare_json_param_dict( param_dict ) #it would probably be better to store the original incoming parameters here, instead of the Galaxy modified ones? 
json_params[ 'output_data' ] = [] json_params[ 'job_config' ] = dict( GALAXY_DATATYPES_CONF_FILE=param_dict.get( 'GALAXY_DATATYPES_CONF_FILE' ), GALAXY_ROOT_DIR=param_dict.get( 'GALAXY_ROOT_DIR' ), TOOL_PROVIDED_JOB_METADATA_FILE=jobs.TOOL_PROVIDED_JOB_METADATA_FILE ) json_filename = None @@ -2976,9 +3047,59 @@ class GenomeIndexTool( Tool ): tool_type = 'index_genome' +class DataManagerTool( OutputParameterJSONTool ): + tool_type = 'manage_data' + default_tool_action = DataManagerToolAction + + def __init__( self, config_file, root, app, guid=None, data_manager_id=None, **kwds ): + self.data_manager_id = data_manager_id + super( DataManagerTool, self ).__init__( config_file, root, app, guid=guid, **kwds ) + if self.data_manager_id is None: + self.data_manager_id = self.id + + def exec_after_process( self, app, inp_data, out_data, param_dict, job = None, **kwds ): + #run original exec_after_process + super( DataManagerTool, self ).exec_after_process( app, inp_data, out_data, param_dict, job = job, **kwds ) + #process results of tool + if job and job.state == job.states.ERROR: + return + data_manager_id = job.data_manager_association.data_manager_id + data_manager = self.app.data_managers.get_manager( data_manager_id, None ) + assert data_manager is not None, "Invalid data manager (%s) requested. It may have been removed before the job completed." % ( data_manager_id ) + data_manager.process_result( out_data ) + + def get_default_history_by_trans( self, trans, create=False ): + def _create_data_manager_history( user ): + history = trans.app.model.History( name='Data Manager History (automatically created)', user=user ) + data_manager_association = trans.app.model.DataManagerHistoryAssociation( user=user, history=history ) + trans.sa_session.add_all( ( history, data_manager_association ) ) + trans.sa_session.flush() + return history + user = trans.user + assert user, 'You must be logged in to use this tool.' 
+ history = user.data_manager_histories + if not history: + #create + if create: + history = _create_data_manager_history( user ) + else: + history = None + else: + for history in reversed( history ): + history = history.history + if not history.deleted: + break + if history.deleted: + if create: + history = _create_data_manager_history( user ) + else: + history = None + return history + + # Populate tool_type to ToolClass mappings tool_types = {} -for tool_class in [ Tool, DataDestinationTool, SetMetadataTool, DataSourceTool, AsyncDataSourceTool ]: +for tool_class in [ Tool, DataDestinationTool, SetMetadataTool, DataSourceTool, AsyncDataSourceTool, DataManagerTool ]: tool_types[ tool_class.tool_type ] = tool_class # ---- Utility classes to be factored out ----------------------------------- @@ -3020,6 +3141,8 @@ """ def __nonzero__( self ): return bool( self.value ) + def get_display_text( self, quote=True ): + return pipes.quote( self.input.value_to_display_text( self.value, self.input.tool.app ) ) class RawObjectWrapper( ToolParameterValueWrapper ): """ diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -168,7 +168,7 @@ # Set history. 
if not history: - history = trans.history + history = tool.get_default_history_by_trans( trans, create=True ) out_data = odict() # Collect any input datasets from the incoming parameters diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/tools/actions/data_manager.py --- /dev/null +++ b/lib/galaxy/tools/actions/data_manager.py @@ -0,0 +1,17 @@ +from __init__ import DefaultToolAction + +import logging +log = logging.getLogger( __name__ ) + +class DataManagerToolAction( DefaultToolAction ): + """Tool action used for Data Manager Tools""" + + def execute( self, tool, trans, **kwds ): + rval = super( DataManagerToolAction, self ).execute( tool, trans, **kwds ) + if isinstance( rval, tuple ) and len( rval ) == 2 and isinstance( rval[0], trans.app.model.Job ): + assoc = trans.app.model.DataManagerJobAssociation( job=rval[0], data_manager_id=tool.data_manager_id ) + trans.sa_session.add( assoc ) + trans.sa_session.flush() + else: + log.error( "Got bad return value from DefaultToolAction.execute(): %s" % ( rval ) ) + return rval diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/tools/data/__init__.py --- a/lib/galaxy/tools/data/__init__.py +++ b/lib/galaxy/tools/data/__init__.py @@ -130,6 +130,8 @@ def __init__( self, config_element, tool_data_path ): self.name = config_element.get( 'name' ) self.comment_char = config_element.get( 'comment_char' ) + self.empty_field_value = config_element.get( 'empty_field_value', '' ) + self.empty_field_values = {} for file_elem in config_element.findall( 'file' ): # There should only be one file_elem. 
if 'path' in file_elem.attrib: @@ -139,6 +141,8 @@ self.tool_data_file = None self.tool_data_path = tool_data_path self.missing_index_file = None + def get_empty_field_by_name( self, name ): + return self.empty_field_values.get( name, self.empty_field_value ) class TabularToolDataTable( ToolDataTable ): """ @@ -182,6 +186,7 @@ if os.path.exists( filename ): found = True all_rows.extend( self.parse_file_fields( open( filename ) ) ) + self.filename = filename else: # Since the path attribute can include a hard-coded path to a specific directory # (e.g., <file path="tool-data/cg_crr_files.loc" />) which may not be the same value @@ -193,6 +198,7 @@ if os.path.exists( corrected_filename ): found = True all_rows.extend( self.parse_file_fields( open( corrected_filename ) ) ) + self.filename = corrected_filename if not found: self.missing_index_file = filename log.warn( "Cannot find index file '%s' for tool data table '%s'" % ( filename, self.name ) ) @@ -231,6 +237,9 @@ self.columns[name] = index if index > self.largest_index: self.largest_index = index + empty_field_value = column_elem.get( 'empty_field_value', None ) + if empty_field_value is not None: + self.empty_field_values[ name ] = empty_field_value assert 'value' in self.columns, "Required 'value' column missing from column def" if 'name' not in self.columns: self.columns['name'] = self.columns['value'] @@ -257,7 +266,20 @@ "'%s' characters must be used to separate fields):\n%s" % ( ( i + 1 ), self.name, separator_char, line ) ) return rval - + + def get_column_name_list( self ): + rval = [] + for i in range( self.largest_index + 1 ): + found_column = False + for name, index in self.columns.iteritems(): + if index == i: + rval.append( name ) + found_column = True + break + if not found_column: + rval.append( None ) + return rval + def get_entry( self, query_attr, query_val, return_attr ): """ Returns table entry associated with a col/val pair. 
diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/tools/data_manager/__init__.py --- /dev/null +++ b/lib/galaxy/tools/data_manager/__init__.py @@ -0,0 +1,3 @@ +""" +Data Manager +""" diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/tools/data_manager/manager.py --- /dev/null +++ b/lib/galaxy/tools/data_manager/manager.py @@ -0,0 +1,305 @@ +import pkg_resources + +pkg_resources.require( "simplejson" ) + +import os, shutil, errno +import simplejson + +from galaxy import util +from galaxy.util.odict import odict +from galaxy.util.template import fill_template +from galaxy.tools.data import TabularToolDataTable + +#set up logger +import logging +log = logging.getLogger( __name__ ) + +SUPPORTED_DATA_TABLE_TYPES = ( TabularToolDataTable ) + +class DataManagers( object ): + def __init__( self, app, xml_filename=None ): + self.app = app + self.data_managers = odict() + self.managed_data_tables = odict() + self.tool_path = None + self.filename = xml_filename or self.app.config.data_manager_config_file + self.load_from_xml( self.filename ) + if self.app.config.shed_data_manager_config_file: + self.load_from_xml( self.app.config.shed_data_manager_config_file, store_tool_path=False ) + def load_from_xml( self, xml_filename, store_tool_path=True ): + try: + tree = util.parse_xml( xml_filename ) + except Exception, e: + log.error( 'There was an error parsing your Data Manager config file "%s": %s' % ( xml_filename, e ) ) + return #we are not able to load any data managers + root = tree.getroot() + if root.tag != 'data_managers': + log.error( 'A data managers configuration must have a "data_managers" tag as the root. "%s" is present' % ( root.tag ) ) + return + if store_tool_path: + tool_path = root.get( 'tool_path', None ) + if tool_path is None: + tool_path = self.app.config.tool_path + if not tool_path: + tool_path = '.' 
+ self.tool_path = tool_path + for data_manager_elem in root.findall( 'data_manager' ): + self.load_manager_from_elem( data_manager_elem ) + def load_manager_from_elem( self, data_manager_elem, tool_path=None, add_manager=True ): + try: + data_manager = DataManager( self, data_manager_elem, tool_path=tool_path ) + except Exception, e: + log.error( "Error loading data_manager '%s':\n%s" % ( e, util.xml_to_string( data_manager_elem ) ) ) + return None + if add_manager: + self.add_manager( data_manager ) + log.debug( 'Loaded Data Manager: %s' % ( data_manager.id ) ) + return data_manager + def add_manager( self, data_manager ): + assert data_manager.id not in self.data_managers, "A data manager has been defined twice: %s" % ( data_manager.id ) + self.data_managers[ data_manager.id ] = data_manager + for data_table_name in data_manager.data_tables.keys(): + if data_table_name not in self.managed_data_tables: + self.managed_data_tables[ data_table_name ] = [] + self.managed_data_tables[ data_table_name ].append( data_manager ) + def get_manager( self, *args, **kwds ): + return self.data_managers.get( *args, **kwds ) + def remove_manager( self, manager_id ): + data_manager = self.get_manager( manager_id, None ) + if data_manager is not None: + del self.data_managers[ manager_id ] + #remove tool from toolbox + if data_manager.tool: + self.app.toolbox.remove_tool_by_id( data_manager.tool.id ) + #determine if any data_tables are no longer tracked + for data_table_name in data_manager.data_tables.keys(): + remove_data_table_tracking = True + for other_data_manager in self.data_managers.itervalues(): + if data_table_name in other_data_manager.data_tables: + remove_data_table_tracking = False + break + if remove_data_table_tracking and data_table_name in self.managed_data_tables: + del self.managed_data_tables[ data_table_name ] + +class DataManager( object ): + def __init__( self, data_managers, elem=None, tool_path=None ): + self.data_managers = data_managers + 
self.declared_id = None + self.name = None + self.description = None + self.tool = None + self.tool_guid = None + self.data_tables = odict() + self.output_ref_by_data_table = {} + self.move_by_data_table_column = {} + self.value_translation_by_data_table_column = {} + if elem is not None: + self.load_from_element( elem, tool_path or self.data_managers.tool_path ) + def load_from_element( self, elem, tool_path ): + assert elem.tag == 'data_manager', 'A data manager configuration must have a "data_manager" tag as the root. "%s" is present' % ( root.tag ) + self.declared_id = elem.get( 'id', None ) + path = elem.get( 'tool_file', None ) + if path is None: + tool_elem = elem.find( 'tool' ) + assert tool_elem is not None, "Error loading tool for data manager. Make sure that a tool_file attribute or a tool tag set has been defined:\n%s" % ( util.xml_to_string( elem ) ) + path = tool_elem.get( "file", None ) + self.tool_guid = tool_elem.get( "guid", None ) + #use shed_conf_file to determine tool_path + shed_conf_file = elem.get( "shed_conf_file", None ) + if shed_conf_file: + shed_conf = self.data_managers.app.toolbox.get_shed_config_dict_by_filename( shed_conf_file, None ) + if shed_conf: + tool_path = shed_conf.get( "tool_path", tool_path ) + assert path is not None, "A tool file path could not be determined:\n%s" % ( util.xml_to_string( elem ) ) + self.load_tool( os.path.join( tool_path, path ), guid=self.tool_guid, data_manager_id=self.id ) + self.name = elem.get( 'name', self.tool.name ) + self.description = elem.get( 'description', self.tool.description ) + + for data_table_elem in elem.findall( 'data_table' ): + data_table_name = data_table_elem.get( "name" ) + assert data_table_name is not None, "A name is required for a data table entry" + if data_table_name not in self.data_tables: + self.data_tables[ data_table_name ] = odict()#{} + output_elem = data_table_elem.find( 'output' ) + if output_elem is not None: + for column_elem in output_elem.findall( 'column' ): 
+ column_name = column_elem.get( 'name', None ) + assert column_name is not None, "Name is required for column entry" + data_table_coumn_name = column_elem.get( 'data_table_name', column_name ) + self.data_tables[ data_table_name ][ data_table_coumn_name ] = column_name + output_ref = column_elem.get( 'output_ref', None ) + if output_ref is not None: + if data_table_name not in self.output_ref_by_data_table: + self.output_ref_by_data_table[ data_table_name ] = {} + self.output_ref_by_data_table[ data_table_name ][ data_table_coumn_name ] = output_ref + value_translation_elem = column_elem.find( 'value_translation' ) + if value_translation_elem is not None: + value_translation = value_translation_elem.text + else: + value_translation = None + if value_translation is not None: + if data_table_name not in self.value_translation_by_data_table_column: + self.value_translation_by_data_table_column[ data_table_name ] = {} + self.value_translation_by_data_table_column[ data_table_name ][ data_table_coumn_name ] = value_translation + + for move_elem in column_elem.findall( 'move' ): + move_type = move_elem.get( 'type', 'directory' ) + relativize_symlinks = move_elem.get( 'relativize_symlinks', False ) #TODO: should we instead always relativize links? 
+ source_elem = move_elem.find( 'source' ) + if source_elem is None: + source_base = None + source_value = '' + else: + source_base = source_elem.get( 'base', None ) + source_value = source_elem.text + target_elem = move_elem.find( 'target' ) + if target_elem is None: + target_base = None + target_value = '' + else: + target_base = target_elem.get( 'base', None ) + target_value = target_elem.text + if data_table_name not in self.move_by_data_table_column: + self.move_by_data_table_column[ data_table_name ] = {} + self.move_by_data_table_column[ data_table_name ][ data_table_coumn_name ] = dict( type=move_type, source_base=source_base, source_value=source_value, target_base=target_base, target_value=target_value, relativize_symlinks=relativize_symlinks ) + @property + def id( self ): + return self.tool_guid or self.declared_id #if we have a tool with a guid, we will use that as the tool_manager id + def load_tool( self, tool_filename, guid=None, data_manager_id=None ): + tool = self.data_managers.app.toolbox.load_tool( tool_filename, guid=guid, data_manager_id=data_manager_id ) + self.data_managers.app.toolbox.data_manager_tools[ tool.id ] = tool + self.data_managers.app.toolbox.tools_by_id[ tool.id ] = tool + self.tool = tool + return tool + + def process_result( self, out_data ): + data_manager_dicts = {} + data_manager_dict = {} + #TODO: fix this merging below + for output_name, output_dataset in out_data.iteritems(): + try: + output_dict = simplejson.loads( open( output_dataset.file_name ).read() ) + except Exception, e: + log.warning( 'Error reading DataManagerTool json for "%s": %s' % ( output_name, e ) ) + continue + data_manager_dicts[ output_name ] = output_dict + for key, value in output_dict.iteritems(): + if key not in data_manager_dict: + data_manager_dict[ key ] = {} + data_manager_dict[ key ].update( value ) + data_manager_dict.update( output_dict ) + + data_tables_dict = data_manager_dict.get( 'data_tables', {} ) + for data_table_name, 
data_table_columns in self.data_tables.iteritems(): + data_table_values = data_tables_dict.pop( data_table_name, None ) + if not data_table_values: + log.warning( 'No values for data table "%s" were returned by the data manager "%s".' % ( data_table_name, self.id ) ) + continue #next data table + data_table = self.data_managers.app.tool_data_tables.get( data_table_name, None ) + if data_table is None: + log.error( 'The data manager "%s" returned an unknown data table "%s" with new entries "%s". These entries will not be created. Please confirm that an entry for "%s" exists in your "%s" file.' % ( self.id, data_table_name, data_table_values, data_table_name, 'tool_data_table_conf.xml' ) ) + continue #next table name + if not isinstance( data_table, SUPPORTED_DATA_TABLE_TYPES ): + log.error( 'The data manager "%s" returned an unsupported data table "%s" with type "%s" with new entries "%s". These entries will not be created. Please confirm that the data table is of a supported type (%s).' % ( self.id, data_table_name, type( data_table ), data_table_values, SUPPORTED_DATA_TABLE_TYPES ) ) + continue #next table name + output_ref_values = {} + if data_table_name in self.output_ref_by_data_table: + for data_table_column, output_ref in self.output_ref_by_data_table[ data_table_name ].iteritems(): + output_ref_dataset = out_data.get( output_ref, None ) + assert output_ref_dataset is not None, "Referenced output was not found." 
+ output_ref_values[ data_table_column ] = output_ref_dataset + + final_data_table_values = [] + if not isinstance( data_table_values, list ): + data_table_values = [ data_table_values ] + columns = data_table.get_column_name_list() + #FIXME: Need to lock these files for editing + try: + data_table_fh = open( data_table.filename, 'r+b' ) + except IOError, e: + log.warning( 'Error opening data table file (%s) with r+b, assuming file does not exist and will open as wb: %s' % ( data_table.filename, e ) ) + data_table_fh = open( data_table.filename, 'wb' ) + if os.stat( data_table.filename )[6] != 0: + # ensure last existing line ends with new line + data_table_fh.seek( -1, 2 ) #last char in file + last_char = data_table_fh.read() + if last_char not in [ '\n', '\r' ]: + data_table_fh.write( '\n' ) + for data_table_row in data_table_values: + data_table_value = dict( **data_table_row ) #keep original values here + for name, value in data_table_row.iteritems(): #FIXME: need to loop through here based upon order listed in data_manager config + if name in output_ref_values: + moved = self.process_move( data_table_name, name, output_ref_values[ name ].extra_files_path, **data_table_value ) + data_table_value[ name ] = self.process_value_translation( data_table_name, name, **data_table_value ) + final_data_table_values.append( data_table_value ) + fields = [] + for column_name in columns: + if column_name is None or column_name not in data_table_value: + fields.append( data_table.get_empty_field_by_name( column_name ) ) + else: + fields.append( data_table_value[ column_name ] ) + #should we add a comment to file about automatically generated value here? 
+ data_table_fh.write( "%s\n" % ( data_table.separator.join( self._replace_field_separators( fields, separator=data_table.separator ) ) ) ) #write out fields to disk + data_table.data.append( fields ) #add fields to loaded data table + data_table_fh.close() + for data_table_name, data_table_values in data_tables_dict.iteritems(): + #tool returned extra data table entries, but data table was not declared in data manager + #do not add these values, but do provide messages + log.warning( 'The data manager "%s" returned an undeclared data table "%s" with new entries "%s". These entries will not be created. Please confirm that an entry for "%s" exists in your "%s" file.' % ( self.id, data_table_name, data_table_values, data_table_name, self.data_managers.filename ) ) + def _replace_field_separators( self, fields, separator="\t", replace=None, comment_char=None ): + #make sure none of the fields contain separator + #make sure separator replace is different from comment_char, + #due to possible leading replace + if replace is None: + if separator == " ": + if comment_char == "\t": + replace = "_" + else: + replace = "\t" + else: + if comment_char == " ": + replace = "_" + else: + replace = " " + return map( lambda x: x.replace( separator, replace ), fields ) + def process_move( self, data_table_name, column_name, source_base_path, relative_symlinks=False, **kwd ): + if data_table_name in self.move_by_data_table_column and column_name in self.move_by_data_table_column[ data_table_name ]: + move_dict = self.move_by_data_table_column[ data_table_name ][ column_name ] + source = move_dict[ 'source_base' ] + if source is None: + source = source_base_path + else: + source = fill_template( source, GALAXY_DATA_MANAGER_DATA_PATH=self.data_managers.app.config.galaxy_data_manager_data_path, **kwd ) + if move_dict[ 'source_value' ]: + source = os.path.join( source, fill_template( move_dict[ 'source_value' ], 
GALAXY_DATA_MANAGER_DATA_PATH=self.data_managers.app.config.galaxy_data_manager_data_path, **kwd ) ) + target = move_dict[ 'target_base' ] + if target is None: + target = self.data_managers.app.config.galaxy_data_manager_data_path + else: + target = fill_template( target, GALAXY_DATA_MANAGER_DATA_PATH=self.data_managers.app.config.galaxy_data_manager_data_path, **kwd ) + if move_dict[ 'target_value' ]: + target = os.path.join( target, fill_template( move_dict[ 'target_value' ], GALAXY_DATA_MANAGER_DATA_PATH=self.data_managers.app.config.galaxy_data_manager_data_path, **kwd ) ) + + if move_dict[ 'type' ] == 'file': + dirs, filename = os.path.split( target ) + try: + os.makedirs( dirs ) + except OSError, e: + if e.errno != errno.EEXIST: + raise e + #log.debug( 'Error creating directory "%s": %s' % ( dirs, e ) ) + #moving a directory and the target already exists, we move the contents instead + util.move_merge( source, target ) + + if move_dict.get( 'relativize_symlinks', False ): + util.relativize_symlinks( target ) + + return True + return False + + def process_value_translation( self, data_table_name, column_name, **kwd ): + value = kwd.get( column_name ) + if data_table_name in self.value_translation_by_data_table_column and column_name in self.value_translation_by_data_table_column[ data_table_name ]: + value_translation = self.value_translation_by_data_table_column[ data_table_name ][ column_name ] + value = fill_template( value_translation, GALAXY_DATA_MANAGER_DATA_PATH=self.data_managers.app.config.galaxy_data_manager_data_path, **kwd ) + return value diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/tools/parameters/basic.py --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -880,6 +880,9 @@ >>> print p.filter_value( "hg17" ) hg17 """ + def __init__( self, *args, **kwds ): + super( GenomeBuildParameter, self ).__init__( *args, **kwds ) + self.static_options = [ ( 
value, key, False ) for key, value in util.dbnames ] def get_options( self, trans, other_values ): if not trans.history: yield 'unspecified', '?', False diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -578,6 +578,22 @@ return curdir return join( *rel_list ) +def relativize_symlinks( path, start=None, followlinks=False): + for root, dirs, files in os.walk( path, followlinks=followlinks ): + rel_start = None + for file_name in files: + symlink_file_name = os.path.join( root, file_name ) + if os.path.islink( symlink_file_name ): + symlink_target = os.readlink( symlink_file_name ) + if rel_start is None: + if start is None: + rel_start = root + else: + rel_start = start + rel_path = relpath( symlink_target, rel_start ) + os.remove( symlink_file_name ) + os.symlink( rel_path, symlink_file_name ) + def stringify_dictionary_keys( in_dict ): #returns a new dictionary #changes unicode keys into strings, only works on top level (does not recurse) diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/util/shed_util.py --- a/lib/galaxy/util/shed_util.py +++ b/lib/galaxy/util/shed_util.py @@ -451,7 +451,7 @@ return elem_list def generate_tool_panel_dict_for_new_install( tool_dicts, tool_section=None ): """ - When installing a repository that contains tools, all tools must curently be defined within the same tool section in the tool + When installing a repository that contains tools, all tools must currently be defined within the same tool section in the tool panel or outside of any sections. """ tool_panel_dict = {} @@ -1168,6 +1168,66 @@ return False # Default to copying the file if none of the above are true. 
return True +def install_data_managers( app, shed_data_manager_conf_filename, metadata_dict, shed_config_dict, relative_install_dir, repository, repository_tools_tups ): + rval = [] + if 'data_manager' in metadata_dict: + repository_tools_by_guid = {} + for tool_tup in repository_tools_tups: + repository_tools_by_guid[ tool_tup[1] ] = dict( tool_config_filename=tool_tup[0], tool=tool_tup[2] ) + config_elems = [ elem for elem in util.parse_xml( shed_data_manager_conf_filename ).getroot() ] #load existing data managers + + repo_data_manager_conf_filename = metadata_dict['data_manager'].get( 'config_filename', None ) + if repo_data_manager_conf_filename is None: + log.debug( "No data_manager_conf.xml file has been defined." ) + return rval + relative_repo_data_manager_dir = os.path.join( shed_config_dict.get( 'tool_path', '' ), relative_install_dir ) + repo_data_manager_conf_filename = os.path.join( relative_repo_data_manager_dir, repo_data_manager_conf_filename ) + tree = util.parse_xml( repo_data_manager_conf_filename ) + root = tree.getroot() + for elem in root: + if elem.tag == 'data_manager': + data_manager_id = elem.get( 'id', None ) + if data_manager_id is None: + log.error( "A data manager was defined that does not have an id and will not be installed:\n%s" % ( util.xml_to_string( elem ) ) ) + continue + data_manager_dict = metadata_dict['data_manager'].get( 'data_managers', {} ).get( data_manager_id, None ) + if data_manager_dict is None: + log.error( "Data manager metadata is not defined properly for '%s'." % ( data_manager_id ) ) + continue + + tool_guid = data_manager_dict.get( 'tool_guid', None ) + if tool_guid is None: + log.error( "Data manager tool guid '%s' is not set in metadata for '%s'." % ( tool_guid, data_manager_id ) ) + continue + tool_dict = repository_tools_by_guid.get( tool_guid, None ) + if tool_dict is None: + log.error( "Data manager tool guid '%s' could not be found for '%s'. Perhaps the tool is invalid?" 
% ( tool_guid, data_manager_id ) ) + continue + tool = tool_dict.get( 'tool', None ) + if tool is None: + log.error( "Data manager tool with guid '%s' could not be found for '%s'. Perhaps the tool is invalid?" % ( tool_guid, data_manager_id ) ) + continue + tool_config_filename = tool_dict.get( 'tool_config_filename', None ) + if tool_config_filename is None: + log.error( "Data manager metadata is missing 'tool_config_file' for '%s'." % ( data_manager_id ) ) + continue + + elem.set( 'shed_conf_file', shed_config_dict['config_filename'] ) + if elem.get( 'tool_file', None ) is not None: + del elem.attrib[ 'tool_file' ] #remove old tool_file info + tool_elem = suc.generate_tool_elem( repository.tool_shed, repository.name, repository.installed_changeset_revision, + repository.owner, tool_config_filename, tool, None ) + elem.insert( 0, tool_elem ) + data_manager = app.data_managers.load_manager_from_elem( elem, tool_path=shed_config_dict.get( 'tool_path', '' ) ) + if data_manager: + rval.append( data_manager ) + else: + log.warning( "Encountered unexpected element '%s':\n%s" % ( elem.tag, util.xml_to_string( elem ) ) ) + config_elems.append( elem ) + # Persist the altered shed_tool_config file. 
+ suc.data_manager_config_elems_to_xml_file( app, config_elems, shed_data_manager_conf_filename ) + return rval + def is_in_repo_info_dicts( repo_info_dict, repo_info_dicts ): """Return True if the received repo_info_dict is contained in the list of received repo_info_dicts.""" for name, repo_info_tuple in repo_info_dict.items(): @@ -1447,6 +1507,31 @@ def pull_repository( repo, repository_clone_url, ctx_rev ): """Pull changes from a remote repository to a local one.""" commands.pull( suc.get_configured_ui(), repo, source=repository_clone_url, rev=[ ctx_rev ] ) +def remove_from_data_manager( app, repository ): + metadata_dict = repository.metadata + if metadata_dict and 'data_manager' in metadata_dict: + data_manager_tool_guids = [ data_manager_dict.get( 'tool_guid' ) for data_manager_dict in metadata_dict.get( 'data_manager', {} ).get( 'data_managers', {} ).itervalues() if 'tool_guid' in data_manager_dict ] + shed_data_manager_conf_filename = app.config.shed_data_manager_config_file + tree = util.parse_xml( shed_data_manager_conf_filename ) + root = tree.getroot() + assert root.tag == 'data_managers', 'The file provided (%s) for removing data managers from is not a valid data manager xml file.' % ( shed_data_manager_conf_filename ) + config_elems = [] + for elem in root: + keep_elem = True + if elem.tag == 'data_manager': + tool_elem = elem.find( 'tool' ) + if tool_elem is not None: + tool_guid = tool_elem.get( 'guid', None ) + if tool_guid in data_manager_tool_guids: + keep_elem = False + if keep_elem: + config_elems.append( elem ) + #remove data manager from in memory + for data_manager_tool_guids in data_manager_tool_guids: + #for shed-based data managers, the data_manager id is the same as the tool guid + app.data_managers.remove_manager( data_manager_tool_guids ) + # Persist the altered shed_tool_config file. 
+ suc.data_manager_config_elems_to_xml_file( app, config_elems, shed_data_manager_conf_filename ) def remove_from_shed_tool_config( trans, shed_tool_conf_dict, guids_to_remove ): # A tool shed repository is being uninstalled so change the shed_tool_conf file. Parse the config file to generate the entire list # of config_elems instead of using the in-memory list since it will be a subset of the entire list if one or more repositories have diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/util/shed_util_common.py --- a/lib/galaxy/util/shed_util_common.py +++ b/lib/galaxy/util/shed_util_common.py @@ -30,6 +30,7 @@ log = logging.getLogger( __name__ ) INITIAL_CHANGELOG_HASH = '000000000000' +REPOSITORY_DATA_MANAGER_CONFIG_FILENAME = "data_manager_conf.xml" # Characters that must be html escaped MAPPED_CHARS = { '>' :'>', '<' :'<', @@ -37,9 +38,10 @@ '&' : '&', '\'' : ''' } MAX_CONTENT_SIZE = 32768 -NOT_TOOL_CONFIGS = [ 'datatypes_conf.xml', 'repository_dependencies.xml', 'tool_dependencies.xml' ] +NOT_TOOL_CONFIGS = [ 'datatypes_conf.xml', 'repository_dependencies.xml', 'tool_dependencies.xml', REPOSITORY_DATA_MANAGER_CONFIG_FILENAME ] GALAXY_ADMIN_TOOL_SHED_CONTROLLER = 'GALAXY_ADMIN_TOOL_SHED_CONTROLLER' TOOL_SHED_ADMIN_CONTROLLER = 'TOOL_SHED_ADMIN_CONTROLLER' +TOOL_TYPES_NOT_IN_TOOL_PANEL = [ 'manage_data' ] VALID_CHARS = set( string.letters + string.digits + "'\"-=_.()/+*^,:?!#[]%\\$@;{}" ) new_repo_email_alert_template = """ @@ -981,6 +983,14 @@ repository_dependencies, tool_dependencies ) return repo_info_dict +def data_manager_config_elems_to_xml_file( app, config_elems, config_filename ):#, shed_tool_conf_filename ): + # Persist the current in-memory list of config_elems to a file named by the value of config_filename. 
+ fh = open( config_filename, 'wb' ) + fh.write( '<?xml version="1.0"?>\n<data_managers>\n' )#% ( shed_tool_conf_filename )) + for elem in config_elems: + fh.write( util.xml_to_string( elem, pretty=True ) ) + fh.write( '</data_managers>\n' ) + fh.close() def ensure_required_repositories_exist_for_reinstall( trans, repository_dependencies ): """ Inspect the received repository_dependencies dictionary and make sure tool_shed_repository objects exist in the database for each entry. These @@ -1008,6 +1018,64 @@ return '%s://%s%s/repos/%s/%s' % ( protocol, username, base, repository.user.username, repository.name ) else: return '%s/repos/%s/%s' % ( base_url, repository.user.username, repository.name ) +def generate_data_manager_metadata( app, repository, repo_dir, data_manager_config_filename, metadata_dict, shed_config_dict=None ): + """Update the received metadata_dict with information from the parsed data_manager_config_filename.""" + if data_manager_config_filename is None: + return metadata_dict + try: + tree = util.parse_xml( data_manager_config_filename ) + except Exception, e: + log.error( 'There was an error parsing your Data Manager config file "%s": %s' % ( data_manager_config_filename, e ) ) + return metadata_dict #we are not able to load any data managers + tool_path = None + if shed_config_dict: + tool_path = shed_config_dict.get( 'tool_path', None ) + tools = {} + for tool in metadata_dict.get( 'tools', [] ): + tool_conf_name = tool['tool_config'] + if tool_path: + tool_conf_name = os.path.join( tool_path, tool_conf_name ) + tools[tool_conf_name] = tool + repo_path = repository.repo_path( app ) + try: + repo_files_directory = repository.repo_files_directory( app ) + repo_dir = repo_files_directory + except AttributeError: + repo_files_directory = repo_path + relative_data_manager_dir = util.relpath( os.path.split( data_manager_config_filename )[0], repo_dir ) + rel_data_manager_config_filename = os.path.join( relative_data_manager_dir, os.path.split( 
data_manager_config_filename )[1] ) + data_managers = {} + data_manager_metadata = { 'config_filename': rel_data_manager_config_filename, 'data_managers': data_managers }#'tool_config_files': tool_files } + metadata_dict[ 'data_manager' ] = data_manager_metadata + root = tree.getroot() + data_manager_tool_path = root.get( 'tool_path', None ) + if data_manager_tool_path: + relative_data_manager_dir = os.path.join( relative_data_manager_dir, data_manager_tool_path ) + for data_manager_elem in root.findall( 'data_manager' ): + tool_file = data_manager_elem.get( 'tool_file', None ) + data_manager_id = data_manager_elem.get( 'id', None ) + if data_manager_id is None: + log.error( 'Data Manager entry is missing id attribute in "%s".' % ( data_manager_config_filename ) ) + continue + data_tables = [] + if tool_file is None: + log.error( 'Data Manager entry is missing tool_file attribute in "%s".' % ( data_manager_config_filename ) ) + else: + for data_table_elem in data_manager_elem.findall( 'data_table' ): + data_table_name = data_table_elem.get( 'name', None ) + if data_table_name is None: + log.error( 'Data Manager data_table entry is name attribute in "%s".' % ( data_manager_config_filename ) ) + else: + data_tables.append( data_table_name ) + data_manager_metadata_tool_file = os.path.join( relative_data_manager_dir, tool_file ) + tool_metadata_tool_file = os.path.join( repo_files_directory, data_manager_metadata_tool_file ) + tool = tools.get( tool_metadata_tool_file, None ) + if tool is None: + log.error( "Unable to determine tools metadata for '%s'." 
% ( data_manager_metadata_tool_file ) ) + continue + data_managers[ data_manager_id ] = { 'tool_config_file': data_manager_metadata_tool_file, 'data_tables': data_tables, 'tool_guid': tool['guid'] } + log.debug( 'Loaded Data Manager tool_files: %s' % ( tool_file ) ) + return metadata_dict def generate_datatypes_metadata( datatypes_config, metadata_dict ): """Update the received metadata_dict with information from the parsed datatypes_config.""" tree = ElementTree.parse( datatypes_config ) @@ -1252,6 +1320,9 @@ exported_workflow_dict = json.from_json_string( workflow_text ) if 'a_galaxy_workflow' in exported_workflow_dict and exported_workflow_dict[ 'a_galaxy_workflow' ] == 'true': metadata_dict = generate_workflow_metadata( relative_path, exported_workflow_dict, metadata_dict ) + # Handle any data manager entries + metadata_dict = generate_data_manager_metadata( app, repository, files_dir, get_config_from_disk( REPOSITORY_DATA_MANAGER_CONFIG_FILENAME, files_dir ), metadata_dict, shed_config_dict=shed_config_dict ) + if readme_files: metadata_dict[ 'readme_files' ] = readme_files # This step must be done after metadata for tools has been defined. @@ -1531,6 +1602,7 @@ description=tool.description, version_string_cmd = tool.version_string_cmd, tool_config=tool_config, + tool_type=tool.tool_type, requirements=tool_requirements, tests=tool_tests ) if 'tools' in metadata_dict: diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py --- a/lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py +++ b/lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py @@ -507,6 +507,8 @@ if tool_shed_repository.includes_tools: # Handle tool panel alterations. 
shed_util.remove_from_tool_panel( trans, tool_shed_repository, shed_tool_conf, uninstall=remove_from_disk_checked ) + if tool_shed_repository.includes_data_managers: + shed_util.remove_from_data_manager( trans.app, tool_shed_repository ) if tool_shed_repository.includes_datatypes: # Deactivate proprietary datatypes. installed_repository_dict = shed_util.load_installed_datatypes( trans.app, tool_shed_repository, repository_install_dir, deactivate=True ) @@ -701,6 +703,9 @@ shed_tool_conf=shed_tool_conf, tool_panel_dict=tool_panel_dict, new_install=True ) + if 'data_manager' in metadata_dict: + rval = shed_util.install_data_managers( trans.app, trans.app.config.shed_data_manager_config_file, metadata_dict, shed_config_dict, relative_install_dir, + tool_shed_repository, repository_tools_tups ) if 'datatypes' in metadata_dict: tool_shed_repository.status = trans.model.ToolShedRepository.installation_status.LOADING_PROPRIETARY_DATATYPES if not tool_shed_repository.includes_datatypes: diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 lib/galaxy/webapps/galaxy/controllers/tool_runner.py --- a/lib/galaxy/webapps/galaxy/controllers/tool_runner.py +++ b/lib/galaxy/webapps/galaxy/controllers/tool_runner.py @@ -89,7 +89,7 @@ tool.input_translator.translate( params ) # We may be visiting Galaxy for the first time ( e.g., sending data from UCSC ), # so make sure to create a new history if we've never had one before. 
- history = trans.get_history( create=True ) + history = tool.get_default_history_by_trans( trans, create=True ) template, vars = tool.handle_input( trans, params.__dict__ ) if len( params ) > 0: trans.log_event( "Tool params: %s" % ( str( params ) ), tool_id=tool_id ) diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 run.sh --- a/run.sh +++ b/run.sh @@ -16,6 +16,8 @@ shed_tool_data_table_conf.xml.sample tool_data_table_conf.xml.sample tool_sheds_conf.xml.sample + data_manager_conf.xml.sample + shed_data_manager_conf.xml.sample openid_conf.xml.sample universe_wsgi.ini.sample tool-data/shared/ncbi/builds.txt.sample diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 shed_data_manager_conf.xml.sample --- /dev/null +++ b/shed_data_manager_conf.xml.sample @@ -0,0 +1,3 @@ +<?xml version="1.0"?> +<data_managers> +</data_managers> diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 templates/webapps/galaxy/admin/index.mako --- a/templates/webapps/galaxy/admin/index.mako +++ b/templates/webapps/galaxy/admin/index.mako @@ -59,6 +59,7 @@ %if trans.app.config.enable_beta_job_managers: <div class="toolTitle"><a href="${h.url_for( controller='data_admin', action='manage_data' )}" target="galaxy_main">Manage local data</a></div> %endif + <div class="toolTitle"><a href="${h.url_for( controller='data_manager' )}" target="galaxy_main">Manage local data (beta)</a></div></div></div><div class="toolSectionPad"></div> diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 templates/webapps/galaxy/data_manager/index.mako --- /dev/null +++ b/templates/webapps/galaxy/data_manager/index.mako @@ -0,0 +1,57 @@ +<%inherit file="/base.mako"/> +<%namespace file="/message.mako" import="render_msg" /> + +<%def name="title()">Data Manager</%def> + +%if message: + ${render_msg( message, status )} +%endif + +<h2>Data Manager</h2> + +%if 
view_only: + <p>Not implemented</p> +%else: + <p>Choose your data managing option from below.</p> + <ul> + <li><strong>Access data managers</strong> - get data, build indexes, etc + <p/> + <ul> + %for data_manager_id, data_manager in data_managers.data_managers.iteritems(): + <li> + <a href="${ h.url_for( 'tool_runner?tool_id=%s' % ( data_manager.tool.id ) ) }"><strong>${ data_manager.name | h }</strong></a> - ${ data_manager.description | h } + </li> + <p/> + %endfor + </ul> + </li> + <p/> + <li><strong>View managed data by manager</strong> + <p/> + <ul> + %for data_manager_id, data_manager in data_managers.data_managers.iteritems(): + <li> + <a href="${h.url_for( controller='data_manager', action='manage_data_manager', id=data_manager_id)}" target="galaxy_main"><strong>${ data_manager.name | h }</strong></a> - ${ data_manager.description | h }</a> + </li> + <p/> + %endfor + </ul> + </li> + <p/> + <p/> + <li><strong>View managed data by Tool Data Table</strong> + <p/> + <ul> + %for table_name, managers in data_managers.managed_data_tables.iteritems(): + <li> + <a href="${h.url_for( controller='data_manager', action='manage_data_table', table_name=table_name)}" target="galaxy_main"><strong>${ table_name | h }</strong></a> + </li> + <p/> + %endfor + </ul> + </li> + <p/> + </ul> + <p/> + <br/> +%endif diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 templates/webapps/galaxy/data_manager/manage_data_manager.mako --- /dev/null +++ b/templates/webapps/galaxy/data_manager/manage_data_manager.mako @@ -0,0 +1,50 @@ +<%inherit file="/base.mako"/> +<%namespace file="/message.mako" import="render_msg" /> + +<%def name="title()">Data Manager: ${ data_manager.name | h } - ${ data_manager.description | h }</%def> + +%if message: + ${render_msg( message, status )} +%endif + +<h2>Data Manager: ${ data_manager.name | h } - ${ data_manager.description | h }</h2> + +%if view_only: + <p>Not implemented</p> +%else: + <p>Access managed data by 
job</p> + +%if jobs: +<form name="jobs" action="${h.url_for()}" method="POST"> + <table class="manage-table colored" border="0" cellspacing="0" cellpadding="0" width="100%"> + <tr class="header"> + <td>Job ID</td> + <td>User</td> + <td>Last Update</td> + <td>State</td> + <td>Command Line</td> + <td>Job Runner</td> + <td>PID/Cluster ID</td> + </tr> + %for job in jobs: + <td><a href="${ h.url_for( controller="data_manager", action="view_job", id=trans.security.encode_id( job.id ) ) }">${ job.id | h }</a></td> + %if job.history and job.history.user: + <td>${job.history.user.email | h}</td> + %else: + <td>anonymous</td> + %endif + <td>${job.update_time | h}</td> + <td>${job.state | h}</td> + <td>${job.command_line | h}</td> + <td>${job.job_runner_name | h}</td> + <td>${job.job_runner_external_id | h}</td> + </tr> + %endfor + </table> + <p/> +</form> +%else: + <div class="infomessage">There are no jobs for this data manager.</div> +%endif + +%endif diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 templates/webapps/galaxy/data_manager/manage_data_table.mako --- /dev/null +++ b/templates/webapps/galaxy/data_manager/manage_data_table.mako @@ -0,0 +1,36 @@ +<%inherit file="/base.mako"/> +<%namespace file="/message.mako" import="render_msg" /> + +<%def name="title()">Data Table Manager: ${ data_table.name | h }</%def> + +%if message: + ${render_msg( message, status )} +%endif + +%if view_only: + <p>Not implemented</p> +%else: +<% column_name_list = data_table.get_column_name_list() %> +<table class="tabletip"> + <thead> + <tr><th colspan="${len (column_name_list) }" style="font-size: 120%;"> + Data Manager: ${ data_table.name | h } + </th></tr> + <tr> + + %for name in column_name_list: + <th>${name | h}</th> + %endfor + </tr> + </thead> + <tbody> + %for table_row in data_table.data: + <tr> + %for field in table_row: + <td>${field | h}</td> + %endfor + </tr> + %endfor +</table> + +%endif diff -r 
db08c095de0c249f3a6cde62f254c104de1fb6ea -r 138642a8dc55689a3cab1d070aac72b293265bc9 templates/webapps/galaxy/data_manager/view_job.mako --- /dev/null +++ b/templates/webapps/galaxy/data_manager/view_job.mako @@ -0,0 +1,57 @@ +<%inherit file="/base.mako"/> +<%namespace file="/message.mako" import="render_msg" /> +<% from galaxy.util import nice_size %> + +<%def name="title()">Data Manager: ${ data_manager.name | h } - ${ data_manager.description | h }</%def> + +%if message: + ${render_msg( message, status )} +%endif + +%if view_only: + <p>Not implemented</p> +%else: +%for i, hda in enumerate( hdas ): +<table class="tabletip"> + <thead> + <tr><th colspan="2" style="font-size: 120%;"> + Data Manager: ${ data_manager.name | h } - ${ data_manager.description | h } + </th></tr> + </thead> + <tbody> + <tr><td>Name:</td><td>${hda.name | h}</td></tr> + <tr><td>Created:</td><td>${hda.create_time.strftime("%b %d, %Y")}</td></tr> + <tr><td>Filesize:</td><td>${nice_size(hda.dataset.file_size)}</td></tr> + <tr><td>Tool Exit Code:</td><td>${job.exit_code | h}</td></tr> + <tr><td>Full Path:</td><td>${hda.file_name | h}</td></tr> + <tr><td>View complete info:</td><td><a href="${h.url_for( controller='dataset', action='show_params', dataset_id=trans.security.encode_id( hda.id ))}">${ hda.id | h }</a></td></tr> + +</table> +<br /> + +<% json_tables = data_manager_output[i]%> +%for table_name, json_table in json_tables: +<table class="tabletip"> + <thead> + <tr><th colspan="2" style="font-size: 120%;"> + Data Table: ${ table_name | h } + </th></tr> + </thead> + <% len_json_table = len( json_table ) %> + %for j, table_row in enumerate( json_table ): + <tbody> + %if len_json_table > 1: + <tr><td><strong>Entry #${j}</strong></td><td></td></tr> + %endif + %for name, value in table_row.iteritems(): + <tr><td>${name | h}:</td><td>${value | h}</td></tr> + %endfor + %endfor + </tbody> +</table> +<br /> +%endfor + +%endfor + +%endif diff -r db08c095de0c249f3a6cde62f254c104de1fb6ea -r 
138642a8dc55689a3cab1d070aac72b293265bc9 universe_wsgi.ini.sample --- a/universe_wsgi.ini.sample +++ b/universe_wsgi.ini.sample @@ -573,6 +573,16 @@ # Details" option in the history. Administrators can always see this. #expose_dataset_path = False +# Data manager configuration options +# Allow non-admin users to view available Data Manager options +#enable_data_manager_user_view = False +# File where Data Managers are configured +#data_manager_config_file = data_manager_conf.xml +# File where Tool Shed based Data Managers are configured +#shed_data_manager_config_file = shed_data_manager_conf.xml +# Directory to store Data Manager based tool-data; defaults to tool_data_path +#galaxy_data_manager_data_path = tool-data + # -- Job Execution # To increase performance of job execution and the web interface, you can Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.