2 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/31b2924315d0/ Changeset: 31b2924315d0 User: jmchilton Date: 2014-06-03 19:48:18 Summary: Mechanism to define site-specific parameters to aid defining job destinations. This is based on work by Hector del Risco at the University of Florida. Various tool-style parameters can be defined in job_resource_params_conf.xml (see sample) and these define the pool of all possible resource parameters that can be injected into a tool. These can be grouped together and assigned to tools in job_conf.xml (see documented changes to job_conf.xml.sample_advanced). A conditional is then injected into each tool that gives the end-user the ability to select these parameters. The values of these parameters (as a dict) can then be obtained by dynamic job destinations that define a 'resource_params' argument. Lots here still needs to be tested/verified - can workflows exported from Galaxies without resource parameters (or different ones) be reused in Galaxies with resource parameters (I think yes?)? can workflows exported with resource parameters be used in other Galaxies (I think yes?), how best to present UI while running workflows? (maybe nothing needs to be changed), should exported job parameter data be swept clean (say when exporting a workflow for instance). A great extension to this would be to support conditionals when defining parameters. Finally, the Galaxy team will make a reasonable effort to support this syntax for job_conf.xml, job_resource_params_conf.xml, and internal interface for job parameters for some time once this changeset has reached -stable but until then these interfaces may change. 
Affected #: 8 files diff -r 1d1d881508b121df607d9096386effdbe072427e -r 31b2924315d0b48fa7648ab4bfd04ef754829f71 job_conf.xml.sample_advanced --- a/job_conf.xml.sample_advanced +++ b/job_conf.xml.sample_advanced @@ -240,6 +240,15 @@ <param id="request_cpus">8</param></destination></destinations> + <resources default="default"> + <!-- Group different parameters defined in job_resource_params_conf.xml + together and assign these groups ids. Tool section below can map + tools to different groups. This is experimental functionality! + --> + <group id="default"></group> + <group id="memoryonly">memory</group> + <group id="all">processors,memory,time,project</group> + </resources><tools><!-- Tools can be configured to use specific destinations or handlers, identified by either the "id" or "tags" attribute. If assigned to @@ -250,6 +259,9 @@ <param id="source">trackster</param></tool><tool id="bar" destination="dynamic"/> + <!-- Next example defines resource group to insert into tool interface + and pass to dynamic destination (as resource_params argument). --> + <tool id="longbar" destination="dynamic" resources="all" /><tool id="baz" handler="special_handlers" destination="bigmem"/></tools><limits> diff -r 1d1d881508b121df607d9096386effdbe072427e -r 31b2924315d0b48fa7648ab4bfd04ef754829f71 job_resource_params_conf.xml.sample --- /dev/null +++ b/job_resource_params_conf.xml.sample @@ -0,0 +1,6 @@ +<parameters> + <param label="Processors" name="processors" type="integer" size="2" min="1" max="64" value="" help="Number of processing cores, 'ppn' value (1-64). Leave blank to use default value." /> + <param label="Memory" name="memory" type="integer" size="3" min="1" max="256" value="" help="Memory size in gigabytes, 'pmem' value (1-256). Leave blank to use default value." /> + <param label="Time" name="time" type="integer" size="3" min="1" max="744" value="" help="Maximum job time in hours, 'walltime' value (1-744). Leave blank to use default value." 
/> + <param label="Project" name="project" type="text" value="" help="Project to assign resource allocation to. Leave blank to use default value." /> +</parameters> diff -r 1d1d881508b121df607d9096386effdbe072427e -r 31b2924315d0b48fa7648ab4bfd04ef754829f71 lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -152,6 +152,7 @@ self.dependency_resolvers_config_file = resolve_path( kwargs.get( 'dependency_resolvers_config_file', 'dependency_resolvers_conf.xml' ), self.root ) self.job_metrics_config_file = resolve_path( kwargs.get( 'job_metrics_config_file', 'job_metrics_conf.xml' ), self.root ) self.job_config_file = resolve_path( kwargs.get( 'job_config_file', 'job_conf.xml' ), self.root ) + self.job_resource_params_file = resolve_path( kwargs.get( 'job_resource_params_file', 'job_resource_params_conf.xml' ), self.root ) self.local_job_queue_workers = int( kwargs.get( "local_job_queue_workers", "5" ) ) self.cluster_job_queue_workers = int( kwargs.get( "cluster_job_queue_workers", "3" ) ) self.job_queue_cleanup_interval = int( kwargs.get("job_queue_cleanup_interval", "5") ) diff -r 1d1d881508b121df607d9096386effdbe072427e -r 31b2924315d0b48fa7648ab4bfd04ef754829f71 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -79,6 +79,9 @@ self['params'] = dict() super(JobToolConfiguration, self).__init__(**kwds) + def get_resource_group( self ): + return self.get( "resources", None ) + class JobConfiguration( object ): """A parser and interface to advanced job management features. 
@@ -99,8 +102,12 @@ self.destination_tags = {} self.default_destination_id = None self.tools = {} + self.resource_groups = {} + self.default_resource_group = None + self.resource_parameters = {} self.limits = Bunch() + self.__parse_resource_parameters() # Initialize the config try: tree = util.parse_xml(self.app.config.job_config_file) @@ -190,6 +197,16 @@ # Determine the default destination self.default_destination_id = self.__get_default(destinations, self.destinations.keys()) + # Parse resources... + resources = root.find('resources') + if resources is not None: + self.default_resource_group = resources.get( "default", None ) + for group in self.__findall_with_required(resources, 'group'): + id = group.get('id') + fields_str = group.get('fields', None) or group.text or '' + fields = [ f for f in fields_str.split(",") if f ] + self.resource_groups[ id ] = fields + # Parse tool mappings tools = root.find('tools') if tools is not None: @@ -297,6 +314,49 @@ log.debug('Done loading job configuration') + def get_tool_resource_parameters( self, tool_id ): + """ Given a tool id, return XML elements describing parameters to + insert into job resources. + + :tool id: A tool ID (a string) + + :returns: List of parameter elements. + """ + fields = [] + + if not tool_id: + return fields + + # TODO: Only works with exact matches, should handle different kinds of ids + # the way destination lookup does. 
+ resource_group = None + if tool_id in self.tools: + resource_group = self.tools[ tool_id ][ 0 ].get_resource_group() + resource_group = resource_group or self.default_resource_group + + if resource_group and resource_group in self.resource_groups: + fields_names = self.resource_groups[ resource_group ] + fields = [ self.resource_parameters[ n ] for n in fields_names ] + + return fields + + def __parse_resource_parameters( self ): + if not os.path.exists( self.app.config.job_resource_params_file ): + return + + resource_definitions = util.parse_xml( self.app.config.job_resource_params_file ) + resource_definitions_root = resource_definitions.getroot() + # TODO: Also handling conditionals would be awesome! + for parameter_elem in resource_definitions_root.findall( "param" ): + name = parameter_elem.get( "name" ) + # Considered prepending __job_resource_param__ here and then + # stripping it off when making it available to dynamic job + # destination. Not needed because resource parameters are wrapped + # in a conditional. + ## expanded_name = "__job_resource_param__%s" % name + ## parameter_elem.set( "name", expanded_name ) + self.resource_parameters[ name ] = parameter_elem + def __get_default(self, parent, names): """Returns the default attribute set in a parent tag like <handlers> or <destinations>, or return the ID of the child, if there is no explicit default and only one child. 
diff -r 1d1d881508b121df607d9096386effdbe072427e -r 31b2924315d0b48fa7648ab4bfd04ef754829f71 lib/galaxy/jobs/mapper.py --- a/lib/galaxy/jobs/mapper.py +++ b/lib/galaxy/jobs/mapper.py @@ -71,7 +71,7 @@ actual_args[ possible_arg_name ] = possible_args[ possible_arg_name ] # Don't hit the DB to load the job object if not needed - if "job" in function_arg_names or "user" in function_arg_names or "user_email" in function_arg_names: + if "job" in function_arg_names or "user" in function_arg_names or "user_email" in function_arg_names or "resource_params" in function_arg_names: job = self.job_wrapper.get_job() history = job.history user = history and history.user @@ -86,6 +86,24 @@ if "user_email" in function_arg_names: actual_args[ "user_email" ] = user_email + if "resource_params" in function_arg_names: + # Find the dymically inserted resource parameters and give them + # to rule. + app = self.job_wrapper.app + param_values = job.get_param_values( app, ignore_errors=True ) + resource_params = {} + try: + resource_params_raw = param_values[ "__job_resource" ] + if resource_params_raw[ "__job_resource__select" ].lower() in [ "1", "yes", "true" ]: + for key, value in resource_params_raw.iteritems(): + #if key.startswith( "__job_resource_param__" ): + # resource_key = key[ len( "__job_resource_param__" ): ] + # resource_params[ resource_key ] = value + resource_params[ key ] = value + except KeyError: + pass + actual_args[ "resource_params" ] = resource_params + return expand_function( **actual_args ) def __convert_url_to_destination( self, url ): diff -r 1d1d881508b121df607d9096386effdbe072427e -r 31b2924315d0b48fa7648ab4bfd04ef754829f71 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -86,6 +86,16 @@ WORKFLOW_PARAMETER_REGULAR_EXPRESSION = re.compile( '''\$\{.+?\}''' ) +JOB_RESOURCE_CONDITIONAL_XML = """<conditional name="__job_resource"> + <param name="__job_resource__select" type="select" label="Job Resource 
Parameters"> + <option value="no">Use default job resource parameters</option> + <option value="yes">Specify job resource parameters</option> + </param> + <when value="no"></when> + <when value="yes"> + </when> +</conditional>""" + class ToolNotFoundException( Exception ): pass @@ -678,6 +688,24 @@ elif root.get( 'tool_type', None ) is not None: ToolClass = tool_types.get( root.get( 'tool_type' ) ) else: + # Normal tool - only insert dynamic resource parameters for these + # tools. + if hasattr( self.app, "job_config" ): # toolshed may not have job_config? + tool_id = root.get( 'id' ) if root else None + parameters = self.app.job_config.get_tool_resource_parameters( tool_id ) + if parameters: + inputs = root.find('inputs') + # If tool has not inputs, create some so we can insert conditional + if not inputs: + inputs = ElementTree.fromstring( "<inputs></inputs>") + root.append( inputs ) + # Insert a conditional allowing user to specify resource parameters. + conditional_element = ElementTree.fromstring( JOB_RESOURCE_CONDITIONAL_XML ) + when_yes_elem = conditional_element.findall( "when" )[ 1 ] + for parameter in parameters: + when_yes_elem.append( parameter ) + inputs.append( conditional_element ) + ToolClass = Tool return ToolClass( config_file, root, self.app, guid=guid, repository_id=repository_id, **kwds ) diff -r 1d1d881508b121df607d9096386effdbe072427e -r 31b2924315d0b48fa7648ab4bfd04ef754829f71 test/unit/jobs/test_job_configuration.py --- a/test/unit/jobs/test_job_configuration.py +++ b/test/unit/jobs/test_job_configuration.py @@ -20,6 +20,7 @@ self.config = bunch.Bunch( job_config_file=os.path.join( self.temp_directory, "job_conf.xml" ), use_tasked_jobs=False, + job_resource_params_file="/tmp/fake_absent_path", ) self.__write_config_from( SIMPLE_JOB_CONF ) self.app = bunch.Bunch( config=self.config, job_metrics=MockJobMetrics() ) diff -r 1d1d881508b121df607d9096386effdbe072427e -r 31b2924315d0b48fa7648ab4bfd04ef754829f71 universe_wsgi.ini.sample --- 
a/universe_wsgi.ini.sample +++ b/universe_wsgi.ini.sample @@ -807,6 +807,12 @@ # individually. This only affects cluster jobs, not local jobs. #environment_setup_file = None + +# Optional file containing job resource data entry fields definition. +# These fields will be presented to users in the tool forms and allow them to +# overwrite default job resources such as number of processors, memory, and walltime. +#job_resource_params_file = job_resource_params_conf.xml + # If using job concurrency limits (configured in job_config_file), several # extra database queries must be performed to determine the number of jobs a # user has dispatched to a given destination. By default, these queries will https://bitbucket.org/galaxy/galaxy-central/commits/13ab3f66d332/ Changeset: 13ab3f66d332 User: jmchilton Date: 2014-06-03 19:48:18 Summary: Fix unit tests for 46a487e. Affected #: 1 file diff -r 31b2924315d0b48fa7648ab4bfd04ef754829f71 -r 13ab3f66d332126c72b23290f7d458223795ffce test/unit/jobs/test_job_configuration.py --- a/test/unit/jobs/test_job_configuration.py +++ b/test/unit/jobs/test_job_configuration.py @@ -100,16 +100,17 @@ assert limits.walltime is None assert limits.walltime_delta is None assert limits.output_size is None - assert limits.concurrent_jobs == {} + assert limits.destination_user_concurrent_jobs == {} + assert limits.destination_total_concurrent_jobs == {} def test_limit_overrides( self ): self.__with_advanced_config() limits = self.job_config.limits assert limits.registered_user_concurrent_jobs == 2 assert limits.anonymous_user_concurrent_jobs == 1 - assert limits.concurrent_jobs[ "local" ] == 1 - assert limits.concurrent_jobs[ "mycluster" ] == 2 - assert limits.concurrent_jobs[ "longjobs" ] == 1 + assert limits.destination_user_concurrent_jobs[ "local" ] == 1 + assert limits.destination_user_concurrent_jobs[ "mycluster" ] == 2 + assert limits.destination_user_concurrent_jobs[ "longjobs" ] == 1 assert limits.walltime_delta == datetime.timedelta( 0, 0, 
0, 0, 0, 24 ) def test_env_parsing( self ): Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.