1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/9e502242af28/
Changeset: 9e502242af28
User: jmchilton
Date: 2014-05-07 18:29:51
Summary: Simpler, more powerful subcollection mapping.
Collections can be mapped over 'data' parameters, and sufficiently nested collections can be mapped over 'data_collection' parameters (for instance, a list of 5 pairs can be supplied to a tool that takes in a pair, and 5 jobs will be executed). I (perhaps poorly) term these concepts collection mapping and subcollection mapping, respectively.
Prior to this changeset, the APIs for collection mapping and subcollection mapping were somewhat divergent, and the tool execution code explicitly forbade doing both kinds of mapping in the same tool execution even when the effective collections could be matched (e.g. it could not map a 'data' parameter over a list of 5 datasets and a pair parameter over a list of 5 pairs in the same execution).
This changeset should remedy that: as long as the effective collection mappings can be matched up, such jobs should be possible. The workflow editor (and, I think, the runner) already assumed this was possible, so this changeset reduces the tool-workflow impedance mismatch, an existing problem exacerbated by the recent introduction of dataset collections.
This all needs much more testing: tests that workflows execute this way, a functional test of a tool execution that combines collection mapping and subcollection mapping, etc.
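As a rough sketch of the mapping idea described above (the names below are hypothetical illustrations, not Galaxy APIs): a list of 5 pairs mapped over a tool that consumes a single pair yields 5 jobs.

    def plan_jobs(list_of_pairs):
        # Subcollection mapping: one tool execution per pair element of the list.
        return [{"input_pair": pair} for pair in list_of_pairs]

    pairs = [("forward_%d" % i, "reverse_%d" % i) for i in range(5)]
    jobs = plan_jobs(pairs)
    assert len(jobs) == 5  # a list of 5 pairs expands to 5 tool executions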
Affected #: 4 files
diff -r c5b736d3e4e5027391a04fd187e2f9d2aa4ebe46 -r 9e502242af28473660ee70ee0f98345f3604e137 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py
+++ b/lib/galaxy/tools/__init__.py
@@ -1925,7 +1925,7 @@
# Fixed set of input parameters may correspond to any number of jobs.
# Expand these out to individual parameters for given jobs (tool
# executions).
- expanded_incomings, collection_info = expand_meta_parameters( trans, incoming, self.inputs )
+ expanded_incomings, collection_info = expand_meta_parameters( trans, self, incoming )
if not expanded_incomings:
raise exceptions.MessageException( "Tool execution failed, trying to run a tool over an empty collection." )
diff -r c5b736d3e4e5027391a04fd187e2f9d2aa4ebe46 -r 9e502242af28473660ee70ee0f98345f3604e137 lib/galaxy/tools/parameters/basic.py
--- a/lib/galaxy/tools/parameters/basic.py
+++ b/lib/galaxy/tools/parameters/basic.py
@@ -2021,7 +2021,7 @@
self._ensure_selection( field )
return field
- def _get_select_dataset_collection_field( self, trans, history, multiple=False, suffix="|__subcollection_multirun__", value=None, other_values=None ):
+ def _get_select_dataset_collection_field( self, trans, history, multiple=False, suffix="|__collection_multirun__", value=None, other_values=None ):
field_name = "%s%s" % ( self.name, suffix )
field = form_builder.SelectField( field_name, multiple, None, self.refresh_on_change, refresh_on_change_values=self.refresh_on_change_values )
dataset_matcher = DatasetMatcher( trans, self, value, other_values )
diff -r c5b736d3e4e5027391a04fd187e2f9d2aa4ebe46 -r 9e502242af28473660ee70ee0f98345f3604e137 lib/galaxy/tools/parameters/meta.py
--- a/lib/galaxy/tools/parameters/meta.py
+++ b/lib/galaxy/tools/parameters/meta.py
@@ -9,7 +9,7 @@
log = logging.getLogger( __name__ )
-def expand_meta_parameters( trans, incoming, inputs ):
+def expand_meta_parameters( trans, tool, incoming ):
"""
Take in a dictionary of raw incoming parameters and expand to a list
of expanded incoming parameters (one set of parameters per tool
@@ -34,26 +34,23 @@
def collection_classifier( input_key ):
multirun_key = "%s|__collection_multirun__" % input_key
if multirun_key in incoming:
- encoded_hdc_id = incoming[ multirun_key ]
- hdc_id = trans.app.security.decode_id( encoded_hdc_id )
- hdc = trans.sa_session.query( model.HistoryDatasetCollectionAssociation ).get( hdc_id )
- collections_to_match.add( input_key, hdc )
- hdas = hdc.collection.dataset_instances
- return permutations.input_classification.MATCHED, hdas
- else:
- return permutations.input_classification.SINGLE, incoming[ input_key ]
-
- def subcollection_classifier( input_key ):
- multirun_key = "%s|__subcollection_multirun__" % input_key
- if multirun_key in incoming:
incoming_val = incoming[ multirun_key ]
- # value will be "hdca_id|subcollection_type"
- encoded_hdc_id, subcollection_type = incoming_val.split( "|", 1 )
+ # If subcollection multirun of a data_collection param, the value will
+ # be "hdca_id|subcollection_type"; otherwise it will just be the hdca_id.
+ if "|" in incoming_val:
+ encoded_hdc_id, subcollection_type = incoming_val.split( "|", 1 )
+ else:
+ encoded_hdc_id = incoming_val
+ subcollection_type = None
hdc_id = trans.app.security.decode_id( encoded_hdc_id )
hdc = trans.sa_session.query( model.HistoryDatasetCollectionAssociation ).get( hdc_id )
collections_to_match.add( input_key, hdc, subcollection_type=subcollection_type )
- subcollection_elements = subcollections.split_dataset_collection_instance( hdc, subcollection_type )
- return permutations.input_classification.MATCHED, subcollection_elements
+ if subcollection_type is not None:
+ subcollection_elements = subcollections.split_dataset_collection_instance( hdc, subcollection_type )
+ return permutations.input_classification.MATCHED, subcollection_elements
+ else:
+ hdas = hdc.collection.dataset_instances
+ return permutations.input_classification.MATCHED, hdas
else:
return permutations.input_classification.SINGLE, incoming[ input_key ]
@@ -72,26 +69,17 @@
multirun_found = False
collection_multirun_found = False
- subcollection_multirun_found = False
for key, value in incoming.iteritems():
multirun_found = try_replace_key( key, "|__multirun__" ) or multirun_found
collection_multirun_found = try_replace_key( key, "|__collection_multirun__" ) or collection_multirun_found
- subcollection_multirun_found = try_replace_key( key, "|__subcollection_multirun__" ) or subcollection_multirun_found
- if sum( [ 1 if f else 0 for f in [ multirun_found, collection_multirun_found, subcollection_multirun_found ] ] ) > 1:
+ if sum( [ 1 if f else 0 for f in [ multirun_found, collection_multirun_found ] ] ) > 1:
# In theory doable, but too complicated for a first pass.
message = "Cannot specify parallel execution across both multiple datasets and dataset collections."
raise exceptions.ToolMetaParameterException( message )
if multirun_found:
return permutations.expand_multi_inputs( incoming_template, classifier ), None
- elif subcollection_multirun_found:
- expanded_incomings = permutations.expand_multi_inputs( incoming_template, subcollection_classifier )
- if collections_to_match.has_collections():
- collection_info = trans.app.dataset_collections_service.match_collections( collections_to_match )
- else:
- collection_info = None
- return expanded_incomings, collection_info
else:
expanded_incomings = permutations.expand_multi_inputs( incoming_template, collection_classifier )
if collections_to_match.has_collections():
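Distilled from the diff above, the classifier now derives both behaviors from the single '__collection_multirun__' value: a plain encoded HDCA id requests collection mapping, while "hdca_id|subcollection_type" requests subcollection mapping. A standalone sketch (the ids here are placeholders):

    def parse_multirun_value(incoming_val):
        # "hdca_id|subcollection_type" -> subcollection mapping;
        # a bare encoded hdca_id -> plain collection mapping.
        if "|" in incoming_val:
            encoded_hdc_id, subcollection_type = incoming_val.split("|", 1)
        else:
            encoded_hdc_id, subcollection_type = incoming_val, None
        return encoded_hdc_id, subcollection_type

    assert parse_multirun_value("abc123") == ("abc123", None)
    assert parse_multirun_value("abc123|paired") == ("abc123", "paired")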
diff -r c5b736d3e4e5027391a04fd187e2f9d2aa4ebe46 -r 9e502242af28473660ee70ee0f98345f3604e137 test/api/test_tools.py
--- a/test/api/test_tools.py
+++ b/test/api/test_tools.py
@@ -264,7 +264,7 @@
history_id = self.dataset_populator.new_history()
hdca_list_id = self.__build_nested_list( history_id )
inputs = {
- "f1|__subcollection_multirun__": "%s|paired" % hdca_list_id
+ "f1|__collection_multirun__": "%s|paired" % hdca_list_id
}
# The following wait is not really needed - we were just getting so many
# database locked errors with sqlite.
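For reference, both request shapes now share the one suffix; the test above uses the subcollection form (the id below is a placeholder):

    inputs_collection_mapping = {
        "f1|__collection_multirun__": "<encoded_hdca_id>"  # map a flat list over a 'data' param
    }
    inputs_subcollection_mapping = {
        "f1|__collection_multirun__": "<encoded_hdca_id>|paired"  # map a list of pairs over a pair param
    }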
1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/3534f67acd2a/
Changeset: 3534f67acd2a
Branch: stable
User: dannon
Date: 2014-05-07 15:45:53
Summary: Force an absolute path for the temp directory in the local runner
Affected #: 1 file
diff -r 61b4a993d39c5341afea857859221a4f5b94334f -r 3534f67acd2a4f84899335326ac2cc8bcf05201d lib/galaxy/jobs/runners/local.py
--- a/lib/galaxy/jobs/runners/local.py
+++ b/lib/galaxy/jobs/runners/local.py
@@ -39,7 +39,7 @@
#Set TEMP if a valid temp value is not already set
if not ( 'TMPDIR' in self._environ or 'TEMP' in self._environ or 'TMP' in self._environ ):
- self._environ[ 'TEMP' ] = tempfile.gettempdir()
+ self._environ[ 'TEMP' ] = os.path.abspath(tempfile.gettempdir())
super( LocalJobRunner, self ).__init__( app, nworkers )
self._init_worker_threads()
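My reading of the rationale (the commit message does not spell it out): tempfile.gettempdir() honors TMPDIR/TEMP/TMP from the environment, so a relative value such as TMPDIR=./tmp would be handed to jobs as a relative path and break once a job changes its working directory. Resolving it once at startup pins the path:

    import os
    import tempfile

    # abspath resolves against the runner's current working directory, giving a
    # stable absolute temp directory regardless of later directory changes.
    temp = os.path.abspath(tempfile.gettempdir())
    assert os.path.isabs(temp)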
1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/a9e4f0ea892c/
Changeset: a9e4f0ea892c
User: dannon
Date: 2014-05-07 15:45:53
Summary: Force an absolute path for the temp directory in the local runner
Affected #: 1 file
diff -r ac95389c7eb502345d0321c54b750f134c5e6ba9 -r a9e4f0ea892c8b7da3f1e468195dc1d29914fd38 lib/galaxy/jobs/runners/local.py
--- a/lib/galaxy/jobs/runners/local.py
+++ b/lib/galaxy/jobs/runners/local.py
@@ -39,7 +39,7 @@
#Set TEMP if a valid temp value is not already set
if not ( 'TMPDIR' in self._environ or 'TEMP' in self._environ or 'TMP' in self._environ ):
- self._environ[ 'TEMP' ] = tempfile.gettempdir()
+ self._environ[ 'TEMP' ] = os.path.abspath(tempfile.gettempdir())
super( LocalJobRunner, self ).__init__( app, nworkers )
self._init_worker_threads()
1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/ac95389c7eb5/
Changeset: ac95389c7eb5
User: greg
Date: 2014-05-07 12:42:28
Summary: Fix typo introduced in renaming the InstallManager to the ToolMigrationManager - thanks Nicola Soranzo.
Affected #: 2 files
diff -r e7b579bec0e277f0c2e26cf23661ebfa7fa5ec2d -r ac95389c7eb502345d0321c54b750f134c5e6ba9 lib/tool_shed/galaxy_install/migrate/common.py
--- a/lib/tool_shed/galaxy_install/migrate/common.py
+++ b/lib/tool_shed/galaxy_install/migrate/common.py
@@ -48,7 +48,7 @@
latest_migration_script_number = int( tools_migration_config.split( '_' )[ 0 ] )
# The value of migrated_tools_config is migrated_tools_conf.xml, and is reserved for
# containing only those tools that have been eliminated from the distribution and moved
- # to the tool shed. A side-effect of instantiating the ToolMigrationlManager is the automatic
+ # to the tool shed. A side-effect of instantiating the ToolMigrationManager is the automatic
# installation of all appropriate tool shed repositories.
self.tool_migration_manager = \
tool_migration_manager.ToolMigrationManager( app=self,
diff -r e7b579bec0e277f0c2e26cf23661ebfa7fa5ec2d -r ac95389c7eb502345d0321c54b750f134c5e6ba9 lib/tool_shed/galaxy_install/tool_migration_manager.py
--- a/lib/tool_shed/galaxy_install/tool_migration_manager.py
+++ b/lib/tool_shed/galaxy_install/tool_migration_manager.py
@@ -24,7 +24,7 @@
log = logging.getLogger( __name__ )
-class ToolMigrationlManager( object ):
+class ToolMigrationManager( object ):
def __init__( self, app, latest_migration_script_number, tool_shed_install_config, migrated_tools_config, install_dependencies ):
"""
@@ -446,7 +446,7 @@
from_tool_migration_manager=True )
for installed_tool_dependency in installed_tool_dependencies:
if installed_tool_dependency.status == self.app.install_model.ToolDependency.installation_status.ERROR:
- print '\nThe following error occurred from the InstallManager while installing tool dependency ', installed_tool_dependency.name, ':'
+ print '\nThe ToolMigrationManager returned the following error while installing tool dependency ', installed_tool_dependency.name, ':'
print installed_tool_dependency.error_message, '\n\n'
if 'datatypes' in metadata_dict:
tool_shed_repository.status = self.app.install_model.ToolShedRepository.installation_status.LOADING_PROPRIETARY_DATATYPES