[hg] galaxy 3356: First pass at allowing explicit datatype conve...

details: http://www.bx.psu.edu/hg/galaxy/rev/c64ef44ed4c5 changeset: 3356:c64ef44ed4c5 user: Dan Blankenberg <dan@bx.psu.edu> date: Mon Feb 08 12:45:28 2010 -0500 description: First pass at allowing explicit datatype conversion to be specified. This can be used for e.g. providing a tool with a non-metadata based index file. Defined as part of a DataToolParameter: <param name="input1" type="interval" label="An Interval File"> <converter name='input1_as_a_bed_file' type='bed'/> </param> Both the original input as well as the converted dataset can be accessed like: <command>some_binary $input1 $input1_as_a_bed_file </command> if $input1 is already BED, it will be used for input1_as_a_bed_file. The name is placed in the dictionary space of the data input parameter's parent; so for Grouping objects, e.g. a repeat: <repeat name="queries" title="Query"> <param name="input2" type="data" label="Select" > <converter name='input2_as_a_bed_file' type='bed'/> </param> </repeat> is accessed like (putting both the original and converted dataset as arguments on the command line): <command> ... #for $q in $queries ${q.input2} ${q.input2_as_a_bed_file} #end for ... </command> See notes in code in commit for additional comments. 
diffstat: lib/galaxy/tools/__init__.py | 28 ++++++++++++++++++++++++ lib/galaxy/tools/actions/__init__.py | 41 +++++++++++++++++++++++++++++------ lib/galaxy/tools/parameters/basic.py | 13 +++++++++- 3 files changed, 73 insertions(+), 9 deletions(-) diffs (147 lines): diff -r 26f01eafc6bd -r c64ef44ed4c5 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Mon Feb 08 11:20:44 2010 -0500 +++ b/lib/galaxy/tools/__init__.py Mon Feb 08 12:45:28 2010 -0500 @@ -1198,6 +1198,28 @@ current = values["__current_case__"] wrap_values( input.cases[current].inputs, values ) elif isinstance( input, DataToolParameter ): + ##FIXME: We're populating param_dict with converters when wrapping values, + ##this should happen as a separate step before wrapping (or call this wrapping step something more generic) + ##(but iterating this same list twice would be wasteful) + #add explicit converters by name to current parent + for converter_name, converter_extensions, converter_datatypes in input.converters: + #if we are at building cmdline step, then converters have already executed + conv_ext, converted_dataset = input_values[ input.name ].find_conversion_destination( converter_datatypes ) + #when dealing with optional inputs, we'll provide a valid extension to be used for None converted dataset + if not conv_ext: + conv_ext = converter_extensions[0] + #input_values[ input.name ] is None when optional dataset, + #'conversion' of optional dataset should create wrapper around NoneDataset for converter output + if input_values[ input.name ] and not converted_dataset: + #input that converter is based from has a value, but converted dataset does not exist + raise Exception, 'A path for explicit datatype conversion has not been found: %s --/--> %s' % ( input_values[ input.name ].extension, converter_extensions ) + else: + input_values[ converter_name ] = \ + DatasetFilenameWrapper( converted_dataset, + datatypes_registry = self.app.datatypes_registry, + tool = Bunch( converter_name = 
Bunch( extensions = conv_ext ) ), #trick wrapper into using target conv ext (when None) without actually being a tool parameter + name = converter_name ) + #wrap actual input dataset input_values[ input.name ] = \ DatasetFilenameWrapper( input_values[ input.name ], datatypes_registry = self.app.datatypes_registry, @@ -1212,6 +1234,12 @@ # tools (e.g. UCSC) should really be handled in a special way. if self.check_values: wrap_values( self.inputs, param_dict ) + ###FIXME: when self.check_values==True, input datasets are being wrapped twice + ### (above and below, creating 2 separate DatasetFilenameWrapper objects - first is overwritten by second), + ###is this necessary? - if we get rid of this way to access children, can we stop this redundancy, or is there another reason for this? + ###Only necessary when self.check_values is False (==external dataset tool?: can this be abstracted out as part of being a datasouce tool?) + ### but we still want (ALWAYS) to wrap input datasets + ### (this should be checked to prevent overhead of creating a new object?) # Additionally, datasets go in the param dict. We wrap them such that # if the bare variable name is used it returns the filename (for # backwards compatibility). We also add any child datasets to the diff -r 26f01eafc6bd -r c64ef44ed4c5 lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py Mon Feb 08 11:20:44 2010 -0500 +++ b/lib/galaxy/tools/actions/__init__.py Mon Feb 08 12:45:28 2010 -0500 @@ -31,11 +31,13 @@ """ input_datasets = dict() def visitor( prefix, input, value, parent = None ): - def process_dataset( data ): - if data and not isinstance( data.datatype, input.formats ): + def process_dataset( data, formats = None ): + if formats is None: + formats = input.formats + if data and not isinstance( data.datatype, formats ): # Need to refresh in case this conversion just took place, i.e. 
input above in tool performed the same conversion trans.sa_session.refresh( data ) - target_ext, converted_dataset = data.find_conversion_destination( input.formats, converter_safe = input.converter_safe( param_values, trans ) ) + target_ext, converted_dataset = data.find_conversion_destination( formats, converter_safe = input.converter_safe( param_values, trans ) ) if target_ext: if converted_dataset: data = converted_dataset @@ -61,16 +63,41 @@ # are stored as name1, name2, ... for i, v in enumerate( value ): input_datasets[ prefix + input.name + str( i + 1 ) ] = process_dataset( v ) + converters = [] + for converter_name, converter_extensions, converter_datatypes in input.converters: + new_data = process_dataset( input_datasets[ prefix + input.name + str( i + 1 ) ], converter_datatypes ) + if not new_data or isinstance( new_data.datatype, converter_datatypes ): + input_datasets[ prefix + converter_name + str( i + 1 ) ] = new_data + converters.append( ( converter_name, new_data ) ) + else: + raise Exception, 'A path for explicit datatype conversion has not been found: %s --/--> %s' % ( input_datasets[ prefix + input.name + str( i + 1 ) ].extension, converter_extensions ) if parent: parent[input.name] = input_datasets[ prefix + input.name + str( i + 1 ) ] + for converter_name, converter_data in converters: + #allow explicit conversion to be stored in job_parameter table + parent[ converter_name ] = converter_data.id #a more robust way to determine JSONable value is desired else: param_values[input.name][i] = input_datasets[ prefix + input.name + str( i + 1 ) ] + for converter_name, converter_data in converters: + #allow explicit conversion to be stored in job_parameter table + param_values[ converter_name ][i] = converter_data.id #a more robust way to determine JSONable value is desired else: input_datasets[ prefix + input.name ] = process_dataset( value ) - if parent: - parent[input.name] = input_datasets[ prefix + input.name ] - else: - param_values[input.name] 
= input_datasets[ prefix + input.name ] + converters = [] + for converter_name, converter_extensions, converter_datatypes in input.converters: + new_data = process_dataset( input_datasets[ prefix + input.name ], converter_datatypes ) + if not new_data or isinstance( new_data.datatype, converter_datatypes ): + input_datasets[ prefix + converter_name ] = new_data + converters.append( ( converter_name, new_data ) ) + else: + raise Exception, 'A path for explicit datatype conversion has not been found: %s --/--> %s' % ( input_datasets[ prefix + input.name ].extension, converter_extensions ) + target_dict = parent + if not target_dict: + target_dict = param_values + target_dict[ input.name ] = input_datasets[ prefix + input.name ] + for converter_name, converter_data in converters: + #allow explicit conversion to be stored in job_parameter table + target_dict[ converter_name ] = converter_data.id #a more robust way to determine JSONable value is desired tool.visit_inputs( param_values, visitor ) return input_datasets diff -r 26f01eafc6bd -r c64ef44ed4c5 lib/galaxy/tools/parameters/basic.py --- a/lib/galaxy/tools/parameters/basic.py Mon Feb 08 11:20:44 2010 -0500 +++ b/lib/galaxy/tools/parameters/basic.py Mon Feb 08 12:45:28 2010 -0500 @@ -50,14 +50,14 @@ def get_html( self, trans=None, value=None, other_values={}): """ - Returns the html widget corresponding to the paramter. + Returns the html widget corresponding to the parameter. Optionally attempt to retain the current value specific by 'value' """ return self.get_html_field( trans, value, other_values ).get_html() def from_html( self, value, trans=None, other_values={} ): """ - Convert a value from an HTML POST into the parameters prefered value + Convert a value from an HTML POST into the parameters preferred value format. 
""" return value @@ -1168,6 +1168,15 @@ else: self.options = dynamic_options.DynamicOptions( options, self ) self.is_dynamic = self.options is not None + # Load converters required for the dataset input + self.converters = [] + for conv_elem in elem.findall( "converter" ): + name = conv_elem.get( "name" ) #name for commandline substitution + conv_extensions = conv_elem.get( "type" ) #target datatype extension + # FIXME: conv_extensions should be able to be an ordered list + assert None not in [ name, type ], 'A name (%s) and type (%s) are required for explicit conversion' % ( name, type ) + conv_types = tool.app.datatypes_registry.get_datatype_by_extension( conv_extensions.lower() ).__class__ + self.converters.append( ( name, conv_extensions, conv_types ) ) def get_html_field( self, trans=None, value=None, other_values={} ): filter_value = None
participants (1)
-
Greg Von Kuster