details: http://www.bx.psu.edu/hg/galaxy/rev/dd5f1fe8f5e9 changeset: 2485:dd5f1fe8f5e9 user: Dan Blankenberg <dan@bx.psu.edu> date: Wed Jul 15 14:11:35 2009 -0400 description: Initial pass at allowing the setting of certain metadata parameters on upload (controlled via a flag). This allows the user to specify the 'base_name' to be used for Rgenetics datatypes, etc. Bunch of cleanup needed in upload. 9 file(s) affected in this change: lib/galaxy/datatypes/data.py lib/galaxy/datatypes/genetics.py lib/galaxy/datatypes/metadata.py lib/galaxy/datatypes/registry.py lib/galaxy/tools/__init__.py lib/galaxy/tools/actions/upload.py lib/galaxy/tools/parameters/grouping.py templates/tool_form.mako tools/data_source/upload.xml diffs (418 lines): diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/datatypes/data.py --- a/lib/galaxy/datatypes/data.py Wed Jul 15 12:18:43 2009 -0400 +++ b/lib/galaxy/datatypes/data.py Wed Jul 15 14:11:35 2009 -0400 @@ -49,6 +49,8 @@ """If False, the peek is regenerated whenever a dataset of this type is copied""" copy_safe_peek = True + + is_binary = True #The dataset contains binary data --> do not space_to_tab or convert newlines, etc. Allow binary file uploads of this type when True. #Composite datatypes composite_type = None @@ -250,7 +252,8 @@ def after_edit( self, dataset ): """This function is called on the dataset after metadata is edited.""" dataset.clear_associated_files( metadata_safe = True ) - def __new_composite_file( self, optional = False, mimetype = None, description = None, substitute_name_with_metadata = None, **kwds ): + def __new_composite_file( self, name, optional = False, mimetype = None, description = None, substitute_name_with_metadata = None, **kwds ): + kwds[ 'name' ] = name kwds[ 'optional' ] = optional kwds[ 'mimetype' ] = mimetype kwds[ 'description' ] = description @@ -258,7 +261,7 @@ return Bunch( **kwds ) def add_composite_file( self, name, **kwds ): #self.composite_files = self.composite_files.copy() - self.composite_files[ name ] = self.__new_composite_file( **kwds ) + self.composite_files[ name ] = self.__new_composite_file( name, **kwds ) def __substitute_composite_key( self, key, composite_file, dataset = None ): @@ -273,7 +276,7 @@ def writable_files( self, dataset = None ): files = odict() if self.composite_type != 'auto_primary_file': - files[ self.primary_file_name ] = self.__new_composite_file() + files[ self.primary_file_name ] = self.__new_composite_file( self.primary_file_name ) for key, value in self.get_composite_files( dataset = dataset ).iteritems(): files[ key ] = value return files diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/datatypes/genetics.py --- a/lib/galaxy/datatypes/genetics.py Wed Jul 15 12:18:43 2009 -0400 +++ b/lib/galaxy/datatypes/genetics.py Wed Jul 15 14:11:35 2009 -0400 @@ -117,7 +117,7 @@ class Rgenetics(Html): """class to use for rgenetics""" """Add metadata elements""" - MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default="galaxy", readonly=True) + MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default="galaxy", readonly=True, set_in_upload=True) file_ext="html" composite_type = 'auto_primary_file' @@ -151,10 +151,7 @@ else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' - #def sniff( self, filename ): - # """ - # """ - # return True + class Lped(Rgenetics): """fake class to distinguish different species of Rgenetics data collections @@ -245,7 +242,10 @@ 
MetadataElement( name="columns", default=0, desc="Number of columns", readonly=True, visible=False ) MetadataElement( name="column_names", default=[], desc="Column names", readonly=True,visible=True ) MetadataElement( name="base_name", - desc="base name for all transformed versions of this genetic dataset", readonly=True) + desc="base name for all transformed versions of this genetic dataset", readonly=True, default='galaxy', set_in_upload=True) + ### Do we really need these below? can we rely on dataset.extra_files_path: os.path.join( dataset.extra_files_path, '%s.phenodata' % dataset.metadata.base_name ) ? + ### Do these have a different purpose? Ross will need to clarify + ### Uploading these datatypes will not work until this is sorted out (set_peek fails)... MetadataElement( name="pheno_path", desc="Path to phenotype data for this experiment", readonly=True) MetadataElement( name="pheno", @@ -253,11 +253,19 @@ file_ext = None + is_binary = True + + composite_type = 'basic' + + def __init__( self, **kwd ): + Html.__init__( self, **kwd ) + self.add_composite_file( '%s.phenodata', substitute_name_with_metadata = 'base_name' ) + def set_peek( self, dataset ): """expects a .pheno file in the extra_files_dir - ugh note that R is wierd and does not include the row.name in the header. why?""" - p = file(dataset.metadata.pheno_path,'r').readlines() + p = file(dataset.metadata.pheno_path,'r').readlines() #this fails head = p[0].strip().split('\t') head.insert(0,'ChipFileName') # fix R write.table b0rken-ness p[0] = '\t'.join(head) @@ -295,6 +303,7 @@ if not dataset.peek: dataset.set_peek() pk = dataset.peek # use the peek which is the pheno data insead of dataset (!) + ###this is probably not the best source, can we just access the raw data directly? if pk: p = pk.split('\n') h = p[0].strip().split('\t') # hope is header @@ -339,10 +348,6 @@ """Returns the mime type of the datatype""" return 'application/gzip' - def sniff(self): - """ can we be bothered looking for the signature or loading via rpy? - """ - return true class AffyBatch( RexpBase ): """derived class for BioC data structures in Galaxy """ diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py Wed Jul 15 12:18:43 2009 -0400 +++ b/lib/galaxy/datatypes/metadata.py Wed Jul 15 14:11:35 2009 -0400 @@ -212,12 +212,13 @@ is a MetadataSpecCollection) of datatype. 
""" - def __init__( self, datatype, name=None, desc=None, param=MetadataParameter, default=None, no_value = None, visible=True, **kwargs ): + def __init__( self, datatype, name=None, desc=None, param=MetadataParameter, default=None, no_value = None, visible=True, set_in_upload = False, **kwargs ): self.name = name self.desc = desc or name self.default = default self.no_value = no_value self.visible = visible + self.set_in_upload = set_in_upload # Catch-all, allows for extra attributes to be set self.__dict__.update(kwargs) #set up param last, as it uses values set above diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/datatypes/registry.py --- a/lib/galaxy/datatypes/registry.py Wed Jul 15 12:18:43 2009 -0400 +++ b/lib/galaxy/datatypes/registry.py Wed Jul 15 14:11:35 2009 -0400 @@ -308,3 +308,19 @@ def get_composite_extensions( self ): return [ ext for ( ext, d_type ) in self.datatypes_by_extension.iteritems() if d_type.composite_type is not None ] + def get_upload_metadata_params( self, context, group, tool ): + """Returns dict of case value:inputs for metadata conditional for upload tool""" + rval = {} + for ext, d_type in self.datatypes_by_extension.iteritems(): + inputs = [] + for meta_name, meta_spec in d_type.metadata_spec.iteritems(): + if meta_spec.set_in_upload: + help_txt = meta_spec.desc + if not help_txt or help_txt == meta_name: + help_txt = "" + inputs.append( '<param type="text" name="%s" label="Set metadata value for "%s"" value="%s" help="%s"/>' % ( meta_name, meta_name, meta_spec.default, help_txt ) ) + rval[ ext ] = "\n".join( inputs ) + if 'auto' not in rval and 'txt' in rval: #need to manually add 'auto' datatype + rval[ 'auto' ] = rval[ 'txt' ] + return rval + diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Wed Jul 15 12:18:43 2009 -0400 +++ b/lib/galaxy/tools/__init__.py Wed Jul 15 14:11:35 2009 -0400 @@ -596,18 +596,40 @@ elif elem.tag == "conditional": group = Conditional() group.name = elem.get( "name" ) - # Should have one child "input" which determines the case - input_elem = elem.find( "param" ) - assert input_elem is not None, "<conditional> must have a child <param>" - group.test_param = self.parse_param_elem( input_elem, enctypes, context ) - # Must refresh when test_param changes - group.test_param.refresh_on_change = True - # And a set of possible cases - for case_elem in elem.findall( "when" ): - case = ConditionalWhen() - case.value = case_elem.get( "value" ) - case.inputs = self.parse_input_elem( case_elem, enctypes, context ) - group.cases.append( case ) + + group.name = elem.get( "name" ) + + group.value_ref = elem.get( 'value_ref', None ) + group.value_ref_in_group = util.string_as_bool( elem.get( 'value_ref_in_group', 'True' ) ) + value_from = elem.get( "value_from" ) + if value_from: + value_from = value_from.split( ':' ) + group.value_from = locals().get( value_from[0] ) + group.test_param = rval[ group.value_ref ] + group.test_param.refresh_on_change = True + for attr in value_from[1].split( '.' 
+                        group.value_from = getattr( group.value_from, attr )
+                    for case_value, case_inputs in group.value_from( context, group, self ).iteritems():
+                        case = ConditionalWhen()
+                        case.value = case_value
+                        if case_inputs:
+                            case.inputs = self.parse_input_elem( ElementTree.XML( "<when>%s</when>" % case_inputs ), enctypes, context )
+                        else:
+                            case.inputs = {}
+                        group.cases.append( case )
+                else:
+                    # Should have one child "input" which determines the case
+                    input_elem = elem.find( "param" )
+                    assert input_elem is not None, "<conditional> must have a child <param>"
+                    group.test_param = self.parse_param_elem( input_elem, enctypes, context )
+                    # Must refresh when test_param changes
+                    group.test_param.refresh_on_change = True
+                    # And a set of possible cases
+                    for case_elem in elem.findall( "when" ):
+                        case = ConditionalWhen()
+                        case.value = case_elem.get( "value" )
+                        case.inputs = self.parse_input_elem( case_elem, enctypes, context )
+                        group.cases.append( case )
                 rval[group.name] = group
             elif elem.tag == "upload_dataset":
                 group = UploadDataset()
@@ -615,6 +637,7 @@
                 group.title = elem.get( "title" )
                 group.file_type_name = elem.get( 'file_type_name', group.file_type_name )
                 group.default_file_type = elem.get( 'default_file_type', group.default_file_type )
+                group.metadata_ref = elem.get( 'metadata_ref', group.metadata_ref )
                 rval[ group.file_type_name ].refresh_on_change = True
                 rval[ group.file_type_name ].refresh_on_change_values = self.app.datatypes_registry.get_composite_extensions()
                 group.inputs = self.parse_input_elem( elem, enctypes, context )
@@ -917,7 +940,10 @@
                 old_current_case = group_state['__current_case__']
                 group_prefix = "%s|" % ( key )
                 # Deal with the 'test' element and see if its value changed
-                test_param_key = group_prefix + input.test_param.name
+                if input.value_ref and not input.value_ref_in_group: #we are referencing an existing parameter, which is not part of this group
+                    test_param_key = prefix + input.test_param.name
+                else:
+                    test_param_key = group_prefix + input.test_param.name
                 test_param_error = None
                 test_incoming = get_incoming_value( incoming, test_param_key, None )
                 if test_param_key not in incoming \
diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/tools/actions/upload.py
--- a/lib/galaxy/tools/actions/upload.py	Wed Jul 15 12:18:43 2009 -0400
+++ b/lib/galaxy/tools/actions/upload.py	Wed Jul 15 14:11:35 2009 -0400
@@ -46,21 +46,26 @@
         uploaded_datasets = dataset_upload_input.get_uploaded_datasets( trans, incoming )
         for uploaded_dataset in uploaded_datasets:
             precreated_dataset = self.get_precreated_dataset( uploaded_dataset.precreated_name )
-            dataset = self.add_file( trans, uploaded_dataset.primary_file, uploaded_dataset.name, uploaded_dataset.file_type, uploaded_dataset.is_multi_byte, uploaded_dataset.dbkey, space_to_tab = uploaded_dataset.space_to_tab, info = uploaded_dataset.info, precreated_dataset = precreated_dataset )
-            if uploaded_dataset.composite_files:
+            dataset = self.add_file( trans, uploaded_dataset.primary_file, uploaded_dataset.name, uploaded_dataset.file_type, uploaded_dataset.is_multi_byte, uploaded_dataset.dbkey, space_to_tab = uploaded_dataset.space_to_tab, info = uploaded_dataset.info, precreated_dataset = precreated_dataset, metadata = uploaded_dataset.metadata )
+            composite_files = dataset.datatype.get_composite_files( dataset )
+            if composite_files:
                 os.mkdir( dataset.extra_files_path ) #make extra files path
-                for name, value in uploaded_dataset.composite_files.iteritems():
+                for name, value in composite_files.iteritems(): #what about binary files here, need to skip converting newlines
-                    if value is None and not dataset.datatype.writable_files[ name ].optional:
+                    if uploaded_dataset.composite_files[ value.name ] is None and not value.optional:
                         dataset.info = "A required composite data file was not provided (%s)" % name
                         dataset.state = dataset.states.ERROR
                         break
-                    elif value is not None:
-                        if value.space_to_tab:
-                            sniff.convert_newlines_sep2tabs( value.filename )
+                    elif uploaded_dataset.composite_files[ value.name ] is not None:
+                        if uploaded_dataset.composite_files[ value.name ].space_to_tab:
+                            sniff.convert_newlines_sep2tabs( uploaded_dataset.composite_files[ value.name ].filename )
                         else:
-                            sniff.convert_newlines( value.filename )
-                        shutil.move( value.filename, os.path.join( dataset.extra_files_path, name ) )
+                            sniff.convert_newlines( uploaded_dataset.composite_files[ value.name ].filename )
+                        shutil.move( uploaded_dataset.composite_files[ value.name ].filename, os.path.join( dataset.extra_files_path, name ) )
+            if dataset.datatype.composite_type == 'auto_primary_file':
+                #now that metadata is set, we should create the primary file as required
+                open( dataset.file_name, 'wb+' ).write( dataset.datatype.generate_primary_file( dataset = dataset ) )
+
             data_list.append( dataset )
             #clean up extra temp names
             uploaded_dataset.clean_up_temp_files()
@@ -125,7 +130,7 @@
             trans.log_event( 'job id %d ended with errors, err_msg: %s' % ( job.id, err_msg ), tool_id=job.tool_id )
             return dict( output=data )
 
-    def add_file( self, trans, temp_name, file_name, file_type, is_multi_byte, dbkey, info=None, space_to_tab=False, precreated_dataset=None ):
+    def add_file( self, trans, temp_name, file_name, file_type, is_multi_byte, dbkey, info=None, space_to_tab=False, precreated_dataset=None, metadata = {} ):
        def dataset_no_data_error( data, message = 'there was an error uploading your file' ):
            data.info = "No data: %s." % message
            data.state = data.states.ERROR
@@ -217,6 +222,7 @@
         if trans.app.datatypes_registry.get_datatype_by_extension( file_type ).composite_type != 'auto_primary_file' and self.check_html( temp_name ):
             return dataset_no_data_error( data, message = "you attempted to upload an inappropriate file" )
             #raise BadFileException( "you attempted to upload an inappropriate file." )
+        #if data_type != 'binary' and data_type != 'zip' and not trans.app.datatypes_registry.get_datatype_by_extension( ext ).is_binary:
         if data_type != 'binary' and data_type != 'zip':
             if space_to_tab:
                 self.line_count = sniff.convert_newlines_sep2tabs( temp_name )
@@ -235,9 +241,14 @@
         data.info = info
         data.flush()
         shutil.move( temp_name, data.file_name )
-        data.state = data.states.OK
+        ## FIXME
+        data.state = data.states.OK ##THIS SHOULD BE THE LAST THING DONE
+        #### it's bad to set other things after this point, i.e. metadata and composite files... this creates a race condition where a dataset could be pushed into a job before its metadata, etc. is set
         data.set_size()
         data.init_meta()
+        #need to set metadata, has to be done after the extension is set
+        for meta_name, meta_value in metadata.iteritems():
+            setattr( data.metadata, meta_name, meta_value )
         if self.line_count is not None:
             try:
                 if is_multi_byte:
diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/tools/parameters/grouping.py
--- a/lib/galaxy/tools/parameters/grouping.py	Wed Jul 15 12:18:43 2009 -0400
+++ b/lib/galaxy/tools/parameters/grouping.py	Wed Jul 15 14:11:35 2009 -0400
@@ -92,6 +92,7 @@
         self.file_type_name = 'file_type'
         self.default_file_type = 'txt'
         self.file_type_to_ext = { 'auto':self.default_file_type }
+        self.metadata_ref = 'files_metadata'
     def get_file_type( self, context ):
         return context.get( self.file_type_name, self.default_file_type )
     def get_datatype_ext( self, trans, context ):
@@ -297,6 +298,7 @@
             self.composite_files = odict()
             self.dbkey = None
             self.warnings = []
+            self.metadata = {}
 
             self._temp_filenames = [] #store all created filenames here, delete on cleanup
         def register_temp_file( self, filename ):
@@ -333,6 +335,13 @@
             dataset.datatype = d_type
             dataset.dbkey = dbkey
 
+            #load metadata
+            files_metadata = context.get( self.metadata_ref, {} )
+            for meta_name, meta_spec in d_type.metadata_spec.iteritems():
+                if meta_spec.set_in_upload:
+                    if meta_name in files_metadata:
+                        dataset.metadata[ meta_name ] = files_metadata[ meta_name ]
+
             temp_name = None
             precreated_name = None
             is_multi_byte = False
@@ -359,10 +368,10 @@
                     dataset.warnings.extend( warnings )
                     dataset.register_temp_file( temp_name )
 
-                keys = writable_files.keys()
+                keys = [ value.name for value in writable_files.values() ]
                 for i, group_incoming in enumerate( groups_incoming[ writable_files_offset : ] ):
                     key = keys[ i + writable_files_offset ]
-                    if group_incoming is None and not writable_files[ key ].optional:
+                    if group_incoming is None and not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                         dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
                         dataset.composite_files[ key ] = None
                     else:
@@ -372,7 +381,7 @@
                             dataset.register_temp_file( temp_name )
                         else:
                             dataset.composite_files[ key ] = None
-                            if not writable_files[ key ].optional:
+                            if not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                                 dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
             return [ dataset ]
         else:
@@ -404,6 +413,8 @@
         Group.__init__( self )
         self.test_param = None
         self.cases = []
+        self.value_ref = None
+        self.value_ref_in_group = True #When our test_param is not part of the conditional Group, this is False
     def get_current_case( self, value, trans ):
         # Convert value to user representation
         str_value = self.test_param.filter_value( value, trans )
@@ -460,4 +471,4 @@
 class ConditionalWhen( object ):
     def __init__( self ):
         self.value = None
-        self.inputs = None
\ No newline at end of file
+        self.inputs = None
diff -r dacc94994979 -r dd5f1fe8f5e9 templates/tool_form.mako
--- a/templates/tool_form.mako	Wed Jul 15 12:18:43 2009 -0400
+++ b/templates/tool_form.mako	Wed Jul 15 14:11:35 2009 -0400
@@ -102,7 +102,9 @@
                 current_case = group_state['__current_case__']
                 group_prefix = prefix + input.name + "|"
             %>
-            ${row_for_param( group_prefix, input.test_param, group_state, group_errors, other_values )}
+            %if input.value_ref_in_group:
+                ${row_for_param( group_prefix, input.test_param, group_state, group_errors, other_values )}
+            %endif
             ${do_inputs( input.cases[current_case].inputs, group_state, group_errors, group_prefix, other_values )}
         %elif input.type == "upload_dataset":
             %if input.get_datatype( trans, other_values ).composite_type is None: #have non-composite upload appear as before
diff -r dacc94994979 -r dd5f1fe8f5e9 tools/data_source/upload.xml
--- a/tools/data_source/upload.xml	Wed Jul 15 12:18:43 2009 -0400
+++ b/tools/data_source/upload.xml	Wed Jul 15 14:11:35 2009 -0400
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 
-<tool name="Upload File" id="upload1" version="1.0.1">
+<tool name="Upload File" id="upload1" version="1.0.2">
   <description>
     from your computer
   </description>
@@ -15,7 +15,7 @@
       </options>
     </param>
     <param name="async_datasets" type="hidden" value="None"/>
-    <upload_dataset name="files" title="Specify Files for Dataset" file_type_name="file_type">
+    <upload_dataset name="files" title="Specify Files for Dataset" file_type_name="file_type" metadata_ref="files_metadata">
      <param name="file_data" type="file" size="30" label="File" ajax-upload="true">
        <validator type="expression" message="You will need to reselect the file you specified (%s)." substitute_value_in_message="True">not ( ( isinstance( value, unicode ) or isinstance( value, str ) ) and value != "" )</validator> <!-- use validator to post message to user about needing to reselect the file, since most browsers won't accept the value attribute for file inputs -->
      </param>
@@ -25,6 +25,7 @@
      </param>
    </upload_dataset>
    <param name="dbkey" type="genomebuild" label="Genome" />
+    <conditional name="files_metadata" title="Specify metadata" value_from="self:app.datatypes_registry.get_upload_metadata_params" value_ref="file_type" value_ref_in_group="False" />
    <!-- <param name="other_dbkey" type="text" label="Or user-defined Genome" /> -->
  </inputs>
  <help>
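
For anyone tracing how the pieces above fit together: the new set_in_upload flag on MetadataElement is what Registry.get_upload_metadata_params() filters on, and the <conditional ... value_from="self:app.datatypes_registry.get_upload_metadata_params" value_ref="file_type"> stanza in upload.xml turns that method's return value into one <when> case per file extension. What follows is a minimal, standalone sketch of that generation step, not Galaxy's actual implementation: MiniSpec and MiniRegistry are hypothetical stand-ins for MetadataElementSpec and Registry, the context/group/tool arguments of the real method are omitted, and the sample data is modeled on the Rgenetics 'base_name' element from this changeset.

from collections import namedtuple

MiniSpec = namedtuple( 'MiniSpec', [ 'name', 'desc', 'default', 'set_in_upload' ] )

class MiniRegistry( object ):
    def __init__( self ):
        # extension -> list of metadata specs for that datatype (hypothetical sample data)
        self.datatypes_by_extension = {
            'txt':  [],
            'lped': [ MiniSpec( 'base_name', 'base name for all transformed versions of this genetic dataset', 'galaxy', True ) ],
        }
    def get_upload_metadata_params( self ):
        """Returns dict of case value:inputs for the upload tool's metadata conditional."""
        rval = {}
        for ext, specs in self.datatypes_by_extension.items():
            inputs = []
            for spec in specs:
                if not spec.set_in_upload:
                    continue
                help_txt = spec.desc
                if not help_txt or help_txt == spec.name:
                    help_txt = ""
                # single quotes around %s keep the generated label attribute valid XML
                inputs.append( '<param type="text" name="%s" label="Set metadata value for \'%s\'" value="%s" help="%s"/>' % ( spec.name, spec.name, spec.default, help_txt ) )
            rval[ ext ] = "\n".join( inputs )
        if 'auto' not in rval and 'txt' in rval: # 'auto' is not a registered extension; mirror 'txt'
            rval[ 'auto' ] = rval[ 'txt' ]
        return rval

if __name__ == '__main__':
    for ext, xml in sorted( MiniRegistry().get_upload_metadata_params().items() ):
        print( '%s:\n%s\n' % ( ext, xml or '(no upload-settable metadata)' ) )

Running it prints a <param> block for 'lped' and empty cases for 'txt' and 'auto', which is the shape the upload tool's conditional expects: the value of the referenced file_type test param selects the case, so metadata inputs appear only for datatypes that opted in via set_in_upload.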