details: http://www.bx.psu.edu/hg/galaxy/rev/aa1088e4c6c6 changeset: 1531:aa1088e4c6c6 user: Dan Blankenberg <dan@bx.psu.edu> date: Thu Sep 25 15:03:05 2008 -0400 description: Enhance the way dataset.set_meta() behaves, by adding a flag 'overwrite', which when False will only overwrite metadata values that are not previously set or are not accessible for user modification. 7 file(s) affected in this change: lib/galaxy/datatypes/data.py lib/galaxy/datatypes/interval.py lib/galaxy/datatypes/metadata.py lib/galaxy/datatypes/registry.py lib/galaxy/datatypes/sequence.py lib/galaxy/datatypes/tabular.py lib/galaxy/jobs/__init__.py diffs (269 lines): diff -r 58ea8585bd88 -r aa1088e4c6c6 lib/galaxy/datatypes/data.py --- a/lib/galaxy/datatypes/data.py Thu Sep 25 10:19:56 2008 -0400 +++ b/lib/galaxy/datatypes/data.py Thu Sep 25 15:03:05 2008 -0400 @@ -81,7 +81,7 @@ # flag the object as modified for SQLAlchemy. if copy_from: dataset.metadata = copy_from.metadata - def set_meta( self, dataset, **kwd ): + def set_meta( self, dataset, overwrite = True, **kwd ): """Unimplemented method, allows guessing of metadata from contents of file""" return True def set_readonly_meta( self, dataset ): diff -r 58ea8585bd88 -r aa1088e4c6c6 lib/galaxy/datatypes/interval.py --- a/lib/galaxy/datatypes/interval.py Thu Sep 25 10:19:56 2008 -0400 +++ b/lib/galaxy/datatypes/interval.py Thu Sep 25 15:03:05 2008 -0400 @@ -64,8 +64,8 @@ else: dataset.blurb = "%s regions" % util.commaify( str( line_count ) ) - def set_meta( self, dataset, first_line_is_header=False, **kwd ): - Tabular.set_meta( self, dataset, skip=0 ) + def set_meta( self, dataset, overwrite = True, first_line_is_header = False, **kwd ): + Tabular.set_meta( self, dataset, overwrite = overwrite, skip = 0 ) """Tries to guess from the line the location number of the column for the chromosome, region start-end and strand""" if dataset.has_data(): @@ -80,7 +80,8 @@ for index, col_name in enumerate( elems ): if col_name in valid: meta_name = 
valid[col_name] - setattr( dataset.metadata, meta_name, index+1 ) + if overwrite or not dataset.metadata.element_is_set( meta_name ): + setattr( dataset.metadata, meta_name, index+1 ) values = alias_spec[ meta_name ] start = values.index( col_name ) for lower in values[ start: ]: @@ -94,26 +95,32 @@ if len( elems ) > 2: for str in data.col1_startswith: if line.lower().startswith( str ): - dataset.metadata.chromCol = 1 + if overwrite or not dataset.metadata.element_is_set( 'chromCol' ): + dataset.metadata.chromCol = 1 try: int( elems[1] ) - dataset.metadata.startCol = 2 + if overwrite or not dataset.metadata.element_is_set( 'startCol' ): + dataset.metadata.startCol = 2 except: pass # Metadata default will be used try: int( elems[2] ) - dataset.metadata.endCol = 3 + if overwrite or not dataset.metadata.element_is_set( 'endCol' ): + dataset.metadata.endCol = 3 except: pass # Metadata default will be used if len( elems ) > 3: try: int( elems[3] ) except: - dataset.metadata.nameCol = 4 + if overwrite or not dataset.metadata.element_is_set( 'nameCol' ): + dataset.metadata.nameCol = 4 if len( elems ) < 6 or elems[5] not in data.valid_strand: - dataset.metadata.strandCol = 0 + if overwrite or not dataset.metadata.element_is_set( 'strandCol' ): + dataset.metadata.strandCol = 0 else: - dataset.metadata.strandCol = 6 + if overwrite or not dataset.metadata.element_is_set( 'strandCol' ): + dataset.metadata.strandCol = 6 metadata_is_set = True break if metadata_is_set: @@ -286,8 +293,9 @@ MetadataElement( name="endCol", default=3, desc="End column", param=metadata.ColumnParameter ) MetadataElement( name="strandCol", desc="Strand column (click box & select)", param=metadata.ColumnParameter, optional=True, no_value=0 ) MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True, visible=False ) + ###do we need to repeat these? 
they are the same as should be inherited from interval type - def set_meta( self, dataset, **kwd ): + def set_meta( self, dataset, overwrite = True, **kwd ): """Sets the metadata information for datasets previously determined to be in bed format.""" i = 0 if dataset.has_data(): @@ -300,15 +308,18 @@ for startswith in data.col1_startswith: if line.lower().startswith( startswith ): if len( elems ) > 3: - dataset.metadata.nameCol = 4 + if overwrite or not dataset.metadata.element_is_set( 'nameCol' ): + dataset.metadata.nameCol = 4 if len(elems) < 6: - dataset.metadata.strandCol = 0 + if overwrite or not dataset.metadata.element_is_set( 'strandCol' ): + dataset.metadata.strandCol = 0 else: - dataset.metadata.strandCol = 6 + if overwrite or not dataset.metadata.element_is_set( 'strandCol' ): + dataset.metadata.strandCol = 6 metadata_set = True break if metadata_set: break - Tabular.set_meta( self, dataset, skip=i ) + Tabular.set_meta( self, dataset, overwrite = overwrite, skip = i ) def as_ucsc_display_file( self, dataset, **kwd ): """Returns file contents with only the bed data. 
If bed 6+, treat as interval.""" @@ -443,7 +454,7 @@ Tabular.__init__(self, **kwd) self.add_display_app ( 'elegans', 'display in GBrowse', 'as_gbrowse_display_file', 'gbrowse_links' ) - def set_meta( self, dataset, **kwd ): + def set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): line = line.rstrip('\r\n') @@ -456,7 +467,7 @@ break except: pass - Tabular.set_meta( self, dataset, skip=i ) + Tabular.set_meta( self, dataset, overwrite = overwrite, skip = i ) def make_html_table( self, dataset, skipchars=[] ): """Create HTML table, used for displaying peek""" @@ -589,7 +600,7 @@ """Initialize datatype, by adding GBrowse display app""" Gff.__init__(self, **kwd) - def set_meta( self, dataset, **kwd ): + def set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): line = line.rstrip('\r\n') @@ -614,7 +625,7 @@ phase = elems[7] if valid_start and valid_end and start < end and strand in self.valid_gff3_strand and phase in self.valid_gff3_phase: break - Tabular.set_meta( self, dataset, skip=i ) + Tabular.set_meta( self, dataset, overwrite = overwrite, skip = i ) def sniff( self, filename ): """ @@ -692,7 +703,7 @@ def make_html_table( self, dataset ): return Tabular.make_html_table( self, dataset, skipchars=['track', '#'] ) - def set_meta( self, dataset, **kwd ): + def set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): line = line.rstrip('\r\n') @@ -705,7 +716,7 @@ for str in data.col1_startswith: if elems[0].lower().startswith(str): break - Tabular.set_meta( self, dataset, skip=i ) + Tabular.set_meta( self, dataset, overwrite = overwrite, skip = i ) def sniff( self, filename ): """ @@ -746,8 +757,8 @@ """Initialize interval datatype, by adding UCSC display app""" Tabular.__init__(self, **kwd) self.add_display_app ( 'ucsc', 'display at UCSC', 'as_ucsc_display_file', 'ucsc_links' ) - def 
set_meta( self, dataset, **kwd ): - Tabular.set_meta( self, dataset, skip=1 ) + def set_meta( self, dataset, overwrite = True, **kwd ): + Tabular.set_meta( self, dataset, overwrite = overwrite, skip = 1 ) def display_peek( self, dataset ): """Returns formated html of peek""" return Tabular.make_html_table( self, dataset, skipchars=['track', '#'] ) @@ -858,8 +869,8 @@ Tabular.__init__(self, **kwd) self.add_display_app ('elegans', 'display in GBrowse', 'as_gbrowse_display_file', 'gbrowse_links' ) - def set_meta( self, dataset, **kwd ): - Tabular.set_meta( self, dataset, skip=1 ) + def set_meta( self, dataset, overwrite = True, **kwd ): + Tabular.set_meta( self, dataset, overwrite = overwrite, skip = 1 ) def make_html_table( self, dataset ): return Tabular.make_html_table( self, dataset, skipchars=['track', '#'] ) diff -r 58ea8585bd88 -r aa1088e4c6c6 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py Thu Sep 25 10:19:56 2008 -0400 +++ b/lib/galaxy/datatypes/metadata.py Thu Sep 25 15:03:05 2008 -0400 @@ -185,6 +185,8 @@ self.parent._metadata = value else: self.bunch[name] = value + def element_is_set( self, name ): + return bool( self.bunch.get( name, False ) ) MetadataElement = Statement(MetadataElementSpec) diff -r 58ea8585bd88 -r aa1088e4c6c6 lib/galaxy/datatypes/registry.py --- a/lib/galaxy/datatypes/registry.py Thu Sep 25 10:19:56 2008 -0400 +++ b/lib/galaxy/datatypes/registry.py Thu Sep 25 15:03:05 2008 -0400 @@ -190,8 +190,7 @@ # initialization. 
if data.has_data(): data.init_meta( copy_from=data ) - if isinstance( data.datatype, tabular.Tabular ): - data.set_readonly_meta() + data.set_meta( overwrite = False ) data.set_peek() return data diff -r 58ea8585bd88 -r aa1088e4c6c6 lib/galaxy/datatypes/sequence.py --- a/lib/galaxy/datatypes/sequence.py Thu Sep 25 10:19:56 2008 -0400 +++ b/lib/galaxy/datatypes/sequence.py Thu Sep 25 15:03:05 2008 -0400 @@ -164,7 +164,7 @@ def init_meta( self, dataset, copy_from=None ): Alignment.init_meta( self, dataset, copy_from=copy_from ) - def set_meta( self, dataset, first_line_is_header=False, **kwd ): + def set_meta( self, dataset, overwrite = True, **kwd ): """ Parses and sets species and chromosomes from MAF files. """ @@ -190,6 +190,7 @@ if i > 100000: break except: pass + #these metadata values are not accessable by users, always overwrite dataset.metadata.species = species dataset.metadata.species_chromosomes = species_chromosomes diff -r 58ea8585bd88 -r aa1088e4c6c6 lib/galaxy/datatypes/tabular.py --- a/lib/galaxy/datatypes/tabular.py Thu Sep 25 10:19:56 2008 -0400 +++ b/lib/galaxy/datatypes/tabular.py Thu Sep 25 15:03:05 2008 -0400 @@ -26,8 +26,8 @@ data.Text.init_meta( self, dataset, copy_from=copy_from ) def set_readonly_meta( self, dataset, skip=1, **kwd ): """Resets the values of readonly metadata elements.""" - Tabular.set_meta( self, dataset, skip=skip ) - def set_meta( self, dataset, skip=1, **kwd ): + Tabular.set_meta( self, dataset, overwrite = True, skip = skip ) + def set_meta( self, dataset, overwrite = True, skip = 1, **kwd ): """ Tries to determine the number of columns as well as those columns that contain numerical values in the dataset. A skip parameter is @@ -35,6 +35,7 @@ their data type classes are responsible to determine how many invalid comment lines should be skipped. 
""" + #we treat 'overwrite' as always True (we always want to set tabular metadata when called) if dataset.has_data(): column_types = [] diff -r 58ea8585bd88 -r aa1088e4c6c6 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py Thu Sep 25 10:19:56 2008 -0400 +++ b/lib/galaxy/jobs/__init__.py Thu Sep 25 15:03:05 2008 -0400 @@ -409,12 +409,7 @@ dataset.blurb = "error" elif dataset.has_data(): # Only set metadata values if they are missing... - if dataset.missing_meta(): - dataset.set_meta() - else: - # ...however, some tools add / remove columns, - # so we have to reset the readonly metadata values - dataset.set_readonly_meta() + dataset.set_meta( overwrite_exisiting = False ) dataset.set_peek() else: dataset.blurb = "empty"