2 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/b13e85effb67/ Changeset: b13e85effb67 User: dan Date: 2013-05-24 19:04:32 Summary: Fix for MetadataInDataTableColumnValidator becoming stale when a tool data table is updated during Galaxy runtime. Affected #: 3 files diff -r 7e820c43d641d784d73526637e3d55ede5f8327e -r b13e85effb678a5f820630bee89a6a233299b5b8 lib/galaxy/tools/data/__init__.py --- a/lib/galaxy/tools/data/__init__.py +++ b/lib/galaxy/tools/data/__init__.py @@ -161,10 +161,23 @@ self.tool_data_file = None self.tool_data_path = tool_data_path self.missing_index_file = None + # increment this variable any time a new entry is added, or when the table is totally reloaded + # This value has no external meaning, and does not represent an abstract version of the underlying data + self._loaded_content_version = 1 def get_empty_field_by_name( self, name ): return self.empty_field_values.get( name, self.empty_field_value ) - + + def _add_entry( self, entry, persist=False, persist_on_error=False, **kwd ): + raise NotImplementedError( "Abstract method" ) + + def add_entry( self, entry, persist=False, persist_on_error=False, **kwd ): + self._add_entry( entry, persist=persist, persist_on_error=persist_on_error, **kwd ) + self._loaded_content_version += 1 + return self._loaded_content_version + + def is_current_version( self, other_version ): + return self._loaded_content_version == other_version class TabularToolDataTable( ToolDataTable ): """ @@ -234,6 +247,9 @@ def get_fields( self ): return self.data + + def get_version_fields( self ): + return ( self._loaded_content_version, self.data ) def parse_column_spec( self, config_element ): """ @@ -324,6 +340,61 @@ rval = fields[ return_col ] break return rval - + + def _add_entry( self, entry, persist=False, persist_on_error=False, **kwd ): + #accepts dict or list of columns + if isinstance( entry, dict ): + fields = [] + for column_name in self.get_column_name_list(): + if column_name not in entry: + log.debug( "Using default column value for column '%s' when adding data table entry (%s) to table '%s'.", column_name, entry, self.name ) + field_value = self.get_empty_field_by_name( column_name ) + else: + field_value = entry[ column_name ] + fields.append( field_value ) + else: + fields = entry + if self.largest_index < len( fields ): + fields = self._replace_field_separators( fields ) + self.data.append( fields ) + field_len_error = False + else: + log.error( "Attempted to add fields (%s) to data table '%s', but there were not enough fields specified ( %i < %i ).", fields, self.name, len( fields ), self.largest_index + 1 ) + field_len_error = True + if persist and ( not field_len_error or persist_on_error ): + #FIXME: Need to lock these files for editing + try: + data_table_fh = open( self.filename, 'r+b' ) + except IOError, e: + log.warning( 'Error opening data table file (%s) with r+b, assuming file does not exist and will open as wb: %s', self.filename, e ) + data_table_fh = open( self.filename, 'wb' ) + if os.stat( self.filename )[6] != 0: + # ensure last existing line ends with new line + data_table_fh.seek( -1, 2 ) #last char in file + last_char = data_table_fh.read( 1 ) + if last_char not in [ '\n', '\r' ]: + data_table_fh.write( '\n' ) + data_table_fh.write( "%s\n" % ( self.separator.join( fields ) ) ) + return not field_len_error + + def _replace_field_separators( self, fields, separator=None, replace=None, comment_char=None ): + #make sure none of the fields contain separator + #make sure separator replace is different from comment_char, + #due to possible leading replace + if separator is None: + separator = self.separator + if replace is None: + if separator == " ": + if comment_char == "\t": + replace = "_" + else: + replace = "\t" + else: + if comment_char == " ": + replace = "_" + else: + replace = " " + return map( lambda x: x.replace( separator, replace ), fields ) + # Registry of tool data types by type_key tool_data_table_types = dict( [ ( cls.type_key, cls ) for cls in [ TabularToolDataTable ] ] ) diff -r 7e820c43d641d784d73526637e3d55ede5f8327e -r b13e85effb678a5f820630bee89a6a233299b5b8 lib/galaxy/tools/data_manager/manager.py --- a/lib/galaxy/tools/data_manager/manager.py +++ b/lib/galaxy/tools/data_manager/manager.py @@ -233,59 +233,21 @@ assert output_ref_dataset is not None, "Referenced output was not found." output_ref_values[ data_table_column ] = output_ref_dataset - final_data_table_values = [] if not isinstance( data_table_values, list ): data_table_values = [ data_table_values ] - columns = data_table.get_column_name_list() - #FIXME: Need to lock these files for editing - try: - data_table_fh = open( data_table.filename, 'r+b' ) - except IOError, e: - log.warning( 'Error opening data table file (%s) with r+b, assuming file does not exist and will open as wb: %s' % ( data_table.filename, e ) ) - data_table_fh = open( data_table.filename, 'wb' ) - if os.stat( data_table.filename )[6] != 0: - # ensure last existing line ends with new line - data_table_fh.seek( -1, 2 ) #last char in file - last_char = data_table_fh.read() - if last_char not in [ '\n', '\r' ]: - data_table_fh.write( '\n' ) for data_table_row in data_table_values: data_table_value = dict( **data_table_row ) #keep original values here for name, value in data_table_row.iteritems(): #FIXME: need to loop through here based upon order listed in data_manager config if name in output_ref_values: moved = self.process_move( data_table_name, name, output_ref_values[ name ].extra_files_path, **data_table_value ) data_table_value[ name ] = self.process_value_translation( data_table_name, name, **data_table_value ) - final_data_table_values.append( data_table_value ) - fields = [] - for column_name in columns: - if column_name is None or column_name not in data_table_value: - fields.append( data_table.get_empty_field_by_name( column_name ) ) - else: - fields.append( data_table_value[ column_name ] ) - #should we add a comment to file about automatically generated value here? - data_table_fh.write( "%s\n" % ( data_table.separator.join( self._replace_field_separators( fields, separator=data_table.separator ) ) ) ) #write out fields to disk - data_table.data.append( fields ) #add fields to loaded data table - data_table_fh.close() + data_table.add_entry( data_table_value, persist=True ) + for data_table_name, data_table_values in data_tables_dict.iteritems(): #tool returned extra data table entries, but data table was not declared in data manager #do not add these values, but do provide messages log.warning( 'The data manager "%s" returned an undeclared data table "%s" with new entries "%s". These entries will not be created. Please confirm that an entry for "%s" exists in your "%s" file.' % ( self.id, data_table_name, data_table_values, data_table_name, self.data_managers.filename ) ) - def _replace_field_separators( self, fields, separator="\t", replace=None, comment_char=None ): - #make sure none of the fields contain separator - #make sure separator replace is different from comment_char, - #due to possible leading replace - if replace is None: - if separator == " ": - if comment_char == "\t": - replace = "_" - else: - replace = "\t" - else: - if comment_char == " ": - replace = "_" - else: - replace = " " - return map( lambda x: x.replace( separator, replace ), fields ) + def process_move( self, data_table_name, column_name, source_base_path, relative_symlinks=False, **kwd ): if data_table_name in self.move_by_data_table_column and column_name in self.move_by_data_table_column[ data_table_name ]: move_dict = self.move_by_data_table_column[ data_table_name ][ column_name ] diff -r 7e820c43d641d784d73526637e3d55ede5f8327e -r b13e85effb678a5f820630bee89a6a233299b5b8 lib/galaxy/tools/parameters/validation.py --- a/lib/galaxy/tools/parameters/validation.py +++ b/lib/galaxy/tools/parameters/validation.py @@ -293,18 +293,31 @@ if line_startswith: line_startswith = line_startswith.strip() return cls( tool_data_table, metadata_name, metadata_column, message, line_startswith ) + def __init__( self, tool_data_table, metadata_name, metadata_column, message="Value for metadata not found.", line_startswith=None ): self.metadata_name = metadata_name self.message = message self.valid_values = [] + self._data_table_content_version = None + self._tool_data_table = tool_data_table if isinstance( metadata_column, basestring ): metadata_column = tool_data_table.columns[ metadata_column ] - for fields in tool_data_table.get_fields(): - if metadata_column < len( fields ): - self.valid_values.append( fields[ metadata_column ] ) + self._metadata_column = metadata_column + self._load_values() + + def _load_values( self ): + self._data_table_content_version, data_fields = self._tool_data_table.get_version_fields() + self.valid_values = [] + for fields in data_fields: + if self._metadata_column < len( fields ): + self.valid_values.append( fields[ self._metadata_column ] ) + def validate( self, value, history = None ): if not value: return if hasattr( value, "metadata" ): + if not self._tool_data_table.is_current_version( self._data_table_content_version ): + log.debug( 'MetadataInDataTableColumnValidator values are out of sync with data table (%s), updating validator.', self._tool_data_table.name ) + self._load_values() if value.metadata.spec[self.metadata_name].param.to_string( value.metadata.get( self.metadata_name ) ) in self.valid_values: return raise ValueError( self.message ) https://bitbucket.org/galaxy/galaxy-central/commits/ec277b165b13/ Changeset: ec277b165b13 Branch: next-stable User: dan Date: 2013-05-24 19:04:32 Summary: Fix for MetadataInDataTableColumnValidator becoming stale when a tool data table is updated during Galaxy runtime. Affected #: 3 files diff -r 08b05ff1c7cfaad179d25970df0b740da282550e -r ec277b165b13131d02998c3cbab3b3e664141a58 lib/galaxy/tools/data/__init__.py --- a/lib/galaxy/tools/data/__init__.py +++ b/lib/galaxy/tools/data/__init__.py @@ -161,10 +161,23 @@ self.tool_data_file = None self.tool_data_path = tool_data_path self.missing_index_file = None + # increment this variable any time a new entry is added, or when the table is totally reloaded + # This value has no external meaning, and does not represent an abstract version of the underlying data + self._loaded_content_version = 1 def get_empty_field_by_name( self, name ): return self.empty_field_values.get( name, self.empty_field_value ) - + + def _add_entry( self, entry, persist=False, persist_on_error=False, **kwd ): + raise NotImplementedError( "Abstract method" ) + + def add_entry( self, entry, persist=False, persist_on_error=False, **kwd ): + self._add_entry( entry, persist=persist, persist_on_error=persist_on_error, **kwd ) + self._loaded_content_version += 1 + return self._loaded_content_version + + def is_current_version( self, other_version ): + return self._loaded_content_version == other_version class TabularToolDataTable( ToolDataTable ): """ @@ -234,6 +247,9 @@ def get_fields( self ): return self.data + + def get_version_fields( self ): + return ( self._loaded_content_version, self.data ) def parse_column_spec( self, config_element ): """ @@ -324,6 +340,61 @@ rval = fields[ return_col ] break return rval - + + def _add_entry( self, entry, persist=False, persist_on_error=False, **kwd ): + #accepts dict or list of columns + if isinstance( entry, dict ): + fields = [] + for column_name in self.get_column_name_list(): + if column_name not in entry: + log.debug( "Using default column value for column '%s' when adding data table entry (%s) to table '%s'.", column_name, entry, self.name ) + field_value = self.get_empty_field_by_name( column_name ) + else: + field_value = entry[ column_name ] + fields.append( field_value ) + else: + fields = entry + if self.largest_index < len( fields ): + fields = self._replace_field_separators( fields ) + self.data.append( fields ) + field_len_error = False + else: + log.error( "Attempted to add fields (%s) to data table '%s', but there were not enough fields specified ( %i < %i ).", fields, self.name, len( fields ), self.largest_index + 1 ) + field_len_error = True + if persist and ( not field_len_error or persist_on_error ): + #FIXME: Need to lock these files for editing + try: + data_table_fh = open( self.filename, 'r+b' ) + except IOError, e: + log.warning( 'Error opening data table file (%s) with r+b, assuming file does not exist and will open as wb: %s', self.filename, e ) + data_table_fh = open( self.filename, 'wb' ) + if os.stat( self.filename )[6] != 0: + # ensure last existing line ends with new line + data_table_fh.seek( -1, 2 ) #last char in file + last_char = data_table_fh.read( 1 ) + if last_char not in [ '\n', '\r' ]: + data_table_fh.write( '\n' ) + data_table_fh.write( "%s\n" % ( self.separator.join( fields ) ) ) + return not field_len_error + + def _replace_field_separators( self, fields, separator=None, replace=None, comment_char=None ): + #make sure none of the fields contain separator + #make sure separator replace is different from comment_char, + #due to possible leading replace + if separator is None: + separator = self.separator + if replace is None: + if separator == " ": + if comment_char == "\t": + replace = "_" + else: + replace = "\t" + else: + if comment_char == " ": + replace = "_" + else: + replace = " " + return map( lambda x: x.replace( separator, replace ), fields ) + # Registry of tool data types by type_key tool_data_table_types = dict( [ ( cls.type_key, cls ) for cls in [ TabularToolDataTable ] ] ) diff -r 08b05ff1c7cfaad179d25970df0b740da282550e -r ec277b165b13131d02998c3cbab3b3e664141a58 lib/galaxy/tools/data_manager/manager.py --- a/lib/galaxy/tools/data_manager/manager.py +++ b/lib/galaxy/tools/data_manager/manager.py @@ -233,59 +233,21 @@ assert output_ref_dataset is not None, "Referenced output was not found." output_ref_values[ data_table_column ] = output_ref_dataset - final_data_table_values = [] if not isinstance( data_table_values, list ): data_table_values = [ data_table_values ] - columns = data_table.get_column_name_list() - #FIXME: Need to lock these files for editing - try: - data_table_fh = open( data_table.filename, 'r+b' ) - except IOError, e: - log.warning( 'Error opening data table file (%s) with r+b, assuming file does not exist and will open as wb: %s' % ( data_table.filename, e ) ) - data_table_fh = open( data_table.filename, 'wb' ) - if os.stat( data_table.filename )[6] != 0: - # ensure last existing line ends with new line - data_table_fh.seek( -1, 2 ) #last char in file - last_char = data_table_fh.read() - if last_char not in [ '\n', '\r' ]: - data_table_fh.write( '\n' ) for data_table_row in data_table_values: data_table_value = dict( **data_table_row ) #keep original values here for name, value in data_table_row.iteritems(): #FIXME: need to loop through here based upon order listed in data_manager config if name in output_ref_values: moved = self.process_move( data_table_name, name, output_ref_values[ name ].extra_files_path, **data_table_value ) data_table_value[ name ] = self.process_value_translation( data_table_name, name, **data_table_value ) - final_data_table_values.append( data_table_value ) - fields = [] - for column_name in columns: - if column_name is None or column_name not in data_table_value: - fields.append( data_table.get_empty_field_by_name( column_name ) ) - else: - fields.append( data_table_value[ column_name ] ) - #should we add a comment to file about automatically generated value here? - data_table_fh.write( "%s\n" % ( data_table.separator.join( self._replace_field_separators( fields, separator=data_table.separator ) ) ) ) #write out fields to disk - data_table.data.append( fields ) #add fields to loaded data table - data_table_fh.close() + data_table.add_entry( data_table_value, persist=True ) + for data_table_name, data_table_values in data_tables_dict.iteritems(): #tool returned extra data table entries, but data table was not declared in data manager #do not add these values, but do provide messages log.warning( 'The data manager "%s" returned an undeclared data table "%s" with new entries "%s". These entries will not be created. Please confirm that an entry for "%s" exists in your "%s" file.' % ( self.id, data_table_name, data_table_values, data_table_name, self.data_managers.filename ) ) - def _replace_field_separators( self, fields, separator="\t", replace=None, comment_char=None ): - #make sure none of the fields contain separator - #make sure separator replace is different from comment_char, - #due to possible leading replace - if replace is None: - if separator == " ": - if comment_char == "\t": - replace = "_" - else: - replace = "\t" - else: - if comment_char == " ": - replace = "_" - else: - replace = " " - return map( lambda x: x.replace( separator, replace ), fields ) + def process_move( self, data_table_name, column_name, source_base_path, relative_symlinks=False, **kwd ): if data_table_name in self.move_by_data_table_column and column_name in self.move_by_data_table_column[ data_table_name ]: move_dict = self.move_by_data_table_column[ data_table_name ][ column_name ] diff -r 08b05ff1c7cfaad179d25970df0b740da282550e -r ec277b165b13131d02998c3cbab3b3e664141a58 lib/galaxy/tools/parameters/validation.py --- a/lib/galaxy/tools/parameters/validation.py +++ b/lib/galaxy/tools/parameters/validation.py @@ -293,18 +293,31 @@ if line_startswith: line_startswith = line_startswith.strip() return cls( tool_data_table, metadata_name, metadata_column, message, line_startswith ) + def __init__( self, tool_data_table, metadata_name, metadata_column, message="Value for metadata not found.", line_startswith=None ): self.metadata_name = metadata_name self.message = message self.valid_values = [] + self._data_table_content_version = None + self._tool_data_table = tool_data_table if isinstance( metadata_column, basestring ): metadata_column = tool_data_table.columns[ metadata_column ] - for fields in tool_data_table.get_fields(): - if metadata_column < len( fields ): - self.valid_values.append( fields[ metadata_column ] ) + self._metadata_column = metadata_column + self._load_values() + + def _load_values( self ): + self._data_table_content_version, data_fields = self._tool_data_table.get_version_fields() + self.valid_values = [] + for fields in data_fields: + if self._metadata_column < len( fields ): + self.valid_values.append( fields[ self._metadata_column ] ) + def validate( self, value, history = None ): if not value: return if hasattr( value, "metadata" ): + if not self._tool_data_table.is_current_version( self._data_table_content_version ): + log.debug( 'MetadataInDataTableColumnValidator values are out of sync with data table (%s), updating validator.', self._tool_data_table.name ) + self._load_values() if value.metadata.spec[self.metadata_name].param.to_string( value.metadata.get( self.metadata_name ) ) in self.valid_values: return raise ValueError( self.message ) Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.