galaxy-dist commit cd941e492bc0: Two missing files from previous commit (tool data tables) - galaxy-commits

20 Aug 2010

# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User James Taylor james@jamestaylor.org
# Date 1278615679 14400
# Node ID cd941e492bc0bc6930b8ac32190459d9d536078b
# Parent  2447b9a4dae30b17df089290a6471f3401fa5f78
Two missing files from previous commit (tool data tables)

--- /dev/null
+++ b/tool_data_table_conf.xml.sample
@@ -0,0 +1,6 @@
+<tables> <!-- 'columns' gives the column names in file order; each 'file' points at its data via 'path' -->
+    <table name="indexed_maf_files">
+        <columns>name, value, dbkey, species</columns>
+        <file path="tool-data/maf_index.loc" />
+    </table>
+</tables>
--- /dev/null
+++ b/lib/galaxy/tools/data/__init__.py
@@ -0,0 +1,131 @@
+"""
+Manage tool data tables, which store (at the application level) data that is
+used by tools, for example in the generation of dynamic options. Tables are
+loaded and stored by names which tools use to refer to them. This allows
+users to configure data tables for a local Galaxy instance without needing
+to modify the tool configurations. 
+"""
+
+import logging, sys, os.path
+from galaxy import util
+
+log = logging.getLogger( __name__ )
+
+class ToolDataTableManager( object ):
+    """
+    Manages a collection of tool data tables, keyed by table name. Supports
+    `manager[ name ]` lookup and `name in manager` membership tests.
+    """
+    def __init__( self, config_filename=None ):
+        # Maps table name -> loaded table instance
+        self.data_tables = {}
+        if config_filename:
+            self.add_from_config_file( config_filename )
+
+    def __getitem__( self, key ):
+        return self.data_tables[ key ]
+
+    def __contains__( self, key ):
+        return key in self.data_tables
+
+    def add_from_config_file( self, config_filename ):
+        """Load every 'table' element of the XML config into `data_tables`."""
+        root = util.parse_xml( config_filename ).getroot()
+        for table_elem in root.findall( 'table' ):
+            table_type = table_elem.get( 'type', 'tabular' )
+            assert table_type in tool_data_table_types, "Unknown data table type '%s'" % table_type
+            table = tool_data_table_types[ table_type ]( table_elem )
+            self.data_tables[ table.name ] = table
+            log.debug( "Loaded tool data table '%s'", table.name )
+    
+class ToolDataTable( object ):  # Common base for data table types (see TabularToolDataTable)
+    def __init__( self, config_element ):
+        self.name = config_element.get( 'name' )  # table name, read from the 'name' attribute
+    
+class TabularToolDataTable( ToolDataTable ):
+    """
+    Data stored in a tabular / separated value format on disk, allows multiple
+    files to be merged but all must have the same column definitions.
+
+    <table type="tabular" name="test">
+        <column name='...' index = '...' />
+        <file path="..." />
+        <file path="..." />
+    </table>
+    """
+    type_key = 'tabular'
+
+    def __init__( self, config_element ):
+        super( TabularToolDataTable, self ).__init__( config_element )
+        self.configure_and_load( config_element )
+
+    def configure_and_load( self, config_element ):
+        """Configure and load table from an XML element; rows end up in `self.data`."""
+        self.separator = config_element.get( 'separator', '\t' )
+        self.comment_char = config_element.get( 'comment_char', '#' )
+        self.parse_column_spec( config_element )
+        # Read every file, appending rows in the order the files are listed
+        all_rows = []
+        for file_element in config_element.findall( 'file' ):
+            filename = file_element.get( 'path' )  # None when 'path' is missing
+            assert filename and os.path.exists( filename ), \
+                "Cannot find index file '%s' for tool data table '%s'" % ( filename, self.name )
+            reader = open( filename )
+            try:
+                all_rows.extend( self.parse_file_fields( reader ) )
+            finally:
+                reader.close()
+        self.data = all_rows
+
+    def get_fields( self ):
+        """Return the list of rows (each a list of fields) loaded from disk."""
+        return self.data
+
+    def parse_column_spec( self, config_element ):
+        """
+        Parse column definitions, which can either be a set of 'column' elements
+        with a name and index (as in dynamic options config), or a shorthand
+        comma separated list of names in order as the text of a 'columns'
+        element. Sets `self.columns` (name -> index) and `self.largest_index`.
+        A column named 'value' is required; 'name' defaults to the 'value' column.
+        """
+        self.columns = {}
+        # Must be set before the loops: the 'column' branch compares against it
+        self.largest_index = 0
+        if config_element.find( 'columns' ) is not None:
+            column_names = util.xml_text( config_element.find( 'columns' ) ).split( ',' )
+            for index, name in enumerate( column_names ):
+                self.columns[ name.strip() ] = index
+                self.largest_index = index
+        else:
+            for column_elem in config_element.findall( 'column' ):
+                name = column_elem.get( 'name', None )
+                assert name is not None, "Required 'name' attribute missing from column def"
+                index = column_elem.get( 'index', None )
+                assert index is not None, "Required 'index' attribute missing from column def"
+                index = int( index )
+                self.columns[ name ] = index
+                self.largest_index = max( self.largest_index, index )
+        assert 'value' in self.columns, "Required 'value' column missing from column def"
+        if 'name' not in self.columns:
+            self.columns['name'] = self.columns['value']
+
+    def parse_file_fields( self, reader ):
+        """
+        Parse separated lines from file and return a list of field lists.
+        Comment lines, empty lines and lines with too few fields are skipped.
+        TODO: Allow named access to fields using the column names.
+        """
+        rval = []
+        for line in reader:
+            if line.lstrip().startswith( self.comment_char ):
+                continue
+            line = line.rstrip( "\n\r" )
+            if line:
+                fields = line.split( self.separator )
+                if self.largest_index < len( fields ):
+                    rval.append( fields )
+        return rval
+
+# Maps each table class's type_key (matched against a table's 'type' attribute) to the class
+tool_data_table_types = dict( ( klass.type_key, klass ) for klass in ( TabularToolDataTable, ) )