# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User jeremy goecks <jeremy.goecks(a)emory.edu>
# Date 1280945077 14400
# Node ID b7cf694b28c2461f652e76593f5fad088756d813
# Parent 77575a5f348d7d10c1df423c5faa71abaefd5f8a
Update gops_intersect and gops_subtract documentation to reflect that the tools can accept both BED and GFF files.
--- a/tools/new_operations/gops_subtract.py
+++ b/tools/new_operations/gops_subtract.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python
"""
-Find regions of first bed file that do not overlap regions in a second
-bed file
+Find regions of first interval file that do not overlap regions in a second
+interval file. Interval files can either be BED or GFF format.
-usage: %prog bed_file_1 bed_file_2 out_file
+usage: %prog interval_file_1 interval_file_2 out_file
-1, --cols1=N,N,N,N: Columns for start, end, strand in first file
-2, --cols2=N,N,N,N: Columns for start, end, strand in second file
-m, --mincols=N: Require this much overlap (default 1bp)
--- a/tools/new_operations/gops_intersect.py
+++ b/tools/new_operations/gops_intersect.py
@@ -1,8 +1,9 @@
#!/usr/bin/env python
"""
-Find regions of first interval/GFF file that overlap regions in a second interval/GFF file
+Find regions of first interval file that overlap regions in a second interval file.
+Interval files can either be BED or GFF format.
-usage: %prog bed_file_1 bed_file_2 out_file
+usage: %prog interval_file_1 interval_file_2 out_file
-1, --cols1=N,N,N,N: Columns for start, end, strand in first file
-2, --cols2=N,N,N,N: Columns for start, end, strand in second file
-m, --mincols=N: Require this much overlap (default 1bp)
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User James Taylor <james(a)jamestaylor.org>
# Date 1280947283 14400
# Node ID d3c41a755fa85c68657c7525067531a2ce72fbb2
# Parent f6dbbf8922c840347975e0d725f6c45bc0f669a6
Missing tool data files are now a warning, not a fatal error. This means that a table can load but be empty, and the tool will still load but have no options for the field that is connected to that data table. Is this too lenient?
--- a/lib/galaxy/tools/data/__init__.py
+++ b/lib/galaxy/tools/data/__init__.py
@@ -36,7 +36,6 @@ class ToolDataTableManager( object ):
table = tool_data_table_types[ type ]( table_elem )
self.data_tables[ table.name ] = table
log.debug( "Loaded tool data table '%s", table.name )
- print >> sys.stderr, repr( self.data_tables )
class ToolDataTable( object ):
def __init__( self, config_element ):
@@ -72,9 +71,10 @@ class TabularToolDataTable( ToolDataTabl
all_rows = []
for file_element in config_element.findall( 'file' ):
filename = file_element.get( 'path' )
- assert os.path.exists( filename ), \
- "Cannot find index file '%s' for tool data table '%s'" % ( filename, self.name )
- all_rows.extend( self.parse_file_fields( open( filename ) ) )
+ if not os.path.exists( filename ):
+ log.warn( "Cannot find index file '%s' for tool data table '%s'" % ( filename, self.name ) )
+ else:
+ all_rows.extend( self.parse_file_fields( open( filename ) ) )
self.data = all_rows
def get_fields( self ):
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User James Taylor <james(a)jamestaylor.org>
# Date 1278615679 14400
# Node ID cd941e492bc0bc6930b8ac32190459d9d536078b
# Parent 2447b9a4dae30b17df089290a6471f3401fa5f78
Two missing files from previous commit (tool data tables)
--- /dev/null
+++ b/tool_data_table_conf.xml.sample
@@ -0,0 +1,6 @@
+<tables>
+ <table name="indexed_maf_files">
+ <column_names>name, value, dbkey, species</column_names>
+ <file name="tool-data/maf_index.loc" />
+ </table>
+</tables>
--- /dev/null
+++ b/lib/galaxy/tools/data/__init__.py
@@ -0,0 +1,131 @@
+"""
+Manage tool data tables, which store (at the application level) data that is
+used by tools, for example in the generation of dynamic options. Tables are
+loaded and stored by names which tools use to refer to them. This allows
+users to configure data tables for a local Galaxy instance without needing
+to modify the tool configurations.
+"""
+
+import logging, sys, os.path
+from galaxy import util
+
+log = logging.getLogger( __name__ )
+
+class ToolDataTableManager( object ):
+ """
+ Manages a collection of tool data tables
+ """
+
+ def __init__( self, config_filename=None ):
+ self.data_tables = {}
+ if config_filename:
+ self.add_from_config_file( config_filename )
+
+ def __getitem__( self, key ):
+ return self.data_tables.__getitem__( key )
+
+ def __contains__( self, key ):
+ return self.data_tables.__contains__( key )
+
+ def add_from_config_file( self, config_filename ):
+ tree = util.parse_xml( config_filename )
+ root = tree.getroot()
+ for table_elem in root.findall( 'table' ):
+ type = table_elem.get( 'type', 'tabular' )
+ assert type in tool_data_table_types, "Unknown data table type '%s'" % type
+ table = tool_data_table_types[ type ]( table_elem )
+ self.data_tables[ table.name ] = table
+ log.debug( "Loaded tool data table '%s", table.name )
+ print >> sys.stderr, repr( self.data_tables )
+
+class ToolDataTable( object ):
+ def __init__( self, config_element ):
+ self.name = config_element.get( 'name' )
+
+class TabularToolDataTable( ToolDataTable ):
+ """
+ Data stored in a tabular / separated value format on disk, allows multiple
+ files to be merged but all must have the same column definitions.
+
+ <table type="tabular" name="test">
+ <column name='...' index = '...' />
+ <file path="..." />
+ <file path="..." />
+ </table>
+ """
+
+ type_key = 'tabular'
+
+ def __init__( self, config_element ):
+ super( TabularToolDataTable, self ).__init__( config_element )
+ self.configure_and_load( config_element )
+
+ def configure_and_load( self, config_element ):
+ """
+ Configure and load table from an XML element.
+ """
+ self.separator = config_element.get( 'separator', '\t' )
+ self.comment_char = config_element.get( 'comment_char', '#' )
+ # Configure columns
+ self.parse_column_spec( config_element )
+ # Read every file
+ all_rows = []
+ for file_element in config_element.findall( 'file' ):
+ filename = file_element.get( 'path' )
+ assert os.path.exists( filename ), \
+ "Cannot find index file '%s' for tool data table '%s'" % ( filename, self.name )
+ all_rows.extend( self.parse_file_fields( open( filename ) ) )
+ self.data = all_rows
+
+ def get_fields( self ):
+ return self.data
+
+ def parse_column_spec( self, config_element ):
+ """
+ Parse column definitions, which can either be a set of 'column' elements
+ with a name and index (as in dynamic options config), or a shorthand
+ comma separated list of names in order as the text of a 'column_names'
+ element.
+
+ A column named 'value' is required.
+ """
+ self.columns = {}
+ if config_element.find( 'columns' ) is not None:
+ column_names = util.xml_text( config_element.find( 'columns' ) )
+ column_names = [ n.strip() for n in column_names.split( ',' ) ]
+ for index, name in enumerate( column_names ):
+ self.columns[ name ] = index
+ self.largest_index = index
+ else:
+ for column_elem in config_element.findall( 'column' ):
+ name = column_elem.get( 'name', None )
+ assert name is not None, "Required 'name' attribute missing from column def"
+ index = column_elem.get( 'index', None )
+ assert index is not None, "Required 'index' attribute missing from column def"
+ index = int( index )
+ self.columns[name] = index
+ if index > self.largest_index:
+ self.largest_index = index
+ assert 'value' in self.columns, "Required 'value' column missing from column def"
+ if 'name' not in self.columns:
+ self.columns['name'] = self.columns['value']
+
+ def parse_file_fields( self, reader ):
+ """
+ Parse separated lines from file and return a list of tuples.
+
+ TODO: Allow named access to fields using the column names.
+ """
+ rval = []
+ for line in reader:
+ if line.lstrip().startswith( self.comment_char ):
+ continue
+ line = line.rstrip( "\n\r" )
+ if line:
+ fields = line.split( self.separator )
+ if self.largest_index < len( fields ):
+ rval.append( fields )
+ return rval
+
+# Registry of tool data types by type_key
+tool_data_table_types = dict( [ ( cls.type_key, cls ) for cls in [ TabularToolDataTable ] ] )