August 2010 - galaxy-commits - lists.galaxyproject.org

galaxy-dist commit f6dbbf8922c8: merge
by commits-noreply＠bitbucket.org 20 Aug '10

20 Aug '10

# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User jeremy goecks <jeremy.goecks(a)emory.edu> # Date 1280946972 14400 # Node ID f6dbbf8922c840347975e0d725f6c45bc0f669a6 # Parent cce2225b8eea41c6ff3779aa4b406b0b6018913c # Parent 4d990430c1c1eeb498a8345a8f6f3916cfacc869 merge

1 0

galaxy-dist commit b7cf694b28c2: Update gops_intersect and gops_subtract documentation to reflect that tools can accept both BED and GFF files.
by commits-noreply＠bitbucket.org 20 Aug '10

20 Aug '10

# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User jeremy goecks <jeremy.goecks(a)emory.edu> # Date 1280945077 14400 # Node ID b7cf694b28c2461f652e76593f5fad088756d813 # Parent 77575a5f348d7d10c1df423c5faa71abaefd5f8a Update gops_intersect and gops_subtract documentation to reflect that tools can accept both BED and GFF files. --- a/tools/new_operations/gops_subtract.py +++ b/tools/new_operations/gops_subtract.py @@ -1,9 +1,9 @@ #!/usr/bin/env python """ -Find regions of first bed file that do not overlap regions in a second -bed file +Find regions of first interval file that do not overlap regions in a second +interval file. Interval files can either be BED or GFF format. -usage: %prog bed_file_1 bed_file_2 out_file +usage: %prog interval_file_1 interval_file_2 out_file -1, --cols1=N,N,N,N: Columns for start, end, strand in first file -2, --cols2=N,N,N,N: Columns for start, end, strand in second file -m, --mincols=N: Require this much overlap (default 1bp) --- a/tools/new_operations/gops_intersect.py +++ b/tools/new_operations/gops_intersect.py @@ -1,8 +1,9 @@ #!/usr/bin/env python """ -Find regions of first interval/GFF file that overlap regions in a second interval/GFF file +Find regions of first interval file that overlap regions in a second interval file. +Interval files can either be BED or GFF format. -usage: %prog bed_file_1 bed_file_2 out_file +usage: %prog interval_file_1 interval_file_2 out_file -1, --cols1=N,N,N,N: Columns for start, end, strand in first file -2, --cols2=N,N,N,N: Columns for start, end, strand in second file -m, --mincols=N: Require this much overlap (default 1bp)

1 0

galaxy-dist commit d3c41a755fa8: Missing tool data files are now a warning not a fatal error. This means that table can load but be empty, and the tool will also still load but have no options for the field that is connected to that data table. Is this too lenient?
by commits-noreply＠bitbucket.org 20 Aug '10

20 Aug '10

# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User James Taylor <james(a)jamestaylor.org> # Date 1280947283 14400 # Node ID d3c41a755fa85c68657c7525067531a2ce72fbb2 # Parent f6dbbf8922c840347975e0d725f6c45bc0f669a6 Missing tool data files are now a warning not a fatal error. This means that table can load but be empty, and the tool will also still load but have no options for the field that is connected to that data table. Is this too lenient? --- a/lib/galaxy/tools/data/__init__.py +++ b/lib/galaxy/tools/data/__init__.py @@ -36,7 +36,6 @@ class ToolDataTableManager( object ): table = tool_data_table_types[ type ]( table_elem ) self.data_tables[ table.name ] = table log.debug( "Loaded tool data table '%s", table.name ) - print >> sys.stderr, repr( self.data_tables ) class ToolDataTable( object ): def __init__( self, config_element ): @@ -72,9 +71,10 @@ class TabularToolDataTable( ToolDataTabl all_rows = [] for file_element in config_element.findall( 'file' ): filename = file_element.get( 'path' ) - assert os.path.exists( filename ), \ - "Cannot find index file '%s' for tool data table '%s'" % ( filename, self.name ) - all_rows.extend( self.parse_file_fields( open( filename ) ) ) + if not os.path.exists( filename ): + log.warn( "Cannot find index file '%s' for tool data table '%s'" % ( filename, self.name ) ) + else: + all_rows.extend( self.parse_file_fields( open( filename ) ) ) self.data = all_rows def get_fields( self ):

1 0

galaxy-dist commit cce2225b8eea: Improvements to the GFF filtering tool.
by commits-noreply＠bitbucket.org 20 Aug '10

20 Aug '10

# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User jeremy goecks <jeremy.goecks(a)emory.edu> # Date 1280946953 14400 # Node ID cce2225b8eea41c6ff3779aa4b406b0b6018913c # Parent 99a3437916457d1971820820733377664fab403f Improvements to the GFF filtering tool. --- a/tools/ngs_rna/gff_filtering.py +++ b/tools/ngs_rna/gff_filtering.py @@ -48,7 +48,7 @@ mapped_str = { for key, value in mapped_str.items(): cond_text = cond_text.replace( key, value ) -# Add attribute name to condition text. +# Condition text is 'attribute meets condition.' cond_text = attribute_name + cond_text # Attempt to determine if the condition includes executable stuff and, if so, exit @@ -62,7 +62,7 @@ for operand in operands: stop_err( "Illegal value '%s' in condition '%s'" % ( operand, cond_text ) ) # Set up assignment. -assignment = "%s = attributes[ '%s' ]" % ( attribute_name, attribute_name ) +assignment = "%s = attributes.get('%s', None)" % ( attribute_name, attribute_name ) # Set up type casting based on attribute type. 
type_cast = "%s = %s(%s)" % ( attribute_name, attribute_type, attribute_name) @@ -103,16 +103,18 @@ for i, line in enumerate( file( in_fname value = pair[1].strip(" \\"") attributes[name] = value %s - %s if %s: - lines_kept += 1 - print >> out, line - except: + %s + if %s: + lines_kept += 1 + print >> out, line + except Exception, e: skipped_lines += 1 if not invalid_line: first_invalid_line = i + 1 invalid_line = line -''' % ( assignment, type_cast, cond_text ) +''' % ( assignment, attribute_name, type_cast, cond_text ) + valid_filter = True try: --- a/tools/ngs_rna/gff_filtering.xml +++ b/tools/ngs_rna/gff_filtering.xml @@ -14,7 +14,7 @@ </param><param name="attribute_type" type="select" label="Attribute type"><option value="float">Float</option> - <option value="integer">Integer</option> + <option value="int">Integer</option><option value="str">String</option></param><param name="cond" size="40" type="text" value=">0" label="With following condition" help="Double equal signs, ==, must be used as shown above. To filter for an arbitrary string, use the Select tool.">

1 0

galaxy-dist commit 99a343791645: merge
by commits-noreply＠bitbucket.org 20 Aug '10

20 Aug '10

# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User jeremy goecks <jeremy.goecks(a)emory.edu> # Date 1280945122 14400 # Node ID 99a3437916457d1971820820733377664fab403f # Parent b7cf694b28c2461f652e76593f5fad088756d813 # Parent 23173803734597fe8cce4215d18d8d52c061c303 merge

1 0

galaxy-dist commit 4d990430c1c1: Modified translogger middleware that properly propagates exceptions, should eliminate 'Attempt to set headers a second time w/o an exc_info'
by commits-noreply＠bitbucket.org 20 Aug '10

20 Aug '10

# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User James Taylor <james(a)jamestaylor.org> # Date 1280946789 14400 # Node ID 4d990430c1c1eeb498a8345a8f6f3916cfacc869 # Parent 99a3437916457d1971820820733377664fab403f Modified translogger middleware that properly propogates exceptions, should eliminate 'Attempt to set headers a second time w/o an exc_info' --- a/lib/galaxy/web/buildapp.py +++ b/lib/galaxy/web/buildapp.py @@ -190,7 +190,7 @@ def wrap_in_middleware( app, global_conf log.debug( "Enabling 'error' middleware" ) # Transaction logging (apache access.log style) if asbool( conf.get( 'use_translogger', True ) ): - from paste.translogger import TransLogger + from framework.middleware.translogger import TransLogger app = TransLogger( app ) log.debug( "Enabling 'trans logger' middleware" ) # Config middleware just stores the paste config along with the request, --- /dev/null +++ b/lib/galaxy/web/framework/middleware/translogger.py @@ -0,0 +1,116 @@ +# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) +# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php +""" +Middleware for logging requests, using Apache combined log format +""" + +import logging +import time +import urllib + +class TransLogger(object): + """ + This logging middleware will log all requests as they go through. + They are, by default, sent to a logger named ``'wsgi'`` at the + INFO level. + + If ``setup_console_handler`` is true, then messages for the named + logger will be sent to the console. 
+ """ + + format = ('%(REMOTE_ADDR)s - %(REMOTE_USER)s [%(time)s] ' + '"%(REQUEST_METHOD)s %(REQUEST_URI)s %(HTTP_VERSION)s" ' + '%(status)s %(bytes)s "%(HTTP_REFERER)s" "%(HTTP_USER_AGENT)s"') + + def __init__(self, application, + logger=None, + format=None, + logging_level=logging.INFO, + logger_name='wsgi', + setup_console_handler=True, + set_logger_level=logging.DEBUG): + if format is not None: + self.format = format + self.application = application + self.logging_level = logging_level + self.logger_name = logger_name + if logger is None: + self.logger = logging.getLogger(self.logger_name) + if setup_console_handler: + console = logging.StreamHandler() + console.setLevel(logging.DEBUG) + # We need to control the exact format: + console.setFormatter(logging.Formatter('%(message)s')) + self.logger.addHandler(console) + self.logger.propagate = False + if set_logger_level is not None: + self.logger.setLevel(set_logger_level) + else: + self.logger = logger + + def __call__(self, environ, start_response): + start = time.localtime() + req_uri = urllib.quote(environ.get('SCRIPT_NAME', '') + + environ.get('PATH_INFO', '')) + if environ.get('QUERY_STRING'): + req_uri += '?'+environ['QUERY_STRING'] + method = environ['REQUEST_METHOD'] + def replacement_start_response(status, headers, exc_info=None): + # @@: Ideally we would count the bytes going by if no + # content-length header was provided; but that does add + # some overhead, so at least for now we'll be lazy. 
+ bytes = None + for name, value in headers: + if name.lower() == 'content-length': + bytes = value + self.write_log(environ, method, req_uri, start, status, bytes) + return start_response( status, headers, exc_info ) + return self.application(environ, replacement_start_response) + + def write_log(self, environ, method, req_uri, start, status, bytes): + if bytes is None: + bytes = '-' + if time.daylight: + offset = time.altzone / 60 / 60 * -100 + else: + offset = time.timezone / 60 / 60 * -100 + if offset >= 0: + offset = "+%0.4d" % (offset) + elif offset < 0: + offset = "%0.4d" % (offset) + d = { + 'REMOTE_ADDR': environ.get('REMOTE_ADDR') or '-', + 'REMOTE_USER': environ.get('REMOTE_USER') or '-', + 'REQUEST_METHOD': method, + 'REQUEST_URI': req_uri, + 'HTTP_VERSION': environ.get('SERVER_PROTOCOL'), + 'time': time.strftime('%d/%b/%Y:%H:%M:%S ', start) + offset, + 'status': status.split(None, 1)[0], + 'bytes': bytes, + 'HTTP_REFERER': environ.get('HTTP_REFERER', '-'), + 'HTTP_USER_AGENT': environ.get('HTTP_USER_AGENT', '-'), + } + message = self.format % d + self.logger.log(self.logging_level, message) + +def make_filter( + app, global_conf, + logger_name='wsgi', + format=None, + logging_level=logging.INFO, + setup_console_handler=True, + set_logger_level=logging.DEBUG): + from paste.util.converters import asbool + if isinstance(logging_level, basestring): + logging_level = logging._levelNames[logging_level] + if isinstance(set_logger_level, basestring): + set_logger_level = logging._levelNames[set_logger_level] + return TransLogger( + app, + format=format or None, + logging_level=logging_level, + logger_name=logger_name, + setup_console_handler=asbool(setup_console_handler), + set_logger_level=set_logger_level) + +make_filter.__doc__ = TransLogger.__doc__

1 0

galaxy-dist commit 77575a5f348d: lims:
by commits-noreply＠bitbucket.org 20 Aug '10

20 Aug '10

# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User rc # Date 1280930211 14400 # Node ID 77575a5f348d7d10c1df423c5faa71abaefd5f8a # Parent 6e19bc97f2b8ba816ab20fc53a88f3504c992e00 lims: - bug fix: now an admin cannot delete a transfer dataset when the transfer has started. - added user warning when a dataset is deleted. --- a/lib/galaxy/web/controllers/requests_admin.py +++ b/lib/galaxy/web/controllers/requests_admin.py @@ -593,16 +593,25 @@ class RequestsAdmin( BaseController ): elif operation == "delete": id_list = util.listify( kwd['id'] ) + not_deleted = [] for id in id_list: sample_dataset = trans.sa_session.query( trans.app.model.SampleDataset ).get( trans.security.decode_id(id) ) sample_id = sample_dataset.sample_id - trans.sa_session.delete( sample_dataset ) - trans.sa_session.flush() + if sample_dataset.status == sample_dataset.sample.transfer_status.NOT_STARTED: + trans.sa_session.delete( sample_dataset ) + trans.sa_session.flush() + else: + not_deleted.append(sample_dataset.name) + message = '%i dataset(s) have been successfully deleted. ' % (len(id_list) - len(not_deleted)) + status = 'done' + if not_deleted: + status = 'warning' + message = message + '%s could not be deleted. Only datasets with transfer status "Not Started" can be deleted. ' % str(not_deleted) return trans.response.send_redirect( web.url_for( controller='requests_admin', action='manage_datasets', sample_id=sample_id, - status='done', - message="%i dataset(s) have been removed." % len(id_list)) ) + status=status, + message=message) ) elif operation == "rename": id_list = util.listify( kwd['id'] )

1 0

galaxy-dist commit 2447b9a4dae3: Make "loc files" more flexible by adding "tool data tables". These are
by commits-noreply＠bitbucket.org 20 Aug '10

20 Aug '10

# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User James Taylor <james(a)jamestaylor.org> # Date 1278614219 14400 # Node ID 2447b9a4dae30b17df089290a6471f3401fa5f78 # Parent e683c6995fb5d032660d1c28cff2e1bc578d1298 Make "loc files" more flexible by adding "tool data tables". These are configured at the application level. Specific tabular data files are specified in a application config file and bound to names, the tools then refer to these names. Thus users can configure where location files are located without modifying tool configs. Also: - Simpler column name configuration - Columns can be referred to by name in addition to index in all dynamic option filters - A data table can merge multiple files - Design can support other types of data files --- a/lib/galaxy/tools/parameters/dynamic_options.py +++ b/lib/galaxy/tools/parameters/dynamic_options.py @@ -46,9 +46,9 @@ class StaticValueFilter( Filter ): Filter.__init__( self, d_option, elem ) self.value = elem.get( "value", None ) assert self.value is not None, "Required 'value' attribute missing from filter" - self.column = elem.get( "column", None ) - assert self.column is not None, "Required 'column' attribute missing from filter, when loading from file" - self.column = int ( self.column ) + column = elem.get( "column", None ) + assert column is not None, "Required 'column' attribute missing from filter, when loading from file" + self.column = d_option.column_spec_to_index( column ) self.keep = string_as_bool( elem.get( "keep", 'True' ) ) def filter_options( self, options, trans, other_values ): rval = [] @@ -81,11 +81,11 @@ class DataMetaFilter( Filter ): d_option.has_dataset_dependencies = True self.key = elem.get( "key", None ) assert self.key is not None, "Required 'key' attribute missing from filter" - self.column = elem.get( "column", None ) - if self.column is None: + column = elem.get( "column", None ) + if column is None: assert 
self.dynamic_option.file_fields is None and self.dynamic_option.dataset_ref_name is None, "Required 'column' attribute missing from filter, when loading from file" else: - self.column = int ( self.column ) + self.column = d_option.column_spec_to_index( column ) self.multiple = string_as_bool( elem.get( "multiple", "False" ) ) self.separator = elem.get( "separator", "," ) def get_dependency_name( self ): @@ -141,9 +141,9 @@ class ParamValueFilter( Filter ): Filter.__init__( self, d_option, elem ) self.ref_name = elem.get( "ref", None ) assert self.ref_name is not None, "Required 'ref' attribute missing from filter" - self.column = elem.get( "column", None ) - assert self.column is not None, "Required 'column' attribute missing from filter" - self.column = int ( self.column ) + column = elem.get( "column", None ) + assert column is not None, "Required 'column' attribute missing from filter" + self.column = d_option.column_spec_to_index( column ) self.keep = string_as_bool( elem.get( "keep", 'True' ) ) def get_dependency_name( self ): return self.ref_name @@ -168,9 +168,9 @@ class UniqueValueFilter( Filter ): """ def __init__( self, d_option, elem ): Filter.__init__( self, d_option, elem ) - self.column = elem.get( "column", None ) - assert self.column is not None, "Required 'column' attribute missing from filter" - self.column = int ( self.column ) + column = elem.get( "column", None ) + assert column is not None, "Required 'column' attribute missing from filter" + self.column = d_option.column_spec_to_index( column ) def get_dependency_name( self ): return self.dynamic_option.dataset_ref_name def filter_options( self, options, trans, other_values ): @@ -196,9 +196,9 @@ class MultipleSplitterFilter( Filter ): def __init__( self, d_option, elem ): Filter.__init__( self, d_option, elem ) self.separator = elem.get( "separator", "," ) - self.columns = elem.get( "column", None ) - assert self.columns is not None, "Required 'columns' attribute missing from filter" - 
self.columns = [ int ( column ) for column in self.columns.split( "," ) ] + columns = elem.get( "column", None ) + assert columns is not None, "Required 'columns' attribute missing from filter" + self.columns = [ d_option.column_spec_to_index( column ) for column in columns.split( "," ) ] def filter_options( self, options, trans, other_values ): rval = [] for fields in options: @@ -302,9 +302,9 @@ class SortByColumnFilter( Filter ): """ def __init__( self, d_option, elem ): Filter.__init__( self, d_option, elem ) - self.column = elem.get( "column", None ) - assert self.column is not None, "Required 'column' attribute missing from filter" - self.column = int( self.column ) + column = elem.get( "column", None ) + assert column is not None, "Required 'column' attribute missing from filter" + self.column = d_option.column_spec_to_index( column ) def filter_options( self, options, trans, other_values ): rval = [] for i, fields in enumerate( options ): @@ -354,20 +354,25 @@ class DynamicOptions( object ): data_file = elem.get( 'from_file', None ) dataset_file = elem.get( 'from_dataset', None ) from_parameter = elem.get( 'from_parameter', None ) - if data_file is not None or dataset_file is not None or from_parameter is not None: - for column_elem in elem.findall( 'column' ): - name = column_elem.get( 'name', None ) - assert name is not None, "Required 'name' attribute missing from column def" - index = column_elem.get( 'index', None ) - assert index is not None, "Required 'index' attribute missing from column def" - index = int( index ) - self.columns[name] = index - if index > self.largest_index: - self.largest_index = index - assert 'value' in self.columns, "Required 'value' column missing from column def" - if 'name' not in self.columns: - self.columns['name'] = self.columns['value'] + tool_data_table_name = elem.get( 'from_data_table', None ) + + # Options are defined from a data table loaded by the app + self.tool_data_table = None + if tool_data_table_name: + app = 
tool_param.tool.app + assert tool_data_table_name in app.tool_data_tables, \ + "Data table named '%s' is required by tool but not configured" % tool_data_table_name + self.tool_data_table = app.tool_data_tables[ tool_data_table_name ] + # Column definitions are optional, but if provided override those from the table + if elem.find( "column" ) is not None: + self.parse_column_definitions( elem ) + else: + self.columns = self.tool_data_table.columns + # Options are defined by parsing tabular text data from an data file + # on disk, a dataset, or the value of another parameter + elif data_file is not None or dataset_file is not None or from_parameter is not None: + self.parse_column_definitions( elem ) if data_file is not None: data_file = data_file.strip() if not os.path.isabs( data_file ): @@ -388,6 +393,20 @@ class DynamicOptions( object ): # Load Validators for validator in elem.findall( 'validator' ): self.validators.append( validation.Validator.from_element( self.tool_param, validator ) ) + + def parse_column_definitions( self, elem ): + for column_elem in elem.findall( 'column' ): + name = column_elem.get( 'name', None ) + assert name is not None, "Required 'name' attribute missing from column def" + index = column_elem.get( 'index', None ) + assert index is not None, "Required 'index' attribute missing from column def" + index = int( index ) + self.columns[name] = index + if index > self.largest_index: + self.largest_index = index + assert 'value' in self.columns, "Required 'value' column missing from column def" + if 'name' not in self.columns: + self.columns['name'] = self.columns['value'] def parse_file_fields( self, reader ): rval = [] @@ -421,6 +440,8 @@ class DynamicOptions( object ): assert dataset is not None, "Required dataset '%s' missing from input" % self.dataset_ref_name if not dataset: return [] #no valid dataset in history options = self.parse_file_fields( open( dataset.file_name ) ) + elif self.tool_data_table: + options = 
self.tool_data_table.get_fields() else: options = list( self.file_fields ) for filter in self.filters: @@ -429,7 +450,7 @@ class DynamicOptions( object ): def get_options( self, trans, other_values ): rval = [] - if self.file_fields is not None or self.dataset_ref_name is not None: + if self.file_fields is not None or self.tool_data_table is not None or self.dataset_ref_name is not None: options = self.get_fields( trans, other_values ) for fields in options: rval.append( ( fields[self.columns['name']], fields[self.columns['value']], False ) ) @@ -437,3 +458,15 @@ class DynamicOptions( object ): for filter in self.filters: rval = filter.filter_options( rval, trans, other_values ) return rval + + def column_spec_to_index( self, column_spec ): + """ + Convert a column specification (as read from the config file), to an + index. A column specification can just be a number, a column name, or + a column alias. + """ + # Name? + if column_spec in self.columns: + return self.columns[column_spec] + # Int? 
+ return int( column_spec ) --- a/tools/sr_mapping/bowtie_wrapper.xml +++ b/tools/sr_mapping/bowtie_wrapper.xml @@ -192,10 +192,13 @@ </param><when value="indexed"><param name="index" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact Galaxy team"> + <options from_data_table="bowtie_indexes"/> + </param></when><when value="history"> --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -231,13 +231,17 @@ def rst_to_html( s ): log.warn( str ) return docutils.core.publish_string( s, writer=HTMLFragWriter(), settings_overrides=dict( warning_stream=FakeStream() ) ) -def xml_text(root, name): +def xml_text(root, name=None): """Returns the text inside an element""" - # Try attribute first - val = root.get(name) - if val: return val - # Then try as element - elem = root.find(name) + if name is not None: + # Try attribute first + val = root.get(name) + if val: + return val + # Then try as element + elem = root.find(name) + else: + elem = root if elem is not None and elem.text: text = ''.join(elem.text.splitlines()) return text.strip() --- a/setup.sh +++ b/setup.sh @@ -7,6 +7,7 @@ SAMPLES=" datatypes_conf.xml.sample reports_wsgi.ini.sample tool_conf.xml.sample +tool_data_table_conf.xml.sample universe_wsgi.ini.sample tool-data/alignseq.loc.sample tool-data/annotation_profiler_options.xml.sample --- a/tools/sr_mapping/bwa_wrapper.xml +++ b/tools/sr_mapping/bwa_wrapper.xml @@ -34,10 +34,13 @@ </param><when value="indexed"><param name="indices" type="select" label="Select a reference genome"> + <options from_data_table="bwa_indexes"/> + </param></when><when value="history"> --- a/lib/galaxy/app.py +++ b/lib/galaxy/app.py @@ -2,6 +2,7 @@ import sys, os, atexit from galaxy import config, jobs, util, tools, web import galaxy.tools.search +import galaxy.tools.data from galaxy.web import security import galaxy.model import galaxy.datatypes.registry @@ -36,6 +37,8 @@ class UniverseApplication( object ): 
self.security = security.SecurityHelper( id_secret=self.config.id_secret ) # Tag handler self.tag_handler = GalaxyTagHandler() + # Tool data tables + self.tool_data_tables = galaxy.tools.data.ToolDataTableManager( self.config.tool_data_table_config_path ) # Initialize the tools self.toolbox = tools.ToolBox( self.config.tool_config, self.config.tool_path, self ) # Search support for tools --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -48,6 +48,7 @@ class Configuration( object ): self.tool_data_path = resolve_path( kwargs.get( "tool_data_path", "tool-data" ), os.getcwd() ) self.test_conf = resolve_path( kwargs.get( "test_conf", "" ), self.root ) self.tool_config = resolve_path( kwargs.get( 'tool_config_file', 'tool_conf.xml' ), self.root ) + self.tool_data_table_config_path = resolve_path( kwargs.get( 'tool_data_table_config_path', 'tool_data_table_conf.xml' ), self.root ) self.tool_secret = kwargs.get( "tool_secret", "" ) self.id_secret = kwargs.get( "id_secret", "USING THE DEFAULT IS NOT SECURE!" 
) self.set_metadata_externally = string_as_bool( kwargs.get( "set_metadata_externally", "False" ) ) --- a/tools/maf/interval2maf.xml +++ b/tools/maf/interval2maf.xml @@ -32,22 +32,24 @@ </when><when value="cached"><param name="mafType" type="select" label="Choose alignments"> - <options from_file="maf_index.loc"> + <options from_data_table="indexed_maf_files"> +  + <filter type="data_meta" ref="input1" key="dbkey" column="dbkey" multiple="True" separator=","/><validator type="no_options" message="No alignments are available for the build associated with the selected interval file"/></options></param><param name="species" type="select" display="checkboxes" multiple="true" label="Choose species" help="Select species to be included in the final alignment"> - <options from_file="maf_index.loc"> + <options from_data_table="indexed_maf_files"><column name="uid" index="1"/><column name="value" index="3"/><column name="name" index="3"/> - <filter type="param_value" ref="mafType" name="uid" column="1"/> - <filter type="multiple_splitter" column="3" separator=","/> + <filter type="param_value" ref="mafType" column="uid"/> + <filter type="multiple_splitter" column="name" separator=","/></options></param></when>

1 0

galaxy-dist commit c164c2fb3a65: Update tool data table config file
by commits-noreply＠bitbucket.org 20 Aug '10

20 Aug '10

# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User James Taylor <james(a)jamestaylor.org> # Date 1278616898 14400 # Node ID c164c2fb3a6516f8a086956a118145ae4aa1093f # Parent cd941e492bc0bc6930b8ac32190459d9d536078b Update tool data table config file --- a/tool_data_table_conf.xml.sample +++ b/tool_data_table_conf.xml.sample @@ -1,6 +1,17 @@ <tables> + <table name="indexed_maf_files"> - <column_names>name, value, dbkey, species</column_names> - <file name="tool-data/maf_index.loc" /> + <columns>name, value, dbkey, species</columns> + <file path="tool-data/maf_index.loc" /> + </table> +  + <table name="bwa_indexes"> + <columns>name, value</columns> + <file path="tool-data/bwa_index.loc" /> + </table> +  + <table name="bowtie_indexes"> + <columns>name, value</columns> + <file path="tool-data/bowtie_indices.loc" /></table></tables>

1 0

galaxy-dist commit cd941e492bc0: Two missing files from previous commit (tool data tables)
by commits-noreply＠bitbucket.org 20 Aug '10

20 Aug '10

# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User James Taylor <james(a)jamestaylor.org> # Date 1278615679 14400 # Node ID cd941e492bc0bc6930b8ac32190459d9d536078b # Parent 2447b9a4dae30b17df089290a6471f3401fa5f78 Two missing files from previous commit (tool data tables) --- /dev/null +++ b/tool_data_table_conf.xml.sample @@ -0,0 +1,6 @@ +<tables> + <table name="indexed_maf_files"> + <column_names>name, value, dbkey, species</column_names> + <file name="tool-data/maf_index.loc" /> + </table> +</tables> --- /dev/null +++ b/lib/galaxy/tools/data/__init__.py @@ -0,0 +1,131 @@ +""" +Manage tool data tables, which store (at the application level) data that is +used by tools, for example in the generation of dynamic options. Tables are +loaded and stored by names which tools use to refer to them. This allows +users to configure data tables for a local Galaxy instance without needing +to modify the tool configurations. +""" + +import logging, sys, os.path +from galaxy import util + +log = logging.getLogger( __name__ ) + +class ToolDataTableManager( object ): + """ + Manages a collection of tool data tables + """ + + def __init__( self, config_filename=None ): + self.data_tables = {} + if config_filename: + self.add_from_config_file( config_filename ) + + def __getitem__( self, key ): + return self.data_tables.__getitem__( key ) + + def __contains__( self, key ): + return self.data_tables.__contains__( key ) + + def add_from_config_file( self, config_filename ): + tree = util.parse_xml( config_filename ) + root = tree.getroot() + for table_elem in root.findall( 'table' ): + type = table_elem.get( 'type', 'tabular' ) + assert type in tool_data_table_types, "Unknown data table type '%s'" % type + table = tool_data_table_types[ type ]( table_elem ) + self.data_tables[ table.name ] = table + log.debug( "Loaded tool data table '%s", table.name ) + print >> sys.stderr, repr( self.data_tables ) + +class 
ToolDataTable( object ): + def __init__( self, config_element ): + self.name = config_element.get( 'name' ) + +class TabularToolDataTable( ToolDataTable ): + """ + Data stored in a tabular / separated value format on disk, allows multiple + files to be merged but all must have the same column definitions. + + <table type="tabular" name="test"> + <column name='...' index = '...' /> + <file path="..." /> + <file path="..." /> + </table> + """ + + type_key = 'tabular' + + def __init__( self, config_element ): + super( TabularToolDataTable, self ).__init__( config_element ) + self.configure_and_load( config_element ) + + def configure_and_load( self, config_element ): + """ + Configure and load table from an XML element. + """ + self.separator = config_element.get( 'separator', '\t' ) + self.comment_char = config_element.get( 'comment_char', '#' ) + # Configure columns + self.parse_column_spec( config_element ) + # Read every file + all_rows = [] + for file_element in config_element.findall( 'file' ): + filename = file_element.get( 'path' ) + assert os.path.exists( filename ), \ + "Cannot find index file '%s' for tool data table '%s'" % ( filename, self.name ) + all_rows.extend( self.parse_file_fields( open( filename ) ) ) + self.data = all_rows + + def get_fields( self ): + return self.data + + def parse_column_spec( self, config_element ): + """ + Parse column definitions, which can either be a set of 'column' elements + with a name and index (as in dynamic options config), or a shorthand + comma separated list of names in order as the text of a 'column_names' + element. + + A column named 'value' is required. 
+ """ + self.columns = {} + if config_element.find( 'columns' ) is not None: + column_names = util.xml_text( config_element.find( 'columns' ) ) + column_names = [ n.strip() for n in column_names.split( ',' ) ] + for index, name in enumerate( column_names ): + self.columns[ name ] = index + self.largest_index = index + else: + for column_elem in config_element.findall( 'column' ): + name = column_elem.get( 'name', None ) + assert name is not None, "Required 'name' attribute missing from column def" + index = column_elem.get( 'index', None ) + assert index is not None, "Required 'index' attribute missing from column def" + index = int( index ) + self.columns[name] = index + if index > self.largest_index: + self.largest_index = index + assert 'value' in self.columns, "Required 'value' column missing from column def" + if 'name' not in self.columns: + self.columns['name'] = self.columns['value'] + + def parse_file_fields( self, reader ): + """ + Parse separated lines from file and return a list of tuples. + + TODO: Allow named access to fields using the column names. + """ + rval = [] + for line in reader: + if line.lstrip().startswith( self.comment_char ): + continue + line = line.rstrip( "\n\r" ) + if line: + fields = line.split( self.separator ) + if self.largest_index < len( fields ): + rval.append( fields ) + return rval + +# Registry of tool data types by type_key +tool_data_table_types = dict( [ ( cls.type_key, cls ) for cls in [ TabularToolDataTable ] ] )

1 0