# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User jeremy goecks <jeremy.goecks@emory.edu>
# Date 1286804627 14400
# Node ID b67978eeba2c67c86453515d90f81a13acb88586
# Parent ddcfb334eb3ad53425a62f4b094374b219daa455
Add framework for specifying dynamic filters for tracks; framework uses datatype and dataset metadata to specify filters. VCF data provider uses framework to define filter for quality scores. Trackster client does not yet support filters.

--- a/lib/galaxy/datatypes/tabular.py
+++ b/lib/galaxy/datatypes/tabular.py
@@ -462,7 +462,14 @@ class ElandMulti( Tabular ):
         return False
 
 class Vcf( Tabular ):
+    """ Variant Call Format for describing SNPs and other simple genome variations. """
 
+    file_ext = 'vcf'
+    column_names = [ 'Chrom', 'Pos', 'ID', 'Ref', 'Alt', 'Qual', 'Filter', 'Info', 'Format', 'data' ]
+
+    MetadataElement( name="columns", default=10, desc="Number of columns", readonly=True, visible=False )
+    MetadataElement( name="column_types", default=['str','int','str','str','str','int','str','list','str','str'], param=metadata.ColumnTypesParameter, desc="Column types", readonly=True, visible=False )
+    MetadataElement( name="viz_filter_columns", default=[5] )
 
     def sniff( self, filename ):
         try:
@@ -472,6 +479,23 @@ class Vcf( Tabular ):
             return True
         except:
             return False
+
+    def make_html_table( self, dataset, skipchars=[] ):
+        """Create HTML table, used for displaying peek"""
+        out = ['<table cellspacing="0" cellpadding="3">']
+        try:
+            # Generate column header
+            out.append( '<tr>' )
+            for i, name in enumerate( self.column_names ):
+                out.append( '<th>%s.%s</th>' % ( str( i+1 ), name ) )
+            # Close the header row opened above so the table markup is balanced.
+            out.append( '</tr>' )
+            out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) )
+            out.append( '</table>' )
+            out = "".join( out )
+        except Exception, exc:
+            out = "Can't create peek %s" % exc
+        return out
 
     def get_track_type( self ):
         return "FeatureTrack", {"data": "interval_index", "index": "summary_tree"}
--- a/lib/galaxy/web/controllers/tracks.py
+++ b/lib/galaxy/web/controllers/tracks.py
@@ -24,7 +24,8 @@ from galaxy.visualization.tracks.data.ar
 from galaxy.visualization.tracks.data.interval_index import IntervalIndexDataProvider
 from galaxy.visualization.tracks.data.bam import BamDataProvider
 from galaxy.visualization.tracks.data.summary_tree import SummaryTreeDataProvider
-from galaxy.visualization.tracks.data.vcf import VCFDataProvider
+from galaxy.visualization.tracks.data.vcf import VcfDataProvider
+from galaxy.visualization.tracks.data.base import dataset_to_data_provider
 
 # Message strings returned to browser
 messages = Bunch(
@@ -39,9 +40,10 @@ messages = Bunch(
 # Mapping from dataset type to a class that can fetch data from a file of that
 # type. First key is converted dataset type; if result is another dict, second key
 # is original dataset type. TODO: This needs to be more flexible.
+# TODO: move this mapping into TracksDataProvider
 dataset_type_to_data_provider = {
     "array_tree": ArrayTreeDataProvider,
-    "interval_index": { "vcf": VCFDataProvider, "default" : IntervalIndexDataProvider },
+    "interval_index": { "vcf": VcfDataProvider, "default" : IntervalIndexDataProvider },
     "bai": BamDataProvider,
     "summary_tree": SummaryTreeDataProvider
 }
@@ -150,12 +152,15 @@ class TracksController( BaseController,
         hda_query = trans.sa_session.query( model.HistoryDatasetAssociation )
         dataset = hda_query.get( dataset_id )
         track_type, _ = dataset.datatype.get_track_type()
+        track_data_provider_class = dataset_to_data_provider( dataset )
+        track_data_provider = track_data_provider_class( original_dataset=dataset )
 
         track = {
             "track_type": track_type,
             "name": dataset.name,
             "dataset_id": dataset.id,
             "prefs": {},
+            "filters": track_data_provider.get_filters()
         }
         return track
 
--- a/lib/galaxy/visualization/tracks/data/base.py
+++ b/lib/galaxy/visualization/tracks/data/base.py
@@ -1,7 +1,17 @@
+from galaxy.datatypes.tabular import Vcf
+from galaxy.visualization.tracks import data
+
class TracksDataProvider( object ): """ Base class for tracks data providers. """ - def __init__( self, converted_dataset, original_dataset ): + """ + Mapping from column name to index in data. This mapping is used to create + filters. + """ + col_name_data_index_mapping = {} + + def __init__( self, converted_dataset=None, original_dataset=None ): + """ Create basic data provider. """ self.converted_dataset = converted_dataset self.original_dataset = original_dataset @@ -9,3 +19,59 @@ class TracksDataProvider( object ): """ Returns data in region defined by chrom, start, and end. """ # Override. pass + + def get_filters( self ): + """ + Returns filters for provider's data. Return value is a list of + filters; each filter is a dictionary with the keys 'name', 'index', 'value'. + NOTE: This method uses the original dataset's datatype and metadata to + create the filters. + """ + # Get column names. + try: + column_names = self.original_dataset.datatype.column_names + except AttributeError: + column_names = range( self.original_dataset.metadata.columns ) + + # Dataset must have column types; if not, cannot create filters. + try: + column_types = self.original_dataset.metadata.column_types + except AttributeError: + return [] + + # Create and return filters. + filters = [] + if self.original_dataset.metadata.viz_filter_columns: + for viz_col_index in self.original_dataset.metadata.viz_filter_columns: + col_name = column_names[ viz_col_index ] + # Make sure that column has a mapped index. If not, do not add filter. + try: + index = self.col_name_data_index_mapping[ col_name ] + except KeyError: + continue + filters.append( + { 'name' : col_name, 'value' : column_types[viz_col_index], \ + 'index' : index } ) + return filters + + +# +# Helper methods. +# + +def dataset_to_data_provider( dataset=None ): + """ + Returns data provider for a dataset. 
+ """ + # TODO: merge this method with the dict in tracks controller to provide a + # unified way to get data providers based on dataset/converted dataset type. + if isinstance( dataset.datatype, Vcf ): + return data.vcf.VcfDataProvider + else: + try: + # If get_track_type is available, then generic data provider + # should work. + dataset.datatype.get_track_type() + return TracksDataProvider + except e: + return None --- a/lib/galaxy/visualization/tracks/data/vcf.py +++ b/lib/galaxy/visualization/tracks/data/vcf.py @@ -1,10 +1,3 @@ -""" -VCF data provider for the Galaxy track browser. - -Payload format: -[ uid (offset), start, end, ID, reference base(s), alternate base(s), quality score] -""" - import pkg_resources; pkg_resources.require( "bx-python" ) from bx.interval_index_file import Indexes from galaxy.datatypes.tabular import Vcf @@ -12,8 +5,15 @@ from base import TracksDataProvider MAX_VALS = 5000 # only display first MAX_VALS features -class VCFDataProvider( TracksDataProvider ): - """ Provides data for VCF tracks. """ +class VcfDataProvider( TracksDataProvider ): + """ + VCF data provider for the Galaxy track browser. + + Payload format: + [ uid (offset), start, end, ID, reference base(s), alternate base(s), quality score] + """ + + col_name_data_index_mapping = { 'Qual' : 6 } def get_data( self, chrom, start, end, **kwargs ): """ Returns data in region defined by chrom, start, and end. 
""" --- a/lib/galaxy/web/base/controller.py +++ b/lib/galaxy/web/base/controller.py @@ -9,6 +9,7 @@ from galaxy.model.orm import * from galaxy.workflow.modules import * from galaxy.web.framework import simplejson from galaxy.web.form_builder import AddressField, CheckboxField, SelectField, TextArea, TextField, WorkflowField +from galaxy.visualization.tracks.data.base import dataset_to_data_provider from Cheetah.Template import Template @@ -169,11 +170,15 @@ class UsesVisualization( SharableItemSec prefs = {} dataset = hda_query.get( dataset_id ) track_type, _ = dataset.datatype.get_track_type() + track_data_provider_class = dataset_to_data_provider( dataset ) + track_data_provider = track_data_provider_class( original_dataset=dataset ) + tracks.append( { "track_type": track_type, "name": dataset.name, "dataset_id": dataset.id, "prefs": simplejson.dumps(prefs), + "filters": track_data_provider.get_filters() } ) config = { "title": visualization.title, "vis_id": trans.security.encode_id( visualization.id ),