details:   http://www.bx.psu.edu/hg/galaxy/rev/bed484895e2d
changeset: 2721:bed484895e2d
user:      James Taylor <james@jamestaylor.org>
date:      Fri Sep 18 15:15:39 2009 -0400
description:
More work on server side for tracks. Now uses converters instead of
indexers. A wiggle to array_tree converter is provided. Still need to
purge indexer stuff. This will not work without new bx egg (or likely
at all)

12 file(s) affected in this change:

datatypes_conf.xml.sample
lib/galaxy/datatypes/converters/wiggle_to_array_tree_converter.py
lib/galaxy/datatypes/converters/wiggle_to_array_tree_converter.xml
lib/galaxy/tracks/__init__.py
lib/galaxy/tracks/messages.py
lib/galaxy/tracks/store.py
lib/galaxy/visualization/__init__.py
lib/galaxy/visualization/tracks/__init__.py
lib/galaxy/visualization/tracks/data/__init__.py
lib/galaxy/visualization/tracks/data/array_tree.py
lib/galaxy/web/controllers/tracks.py
static/scripts/trackster.js

diffs (626 lines):

diff -r 3f3712d36034 -r bed484895e2d datatypes_conf.xml.sample
--- a/datatypes_conf.xml.sample	Fri Sep 18 15:18:57 2009 -0400
+++ b/datatypes_conf.xml.sample	Fri Sep 18 15:15:39 2009 -0400
@@ -58,8 +58,9 @@
     <datatype extension="blastxml" type="galaxy.datatypes.xml:BlastXml" display_in_upload="true"/>
     <datatype extension="txtseq.zip" type="galaxy.datatypes.images:Txtseq" mimetype="application/zip" display_in_upload="true"/>
     <datatype extension="wig" type="galaxy.datatypes.interval:Wiggle" display_in_upload="true">
-      <indexer file="wiggle.xml" />
+      <converter file="wiggle_to_array_tree_converter.xml" target_datatype="array_tree"/>
     </datatype>
+    <datatype extension="array_tree" type="galaxy.datatypes.data:Data" />
     <!-- EMBOSS TOOLS -->
     <datatype extension="acedb" type="galaxy.datatypes.data:Text"/>
     <datatype extension="asn1" type="galaxy.datatypes.data:Text"/>
diff -r 3f3712d36034 -r bed484895e2d lib/galaxy/datatypes/converters/wiggle_to_array_tree_converter.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/converters/wiggle_to_array_tree_converter.py	Fri Sep 18 15:15:39 2009 -0400
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+
+from __future__ import division
+
+import sys
+
+import pkg_resources; pkg_resources.require( "bx-python" )
+from bx.arrays.array_tree import *
+from bx.arrays.wiggle import IntervalReader
+
+def main():
+
+    input_fname = sys.argv[1]
+    out_fname = sys.argv[2]
+
+    reader = IntervalReader( open( input_fname ) )
+
+    # Fill array from wiggle
+    d = array_tree_dict_from_wiggle_reader( reader, {} )
+
+    for value in d.itervalues():
+        value.root.build_summary()
+
+    f = open( out_fname, "w" )
+    FileArrayTreeDict.dict_to_file( d, f )
+    f.close()
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff -r 3f3712d36034 -r bed484895e2d lib/galaxy/datatypes/converters/wiggle_to_array_tree_converter.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/converters/wiggle_to_array_tree_converter.xml	Fri Sep 18 15:15:39 2009 -0400
@@ -0,0 +1,14 @@
+<tool id="INDEXER_Wiggle_0" name="Index Wiggle for Track Viewer">
+  <!-- Used internally to generate track indexes -->
+  <command interpreter="python">wiggle_to_array_tree_converter.py $input $output</command>
+  <inputs>
+    <page>
+      <param format="wiggle" name="input" type="data" label="Choose wiggle"/>
+    </page>
+  </inputs>
+  <outputs>
+    <data format="array_tree" name="output"/>
+  </outputs>
+  <help>
+  </help>
+</tool>
\ No newline at end of file
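Since the converter is a plain command-line script (wiggle file in, serialized array tree out), its effect can be checked outside of Galaxy. A minimal sketch, assuming the new bx egg is importable and using hypothetical file names:

    $ python wiggle_to_array_tree_converter.py input.wig output.array_tree

The output file is a serialized FileArrayTreeDict: one array tree per chromosome seen in the wiggle, with per-block summaries precomputed by build_summary() so coarse zoom levels can be served without touching the raw data.
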
diff -r 3f3712d36034 -r bed484895e2d lib/galaxy/tracks/messages.py
--- a/lib/galaxy/tracks/messages.py	Fri Sep 18 15:18:57 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-PENDING = "pending"
-NO_DATA = "no data"
-NO_CHROMOSOME = "no chromosome"
-DATA = "data"
diff -r 3f3712d36034 -r bed484895e2d lib/galaxy/tracks/store.py
--- a/lib/galaxy/tracks/store.py	Fri Sep 18 15:18:57 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-import os
-import re
-from string import Template
-from galaxy.util import sanitize_text
-
-# extra mappings/escape to keep users from traversing around the
-# filesystem and wreaking havoc
-extra_mappings = { r"/": "__fs__", r"^manifest.tab$": "__manifest.tab__" }
-
-def sanitize_name( name ):
-    name = sanitize_text( name )
-    for key, value in extra_mappings.items():
-        name = re.sub( key, value, name )
-    return name
-
-class TemplateSubber( object ):
-    def __init__(self, obj):
-        self.obj = obj
-    def get( self, key, default=None ):
-        return getattr(self.obj, key, default)
-    def __getitem__(self, key):
-        return self.get(key)
-
-class TrackStoreManager( object ):
-    def __init__(self, path=""):
-        self.path = path
-
-    def get( self, dataset ):
-        s = Template(self.path)
-        return TrackStore( path=s.substitute(TemplateSubber(dataset)) )
-
-class TrackStore( object ):
-    MANIFEST_NAME = "manifest.tab"
-
-    def __init__(self, path=""):
-        self.path = path
-
-    def get_manifest( self ):
-        if not self.exists: raise self.DoesNotExist("TrackStore at %s does not exist." % self.path)
-        manifest_path = os.path.join( self.path, self.MANIFEST_NAME )
-        if os.path.exists( manifest_path ):
-            manifest = {}
-            for line in open( manifest_path ):
-                line = line.rstrip("\n\r")
-                fields = line.split("\t")
-                manifest[fields[0]] = fields[1:]
-            return manifest
-        else:
-            raise self.DoesNotExist( "Manifest for TrackStore object could not be found." )
-
-    def get(self, chrom="chr1", resolution=None, **kwargs):
-        if not self.exists: raise self.DoesNotExist("TrackStore at %s does not exist." % self.path)
-        object_path = self._get_object_path( chrom, resolution )
-        if os.path.exists( object_path ):
-            return open( object_path, "rb" )
-        else:
-            try:
-                return kwargs['default']
-            except KeyError:
-                raise self.DoesNotExist("TrackStore object at %s does not exist." % object_path )
-
-    def set(self, chrom="chr1", resolution=None, data=None):
-        if not self.exists: self._build_path( self.path )
-        if not data: return
-        object_path = self._get_object_path( chrom, resolution )
-        fd = open( object_path, "wb" )
-        fd.write( data )
-        fd.close()
-
-    def _get_object_path( self, chrom, resolution ):
-        object_name = sanitize_name(chrom)
-        if resolution: object_name += "_%d" % resolution
-        return os.path.join( self.path, object_name )
-
-    def _build_path( self, path ):
-        try:
-            os.mkdir( path )
-        except OSError:
-            self._build_path( os.path.dirname( path ) )
-            os.mkdir( path )
-
-    @property
-    def exists(self):
-        return os.path.exists( self.path )
-
-    class DoesNotExist( Exception ):
-        pass
diff -r 3f3712d36034 -r bed484895e2d lib/galaxy/visualization/__init__.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/visualization/__init__.py	Fri Sep 18 15:15:39 2009 -0400
@@ -0,0 +1,3 @@
+"""
+Package for Galaxy visualization plugins.
+"""
\ No newline at end of file
+""" \ No newline at end of file diff -r 3f3712d36034 -r bed484895e2d lib/galaxy/visualization/tracks/data/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/visualization/tracks/data/__init__.py Fri Sep 18 15:15:39 2009 -0400 @@ -0,0 +1,3 @@ +""" +Package for track data providers +""" \ No newline at end of file diff -r 3f3712d36034 -r bed484895e2d lib/galaxy/visualization/tracks/data/array_tree.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/visualization/tracks/data/array_tree.py Fri Sep 18 15:15:39 2009 -0400 @@ -0,0 +1,50 @@ +""" +Array tree data provider for Galaxy track browser. +""" + +import pkg_resources; pkg_resources.require( "bx-python" ) +from bx.arrays.array_tree import FileArrayTreeDict +from math import floor, ceil, log + +# Maybe this should be included in the datatype itself, so users can add their +# own types to the browser as long as they return the right format of data? + +# FIXME: Assuming block size is always 1000 for the moment +BLOCK_SIZE = 1000 + +class ArrayTreeDataProvider( object ): + def __init__( self, dataset ): + self.dataset = dataset + def get_data( self, chrom, start, end ): + start = int( start ) + end = int( end ) + level = int( ceil( log( end - start, BLOCK_SIZE ) ) ) - 1 + print "!!!!", start, end, level + # Open the file + print self.dataset.file_name + d = FileArrayTreeDict( open( self.dataset.file_name ) ) + # Get the right chromosome + try: + chrom_array_tree = d[chrom] + except KeyError: + return None + # Is the requested level valid? + assert 0 <= level <= chrom_array_tree.levels + # Calculate the actual start/range/step of the block we're getting + size = BLOCK_SIZE ** (level+1) + block_start = ( start // BLOCK_SIZE ) * BLOCK_SIZE + block_step = size // BLOCK_SIZE + indexes = range( block_start, block_start + size, block_step ) + # Return either data point or a summary depending on the level + if level > 0: + s = chrom_array_tree.get_summary( start, level ) + if s is not None: + return zip( indexes, map( float, s.sums / s.counts ) ) + else: + return None + else: + v = chrom_array_tree.get_leaf( start ) + if v is not None: + return zip( indexes, map( float, v ) ) + else: + return None \ No newline at end of file diff -r 3f3712d36034 -r bed484895e2d lib/galaxy/web/controllers/tracks.py --- a/lib/galaxy/web/controllers/tracks.py Fri Sep 18 15:18:57 2009 -0400 +++ b/lib/galaxy/web/controllers/tracks.py Fri Sep 18 15:15:39 2009 -0400 @@ -9,18 +9,44 @@ Problems -------- - - Assumes that the only indexing type in Galaxy is for this particular - application. Thus, datatypes can only have one indexer, and the presence - of an indexer results in assuming that datatype can be displayed as a track. - + - Only line tracks supported + - Resolutions are a bit wonky + - Must have a LEN file, not currently able to infer from data (not sure we + need to support that, but need to make user defined build support better) """ import math -from galaxy.tracks import messages from galaxy.util.json import to_json_string from galaxy.web.base.controller import * from galaxy.web.framework import simplejson +from galaxy.util.bunch import Bunch + +from galaxy.visualization.tracks.data.array_tree import ArrayTreeDataProvider + +# Message strings returned to browser +messages = Bunch( + PENDING = "pending", + NO_DATA = "no data", + NO_CHROMOSOME = "no chromosome", + DATA = "data" +) + +# Dataset type required for each track type. 
diff -r 3f3712d36034 -r bed484895e2d lib/galaxy/web/controllers/tracks.py
--- a/lib/galaxy/web/controllers/tracks.py	Fri Sep 18 15:18:57 2009 -0400
+++ b/lib/galaxy/web/controllers/tracks.py	Fri Sep 18 15:15:39 2009 -0400
@@ -9,18 +9,44 @@
 Problems
 --------
-  - Assumes that the only indexing type in Galaxy is for this particular
-    application. Thus, datatypes can only have one indexer, and the presence
-    of an indexer results in assuming that datatype can be displayed as a track.
-
+  - Only line tracks supported
+  - Resolutions are a bit wonky
+  - Must have a LEN file, not currently able to infer from data (not sure we
+    need to support that, but need to make user defined build support better)
 """
 
 import math
 
-from galaxy.tracks import messages
 from galaxy.util.json import to_json_string
 from galaxy.web.base.controller import *
 from galaxy.web.framework import simplejson
+from galaxy.util.bunch import Bunch
+
+from galaxy.visualization.tracks.data.array_tree import ArrayTreeDataProvider
+
+# Message strings returned to browser
+messages = Bunch(
+    PENDING = "pending",
+    NO_DATA = "no data",
+    NO_CHROMOSOME = "no chromosome",
+    DATA = "data"
+)
+
+# Dataset type required for each track type. This needs to be more flexible,
+# there might be multiple types of indexes that suffice for a given track type.
+track_type_to_dataset_type = {
+    "line": "array_tree"
+}
+
+# Mapping from dataset type to a class that can fetch data from a file of that
+# type. This also needs to be more flexible.
+dataset_type_to_data_provider = {
+    "array_tree": ArrayTreeDataProvider
+}
+
+# FIXME: hardcoding this for now, but it should be derived from the available
+# converters
+browsable_types = set( ["wig"] )
 
 class TracksController( BaseController ):
     """
@@ -41,24 +67,26 @@
         session = trans.sa_session
         # If the user clicked the submit button explicitly, try to build the browser
         if browse and dataset_ids:
+            if not isinstance( dataset_ids, list ):
+                dataset_ids = [ dataset_ids ]
             dataset_ids = ",".join( map( str, dataset_ids ) )
             trans.response.send_redirect( web.url_for( controller='tracks', action='browser', chrom="", dataset_ids=dataset_ids ) )
-            return
-        # Determine the set of all dbkeys that are used in the current history
-        dbkeys = [ d.metadata.dbkey for d in trans.get_history().datasets if not d.deleted ]
-        dbkey_set = set( dbkeys )
-        # If a dbkey argument was not provided, or is no longer valid, default
-        # to the first one
-        if dbkey is None or dbkey not in dbkey_set:
-            dbkey = dbkeys[0]
-        # Find all datasets in the current history that are of that dbkey and
-        # have an indexer.
-        datasets = {}
-        for dataset in session.query( model.HistoryDatasetAssociation ).filter_by( deleted=False, history_id=trans.history.id ):
-            if dataset.metadata.dbkey == dbkey and trans.app.datatypes_registry.get_indexers_by_datatype( dataset.extension ):
-                datasets[dataset.id] = dataset.name
-        # Render the template
-        return trans.fill_template( "tracks/new_browser.mako", dbkey=dbkey, dbkey_set=dbkey_set, datasets=datasets )
+        else:
+            # Determine the set of all dbkeys that are used in the current history
+            dbkeys = [ d.metadata.dbkey for d in trans.get_history().datasets if not d.deleted ]
+            dbkey_set = set( dbkeys )
+            # If a dbkey argument was not provided, or is no longer valid, default
+            # to the first one
+            if dbkey is None or dbkey not in dbkey_set:
+                dbkey = dbkeys[0]
+            # Find all datasets in the current history that are of that dbkey
+            # and can be displayed
+            datasets = {}
+            for dataset in session.query( model.HistoryDatasetAssociation ).filter_by( deleted=False, history_id=trans.history.id ):
+                if dataset.metadata.dbkey == dbkey and dataset.extension in browsable_types:
+                    datasets[dataset.id] = dataset.name
+            # Render the template
+            return trans.fill_template( "tracks/new_browser.mako", dbkey=dbkey, dbkey_set=dbkey_set, datasets=datasets )
@@ -75,13 +103,15 @@
                 "id": dataset.id
             } )
         dbkey = dataset.dbkey
-        LEN = self._chroms(trans, dbkey ).get(chrom,0)
+        chrom_lengths = self._chroms( trans, dbkey )
+        if chrom_lengths is None:
+            error( "No chromosome lengths file found for '%s'" % dbkey )
         return trans.fill_template( 'tracks/browser.mako',
                                     dataset_ids=dataset_ids,
                                     tracks=tracks,
                                     chrom=chrom,
                                     dbkey=dbkey,
-                                    LEN=LEN )
+                                    LEN=chrom_lengths.get( chrom, 0 ) )
 
     @web.json
     def chroms(self, trans, dbkey=None ):
@@ -91,85 +121,86 @@
         """
         Called by the browser to get a list of valid chromosomes and lengths
         """
+        # If there is any dataset in the history of extension `len`, this will
+        # use it
         db_manifest = trans.db_dataset_for( dbkey )
         if not db_manifest:
             db_manifest = os.path.join( trans.app.config.tool_data_path, 'shared','ucsc','chrom', "%s.len" % dbkey )
         else:
             db_manifest = db_manifest.file_name
         manifest = {}
-        if os.path.exists( db_manifest ):
-            for line in open( db_manifest ):
-                if line.startswith("#"): continue
-                line = line.rstrip("\r\n")
-                fields = line.split("\t")
-                manifest[fields[0]] = int(fields[1])
-        else:
-            # try to fake a manifest by reading track stores
-            datasets = trans.app.model.HistoryDatasetAssociation.filter_by(deleted=False, history_id=trans.history.id).all()
-            for dataset in datasets:
-                if not dataset.metadata.dbkey == dbkey: continue
-                track_store = trans.app.track_store.get( dataset )
-                if track_store.exists:
-                    try:
-                        for chrom, fields in track_store.get_manifest().items():
-                            manifest[chrom] = max(manifest.get(chrom, 0), int(fields[0]))
-                    except track_store.DoesNotExist:
-                        pass
+        if not os.path.exists( db_manifest ):
+            return None
+        for line in open( db_manifest ):
+            if line.startswith("#"): continue
+            line = line.rstrip("\r\n")
+            fields = line.split("\t")
+            manifest[fields[0]] = int(fields[1])
         return manifest
-
-    @web.json
-    def data( self, trans, dataset_id, chrom="", low="", high="" ):
+
+    @web.json
+    def data( self, trans, dataset_id, track_type, chrom, low, high ):
         """
         Called by the browser to request a block of data
         """
+        # Load the requested dataset
         dataset = trans.app.model.HistoryDatasetAssociation.get( dataset_id )
-        if not dataset: return messages.NO_DATA
+        # No dataset for that id
+        if not dataset:
+            return messages.NO_DATA
+        # Dataset is in error state, can't display
         if dataset.state == trans.app.model.Job.states.ERROR:
             return messages.NO_DATA
-        if not dataset.state == trans.app.model.Job.states.OK:
+        # Dataset is still being generated
+        if dataset.state != trans.app.model.Job.states.OK:
             return messages.PENDING
-        track_store = trans.app.track_store.get( dataset )
-        if not track_store.exists:
-            # Test if we can make a track
-            indexers = trans.app.datatypes_registry.get_indexers_by_datatype( dataset.extension )
-            if indexers:
-                tool = indexers[0] # They are sorted by class chain so use the top one
-                # If we can, return pending and launch job
-                job = trans.app.model.Job()
-                job.session_id = trans.get_galaxy_session().id
-                job.history_id = trans.history.id
-                job.tool_id = tool.id
-                job.tool_version = "1.0.0"
-                job.add_input_dataset( "input_dataset", dataset )
-                job.add_parameter( "input_dataset", to_json_string( dataset.id ) )
-                # This is odd
-                # job.add_output_dataset( "input_dataset", dataset )
-                # create store path, this is rather unclear?
-                track_store.set()
-                job.add_parameter( "store_path", to_json_string( track_store.path ) )
-                job.flush()
-                trans.app.job_manager.job_queue.put( job.id, tool )
-                return messages.PENDING
-            else:
-                return messages.NO_DATA
-        else:
-            # Data for that chromosome or resolution does not exist?
-            # HACK: we're "pending" because the store exists without a manifest
-            try:
-                track_store.get_manifest()
-            except track_store.DoesNotExist:
-                return messages.PENDING
-            if chrom and low and high:
-                low = math.floor(float(low))
-                high = math.ceil(float(high))
-                resolution = dataset.datatype.get_track_resolution( dataset, low, high )
-                try:
-                    data = track_store.get( chrom, resolution )
-                except track_store.DoesNotExist:
-                    return messages.NO_DATA
-                window = dataset.datatype.get_track_window( dataset, data, low, high )
-                glob = {"data":window, "type":dataset.datatype.get_track_type()};
-                if resolution: glob["resolution"] = resolution
-                return window
-            else:
-                return messages.DATA
+        # Determine what to return based on the type of track being drawn.
+        converted_dataset_type = track_type_to_dataset_type[track_type]
+        converted_dataset = self.__dataset_as_type( trans, dataset, converted_dataset_type )
+        # If at this point we still don't have an `array_tree` dataset, there
+        # is no way we can display this data as an array tree
+        if converted_dataset is None:
+            return messages.NO_DATA
+        # Need to check states again for the converted version
+        if converted_dataset.state == model.Dataset.states.ERROR:
+            return messages.NO_DATA
+        if converted_dataset.state != model.Dataset.states.OK:
+            return messages.PENDING
+        # We have a dataset in the right format that is ready to use, wrap in
+        # a data provider that knows how to access it
+        data_provider = dataset_type_to_data_provider[ converted_dataset_type ]( converted_dataset )
+        # Get the requested chunk of data
+        data = data_provider.get_data( chrom, low, high )
+        # Pack into a dictionary and return
+        return data
+
+    def __dataset_as_type( self, trans, dataset, type ):
+        """
+        Given a dataset, try to find a way to adapt it to a different type. If the
+        dataset is already of that type it is returned; if it can be converted, a
+        converted dataset (possibly new) is returned; if it cannot be converted,
+        None is returned.
+        """
+        # Already of correct type
+        if dataset.extension == type:
+            return dataset
+        # See if we can convert the dataset
+        if type not in dataset.get_converter_types():
+            log.debug( "Conversion from '%s' to '%s' not possible", dataset.extension, type )
+            return None
+        # See if converted dataset already exists
+        converted_datasets = dataset.get_converted_files_by_type( type )
+        if converted_datasets:
+            for d in converted_datasets:
+                if d and d.state != 'error':
+                    return d
+        # Conversion is possible but doesn't exist yet, run converter here
+        # FIXME: this is largely duplicated from DefaultToolAction
+        assoc = model.ImplicitlyConvertedDatasetAssociation( parent = dataset, file_type = type, metadata_safe = False )
+        new_dataset = dataset.datatype.convert_dataset( trans, dataset, type, return_output = True, visible = False ).values()[0]
+        new_dataset.hid = dataset.hid # Hrrmmm....
+        new_dataset.name = dataset.name
+        new_dataset.flush()
+        assoc.dataset = new_dataset
+        assoc.flush()
+        return new_dataset
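For reference, the chromosome-length ("%s.len") files that chroms() parses above are plain two-column, tab-separated text, one chromosome per line, with "#" comment lines skipped. A hypothetical hg18.len might begin (values illustrative):

    # chromosome lengths for hg18
    chr1	247249719
    chr2	242951149
    chrX	154913754

If no such file is found, chroms() now returns None and the browser() action reports the missing file instead of silently rendering a zero-length chromosome.
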
diff -r 3f3712d36034 -r bed484895e2d static/scripts/trackster.js
--- a/static/scripts/trackster.js	Fri Sep 18 15:18:57 2009 -0400
+++ b/static/scripts/trackster.js	Fri Sep 18 15:15:39 2009 -0400
@@ -1,4 +1,8 @@
 var DENSITY = 1000;
+
+var BLOCK_SIZE = 1000;
+
+var log = function( x, b ) { return Math.log( x ) / Math.log( b ) }
 
 var View = function( chr, length, low, high ) {
     this.chr = chr;
@@ -83,9 +87,12 @@
         high = this.view.high,
         range = high - low;
 
-    var resolution = Math.pow( 10, Math.ceil( Math.log( range / DENSITY ) / Math.log( 10 ) ) );
+    var resolution = Math.pow( BLOCK_SIZE, Math.floor( log( range, BLOCK_SIZE ) ) );
+    // Math.pow( 10, Math.ceil( Math.log( range / DENSITY ) / Math.log( 10 ) ) );
+
+    // console.log( "resolution:", resolution );
     resolution = Math.max( resolution, 1 );
-    resolution = Math.min( resolution, 100000 );
+    resolution = Math.min( resolution, 1000000 );
 
     var parent_element = $("<div style='position: relative;'></div>");
     this.content_div.children( ":first" ).remove();
@@ -155,7 +162,7 @@
         // use closure to preserve this and parameters for getJSON
         var fetcher = function (ref) {
             return function () {
-                $.getJSON( TRACKSTER_DATA_URL + ref.type, { chrom: ref.view.chr, low: low, high: high, dataset_id: ref.track.dataset_id }, function ( data ) {
+                $.getJSON( TRACKSTER_DATA_URL, { track_type: ref.type, chrom: ref.view.chr, low: low, high: high, dataset_id: ref.track.dataset_id }, function ( data ) {
                     if( data == "pending" ) {
                         setTimeout( fetcher, 5000 );
                     } else {
@@ -175,7 +182,7 @@
     Track.call( this, name, view, parent_element );
     this.container_div.addClass( "line-track" );
     this.dataset_id = dataset_id;
-    this.cache = new DataCache( "", this, view );
+    this.cache = new DataCache( "line", this, view );
 };
 $.extend( LineTrack.prototype, TiledTrack.prototype, {
     make_container: function () {
@@ -209,35 +216,37 @@
         var canvas = element;
         canvas.get(0).width = canvas.width();
         canvas.get(0).height = canvas.height();
-        var ctx = canvas.get(0).getContext("2d");
-        var in_path = false;
-        ctx.beginPath();
-        var data = chunk.values;
-        for ( var i = 0; i < data.length - 1; i++ ) {
-            var x1 = data[i][0] - tile_low;
-            var y1 = data[i][1];
-            var x2 = data[i+1][0] - tile_low;
-            var y2 = data[i+1][1];
-            console.log( x1, y1, x2, y2 );
-            // Missing data causes us to stop drawing
-            if ( isNaN( y1 ) || isNaN( y2 ) ) {
-                in_path = false;
-            } else {
-                // Translate
-                x1 = x1 * w_scale;
-                x2 = x2 * w_scale;
-                y1 = h_scale - y1 * ( h_scale );
-                y2 = h_scale - y2 * ( h_scale );
-                if ( in_path ) {
-                    ctx.lineTo( x1, y1, x2, y2 );
-                } else {
-                    ctx.moveTo( x1, y1, x2, y2 );
-                    in_path = true;
-                }
-            }
-        }
-        ctx.stroke();
-        return element;
+        var data = chunk.values;
+        if ( data ) {
+            var ctx = canvas.get(0).getContext("2d");
+            var in_path = false;
+            ctx.beginPath();
+            // console.log( "Drawing tile" );
+            for ( var i = 0; i < data.length - 1; i++ ) {
+                var x1 = data[i][0] - tile_low;
+                var y1 = data[i][1];
+                var x2 = data[i+1][0] - tile_low;
+                var y2 = data[i+1][1];
+                // Missing data causes us to stop drawing
+                if ( isNaN( y1 ) || isNaN( y2 ) ) {
+                    in_path = false;
+                } else {
+                    // Translate
+                    x1 = x1 * w_scale;
+                    x2 = x2 * w_scale;
+                    y1 = h_scale - y1 * ( h_scale );
+                    y2 = h_scale - y2 * ( h_scale );
+                    if ( in_path ) {
+                        ctx.lineTo( x1, y1, x2, y2 );
+                    } else {
+                        ctx.moveTo( x1, y1, x2, y2 );
+                        in_path = true;
+                    }
+                }
+            }
+            ctx.stroke();
+        }
+        return element;
     }
 });
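A quick check of the new resolution rule in trackster.js, which snaps the visible range to a power of BLOCK_SIZE, presumably so tile boundaries line up with the array tree levels on the server (arithmetic only, shown in Python for brevity):

    from math import floor, log

    BLOCK_SIZE = 1000
    for span in ( 50000, 5000000 ):
        print span, BLOCK_SIZE ** int( floor( log( span, BLOCK_SIZE ) ) )
    # 50000   -> 1000
    # 5000000 -> 1000000

The raised clamp, Math.min( resolution, 1000000 ), keeps the second case reachable; the old limit of 100000 is not a power of 1000 and would never be produced by the new formula.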