commit/galaxy-central: jgoecks: Trackster: improve method for fetching additional data and simply data fetching.
1 new changeset in galaxy-central: http://bitbucket.org/galaxy/galaxy-central/changeset/c6452f6558ee/ changeset: c6452f6558ee user: jgoecks date: 2011-09-13 22:55:17 summary: Trackster: improve method for fetching additional data and simply data fetching. affected #: 3 files (1.4 KB) --- a/lib/galaxy/visualization/tracks/data_providers.py Mon Sep 12 14:51:26 2011 -0400 +++ b/lib/galaxy/visualization/tracks/data_providers.py Tue Sep 13 16:55:17 2011 -0400 @@ -3,7 +3,7 @@ """ import sys -from math import floor, ceil, log, pow +from math import ceil, log import pkg_resources pkg_resources.require( "bx-python" ) if sys.version_info[:2] == (2, 4): @@ -13,14 +13,12 @@ from galaxy.datatypes.util.gff_util import * from galaxy.util.json import from_json_string from bx.interval_index_file import Indexes -from bx.arrays.array_tree import FileArrayTreeDict from bx.bbi.bigwig_file import BigWigFile from galaxy.util.lrucache import LRUCache from galaxy.visualization.tracks.summary import * import galaxy_utils.sequence.vcf from galaxy.datatypes.tabular import Vcf from galaxy.datatypes.interval import Bed, Gff, Gtf -from galaxy.datatypes.util.gff_util import parse_gff_attributes from pysam import csamtools, ctabix @@ -32,7 +30,20 @@ return None else: return float(n) - + +def get_bounds( reads, start_pos_index, end_pos_index ): + """ + Returns the minimum and maximum position for a set of reads. + """ + max_low = sys.maxint + max_high = -sys.maxint + for read in reads: + if read[ start_pos_index ] < max_low: + max_low = read[ start_pos_index ] + if read[ end_pos_index ] > max_high: + max_high = read[ end_pos_index ] + return max_low, max_high + class TracksDataProvider( object ): """ Base class for tracks data providers. """ @@ -75,8 +86,11 @@ def get_data( self, chrom, start, end, start_val=0, max_vals=None, **kwargs ): """ Returns data in region defined by chrom, start, and end. start_val and - max_vals are used to denote the data to return: start_val is the first value to + max_vals are used to denote the data to return: start_val is the first element to return and max_vals indicates the number of values to return. + + Return value must be a dictionary with the following attributes: + dataset_type, data """ # Override. pass @@ -218,18 +232,23 @@ def get_data( self, chrom, start, end, start_val=0, max_vals=sys.maxint, **kwargs ): """ - Fetch reads in the region. + Fetch reads in the region and additional metadata. - Each read is a list with the format - [<guid>, <start>, <end>, <name>, <read_1>, <read_2>] - where <read_1> has the format - [<start>, <end>, <cigar>, ?<read_seq>?] - and <read_2> has the format - [<start>, <end>, <cigar>, ?<read_seq>?] - For single-end reads, read has format: - [<guid>, <start>, <end>, <name>, cigar, seq] - NOTE: read end and sequence data are not valid for reads outside of - requested region and should not be used. + Returns a dict with the following attributes: + data - a list of reads with the format + [<guid>, <start>, <end>, <name>, <read_1>, <read_2>] + where <read_1> has the format + [<start>, <end>, <cigar>, ?<read_seq>?] + and <read_2> has the format + [<start>, <end>, <cigar>, ?<read_seq>?] + For single-end reads, read has format: + [<guid>, <start>, <end>, <name>, cigar, seq] + NOTE: read end and sequence data are not valid for reads outside of + requested region and should not be used. + + max_low - lowest coordinate for the returned reads + max_high - highest coordinate for the returned reads + message - error/informative message """ start, end = int(start), int(end) orig_data_filename = self.original_dataset.file_name @@ -304,9 +323,13 @@ r2 = [ read['mate_start'], read['mate_start'] ] results.append( [ "%i_%s" % ( read_start, qname ), read_start, read_end, qname, r1, r2 ] ) - + + # Clean up. bamfile.close() - return { 'data': results, 'message': message } + + max_low, max_high = get_bounds( results, 1, 2 ) + + return { 'data': results, 'message': message, 'max_low': max_low, 'max_high': max_high } class BBIDataProvider( TracksDataProvider ): """ @@ -334,9 +357,10 @@ return None all_dat = all_dat[0] # only 1 summary - return { 'max': float( all_dat['max'] ), \ - 'min': float( all_dat['min'] ), \ - 'total_frequency': float( all_dat['coverage'] ) } + return { 'data' : { 'max': float( all_dat['max'] ), \ + 'min': float( all_dat['min'] ), \ + 'total_frequency': float( all_dat['coverage'] ) } \ + } start = int(start) end = int(end) @@ -361,7 +385,7 @@ result.append( (pos, float_nan(dat_dict['mean']) ) ) pos += step_size - return result + return { 'data': result } class BigBedDataProvider( BBIDataProvider ): def _get_dataset( self ): @@ -654,7 +678,7 @@ float( feature[5] )] rval.append(payload) - return { 'data_type' : 'vcf', 'data': rval, 'message': message } + return { 'data': rval, 'message': message } class GFFDataProvider( TracksDataProvider ): """ --- a/lib/galaxy/web/controllers/tracks.py Mon Sep 12 14:51:26 2011 -0400 +++ b/lib/galaxy/web/controllers/tracks.py Tue Sep 13 16:55:17 2011 -0400 @@ -530,15 +530,9 @@ data_provider = data_provider_class( converted_dataset=converted_dataset, original_dataset=dataset, dependencies=deps ) # Get and return data from data_provider. - data = data_provider.get_data( chrom, low, high, int(start_val), int(max_vals), **kwargs ) - message = None - if isinstance(data, dict) and 'message' in data: - message = data['message'] - tracks_dataset_type = data.get( 'data_type', tracks_dataset_type ) - track_data = data['data'] - else: - track_data = data - return { 'dataset_type': tracks_dataset_type, 'extra_info': extra_info, 'data': track_data, 'message': message } + result = data_provider.get_data( chrom, low, high, int(start_val), int(max_vals), **kwargs ) + result.update( { 'dataset_type': tracks_dataset_type, 'extra_info': extra_info } ) + return result @web.json def save( self, trans, **kwargs ): --- a/static/scripts/trackster.js Mon Sep 12 14:51:26 2011 -0400 +++ b/static/scripts/trackster.js Tue Sep 13 16:55:17 2011 -0400 @@ -443,9 +443,9 @@ $.extend(extra_params, {start_val: cur_data.data.length + 1}); } else if (req_type === this.BROAD_DATA_REQ) { - // Set query low to be past the last feature returned so that an area of extreme feature depth - // is bypassed. - query_low = cur_data.data[cur_data.data.length - 1][2] + 1; + // To get past an area of extreme feature depth, set query low to be after either + // (a) the maximum high or HACK/FIXME (b) the end of the last feature returned. + query_low = (cur_data.max_high ? cur_data.max_high : cur_data.data[cur_data.data.length - 1][2]) + 1; } // @@ -463,6 +463,9 @@ // Update data and message. if (result.data) { result.data = cur_data.data.concat(result.data); + if (result.max_low) { + result.max_low = cur_data.max_low; + } if (result.message) { // HACK: replace number in message with current data length. Works but is ugly. result.message = result.message.replace(/[0-9]+/, result.data.length); Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket