commit/galaxy-central: jgoecks: SummaryTree improvements: (a) establish and use attribute defaults; (b) use dynamic cutoffs for detail/draw rather than static ones.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/ff6f0c1bb28c/ changeset: ff6f0c1bb28c user: jgoecks date: 2012-07-12 17:08:08 summary: SummaryTree improvements: (a) establish and use attribute defaults; (b) use dynamic cutoffs for detail/draw rather than static ones. affected #: 5 files diff -r 1c17293afee255b476cf5f3154c6261519a84c68 -r ff6f0c1bb28c1d64469315968fbc56dd9fbaf680 lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.py --- a/lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.py +++ b/lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.py @@ -49,7 +49,7 @@ end_col=end_col, strand_col=strand_col, fix_strand=True ) - st = SummaryTree(block_size=25, levels=6, draw_cutoff=150, detail_cutoff=30) + st = SummaryTree() for feature in list( reader_wrapper ): if isinstance( feature, GenomicInterval ): # Tree expects BED coordinates. diff -r 1c17293afee255b476cf5f3154c6261519a84c68 -r ff6f0c1bb28c1d64469315968fbc56dd9fbaf680 lib/galaxy/datatypes/converters/sam_or_bam_to_summary_tree_converter.py --- a/lib/galaxy/datatypes/converters/sam_or_bam_to_summary_tree_converter.py +++ b/lib/galaxy/datatypes/converters/sam_or_bam_to_summary_tree_converter.py @@ -32,7 +32,7 @@ out_fname = args[1] samfile = csamtools.Samfile( filename=input_fname, mode='r' ) - st = SummaryTree(block_size=25, levels=6, draw_cutoff=150, detail_cutoff=30) + st = SummaryTree() for read in samfile.fetch(): st.insert_range( samfile.getrname( read.rname ), read.pos, read.pos + read.rlen ) diff -r 1c17293afee255b476cf5f3154c6261519a84c68 -r ff6f0c1bb28c1d64469315968fbc56dd9fbaf680 lib/galaxy/datatypes/converters/vcf_to_summary_tree_converter.py --- a/lib/galaxy/datatypes/converters/vcf_to_summary_tree_converter.py +++ b/lib/galaxy/datatypes/converters/vcf_to_summary_tree_converter.py @@ -18,7 +18,7 @@ in_file, out_file = args # Do conversion. 
- st = SummaryTree(block_size=25, levels=6, draw_cutoff=150, detail_cutoff=30) + st = SummaryTree() for line in list( galaxy_utils.sequence.vcf.Reader( open( in_file ) ) ): # VCF format provides a chrom and 1-based position for each variant. # SummaryTree expects 0-based coordinates. diff -r 1c17293afee255b476cf5f3154c6261519a84c68 -r ff6f0c1bb28c1d64469315968fbc56dd9fbaf680 lib/galaxy/visualization/tracks/data_providers.py --- a/lib/galaxy/visualization/tracks/data_providers.py +++ b/lib/galaxy/visualization/tracks/data_providers.py @@ -621,13 +621,13 @@ """ dataset_summary = [] for chrom_info in chroms_info[ 'chrom_info' ]: - summary = self.get_summary( chrom_info[ 'chrom' ], 0, chrom_info[ 'len' ], level=4 ) + summary = self.get_summary( chrom_info[ 'chrom' ], 0, chrom_info[ 'len' ], level=3, detail_cutoff=0, draw_cutoff=0 ) dataset_summary.append( summary ) return dataset_summary # TODO: rename to get_data to match other providers. - def get_summary( self, chrom, start, end, level=None, resolution=None ): + def get_summary( self, chrom, start, end, level=None, resolution=None, detail_cutoff=None, draw_cutoff=None ): """ Returns summary tree data for a given genomic region. """ @@ -663,7 +663,7 @@ # Use level to get results. 
stats = st.chrom_stats[ chrom ] - results = st.query( chrom, int(start), int(end), level ) + results = st.query( chrom, int(start), int(end), level, detail_cutoff=detail_cutoff, draw_cutoff=draw_cutoff ) if results == "detail" or results == "draw": return results else: diff -r 1c17293afee255b476cf5f3154c6261519a84c68 -r ff6f0c1bb28c1d64469315968fbc56dd9fbaf680 lib/galaxy/visualization/tracks/summary.py --- a/lib/galaxy/visualization/tracks/summary.py +++ b/lib/galaxy/visualization/tracks/summary.py @@ -11,7 +11,7 @@ MIN_LEVEL = 2 class SummaryTree: - def __init__( self, block_size, levels, draw_cutoff, detail_cutoff ): + def __init__( self, block_size=25, levels=6, draw_cutoff=150, detail_cutoff=30 ): self.chrom_blocks = {} self.levels = levels self.draw_cutoff = draw_cutoff @@ -47,38 +47,46 @@ block_level[ block ] = 1 def finish( self ): - """ Checks for cutoff and only stores levels above it """ + """ Compute stats for levels. """ - # TODO: not storing all counts is lossy. To fix, store all counts - # and then dynamically set draw/detail level either on load or - # use cutoffs in query function. for chrom, blocks in self.chrom_blocks.iteritems(): - cur_best = 999 - for level in range( self.levels, MIN_LEVEL-1, -1 ): + for level in range( self.levels, MIN_LEVEL - 1, -1 ): + # Set level's stats. 
max_val = max( blocks[ level ].values() ) - if max_val < self.draw_cutoff: - if "draw_level" not in self.chrom_stats[ chrom ]: - self.chrom_stats[ chrom ][ "draw_level" ] = level - elif max_val < self.detail_cutoff: - self.chrom_stats[ chrom ][ "detail_level" ] = level - break - else: - self.chrom_stats[ chrom ][ level ] = {} - self.chrom_stats[ chrom ][ level ][ "delta" ] = self.block_size ** level - self.chrom_stats[ chrom ][ level ][ "max" ] = max_val - self.chrom_stats[ chrom ][ level ][ "avg" ] = float( max_val ) / len( blocks[ level ] ) - cur_best = level + self.chrom_stats[ chrom ][ level ] = {} + self.chrom_stats[ chrom ][ level ][ "delta" ] = self.block_size ** level + self.chrom_stats[ chrom ][ level ][ "max" ] = max_val + self.chrom_stats[ chrom ][ level ][ "avg" ] = float( max_val ) / len( blocks[ level ] ) - self.chrom_blocks[ chrom ] = dict( [ ( key, value ) for key, value in blocks.iteritems() if key >= cur_best ] ) + self.chrom_blocks[ chrom ] = dict( [ ( key, value ) for key, value in blocks.iteritems() ] ) - def query( self, chrom, start, end, level ): + def query( self, chrom, start, end, level, draw_cutoff=None, detail_cutoff=None ): """ Queries tree for data. """ + + # Set cutoffs to self's attributes if not defined. + if draw_cutoff != 0: + draw_cutoff = self.draw_cutoff + if detail_cutoff != 0: + detail_cutoff = self.detail_cutoff + + # Get data. if chrom in self.chrom_blocks: stats = self.chrom_stats[ chrom ] + + # For backwards compatibility: if "detail_level" in stats and level <= stats[ "detail_level" ]: return "detail" elif "draw_level" in stats and level <= stats[ "draw_level" ]: return "draw" + + # If below draw, detail level, return string to denote this. + max = stats[ level ][ "max" ] + if max < detail_cutoff: + return "detail" + if max < draw_cutoff: + return "draw" + + # Return block data. 
blocks = self.chrom_blocks[ chrom ] results = [] multiplier = self.block_size ** level Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because the commit notification service is enabled with your address as the recipient of this email.
participants (1)
-
Bitbucket