details: http://www.bx.psu.edu/hg/galaxy/rev/ec547440ec97 changeset: 1508:ec547440ec97 user: Dan Blankenberg <dan@bx.psu.edu> date: Tue Sep 16 13:25:42 2008 -0400 description: Small update for maf stats tool. 2 file(s) affected in this change: lib/galaxy/tools/util/maf_utilities.py tools/maf/maf_stats.py diffs (99 lines): diff -r 842f1883cf53 -r ec547440ec97 lib/galaxy/tools/util/maf_utilities.py --- a/lib/galaxy/tools/util/maf_utilities.py Mon Sep 15 15:04:41 2008 -0400 +++ b/lib/galaxy/tools/util/maf_utilities.py Tue Sep 16 13:25:42 2008 -0400 @@ -199,7 +199,7 @@ yield block def get_chopped_blocks_with_index_offset_for_region( index, src, region, species = None, mincols = 0, force_strand = None ): for block, idx, offset in index.get_as_iterator_with_index_and_offset( src, region.start, region.end ): - block = chop_block_by_region( block, src, region, species, mincols ) + block = chop_block_by_region( block, src, region, species, mincols, force_strand ) if block is not None: yield block, idx, offset @@ -209,6 +209,25 @@ else: alignment = RegionAlignment( end - start, primary_species ) return fill_region_alignment( alignment, index, primary_species, chrom, start, end, strand, species, mincols ) +#reduces a block to only positions exisiting in the src provided +def reduce_block_by_primary_genome( block, species, chromosome, region_start ): + #returns ( startIndex, {species:texts} + #where texts' contents are reduced to only positions existing in the primary genome + src = "%s.%s" % ( species, chromosome ) + ref = block.get_component_by_src( src ) + start_offset = ref.start - region_start + species_texts = {} + for c in block.components: + species_texts[ c.src.split( '.' )[0] ] = list( c.text ) + #remove locations which are gaps in the primary species, starting from the downstream end + for i in range( len( species_texts[ species ] ) - 1, -1, -1 ): + if species_texts[ species ][i] == '-': + for text in species_texts.values(): + text.pop( i ) + for spec, text in species_texts.items(): + species_texts[spec] = ''.join( text ) + return ( start_offset, species_texts ) + #fills a region alignment def fill_region_alignment( alignment, index, primary_species, chrom, start, end, strand = '+', species = None, mincols = 0 ): region = bx.intervals.Interval( start, end ) @@ -216,22 +235,7 @@ region.strand = strand primary_src = "%s.%s" % ( primary_species, chrom ) - def reduce_block_by_primary_genome( block ): - #returns ( startIndex, {species:texts} - #where texts' contents are reduced to only positions existing in the primary genome - ref = block.get_component_by_src( primary_src ) - start_offset = ref.start - start - species_texts = {} - for c in block.components: - species_texts[ c.src.split( '.' )[0] ] = list( c.text ) - #remove locations which are gaps in the primary species, starting from the downstream end - for i in range( len( species_texts[ primary_species ] ) - 1, -1, -1 ): - if species_texts[ primary_species ][i] == '-': - for text in species_texts.values(): - text.pop( i ) - for spec, text in species_texts.items(): - species_texts[spec] = ''.join( text ) - return ( start_offset, species_texts ) + #Order blocks overlaping this position by score, lowest first blocks = [] @@ -248,7 +252,7 @@ for block_dict in blocks: block = chop_block_by_region( block_dict[1].get_at_offset( block_dict[2] ), primary_src, region, species, mincols, strand ) if block is None: continue - start_offset, species_texts = reduce_block_by_primary_genome( block ) + start_offset, species_texts = reduce_block_by_primary_genome( block, primary_species, chrom, start ) for spec, text in species_texts.items(): try: alignment.set_range( start_offset, spec, text ) diff -r 842f1883cf53 -r ec547440ec97 tools/maf/maf_stats.py --- a/tools/maf/maf_stats.py Mon Sep 15 15:04:41 2008 -0400 +++ b/tools/maf/maf_stats.py Tue Sep 16 13:25:42 2008 -0400 @@ -64,19 +64,11 @@ for c in block.components: spec = c.src.split( '.' )[0] if spec not in coverage: coverage[spec] = zeros( region.end - region.start, dtype = bool ) - ref = block.get_component_by_src( src ) - #skip gap locations due to insertions in secondary species relative to primary species - start_offset = ref.start - region.start - num_gaps = 0 - for i in range( len( ref.text.rstrip().rstrip( "-" ) ) ): - if ref.text[i] in ["-"]: - num_gaps += 1 - continue - #Toggle base if covered - for comp in block.components: - spec = comp.src.split( '.' )[0] - if comp.text and comp.text[i] not in ['-']: - coverage[spec][start_offset + i - num_gaps] = True + start_offset, alignment = maf_utilities.reduce_block_by_primary_genome( block, dbkey, region.chrom, region.start ) + for i in range( len( alignment[dbkey] ) ): + for spec, text in alignment.items(): + if text[i] != '-': + coverage[spec][start_offset + i] = True if summary: #record summary for key in coverage.keys():