details: http://www.bx.psu.edu/hg/galaxy/rev/98a45b4fe3a2
changeset: 3280:98a45b4fe3a2
user: James Taylor <james@jamestaylor.org>
date: Wed Jan 27 12:15:43 2010 -0500
description:
trackster: Support ensembl / 1000 genomes style BAM files which don't prefix chromosome numbers with 'chr'

diffstat:

 lib/galaxy/visualization/tracks/data/bam.py | 29 ++++++++++++++++++++++++-----
 1 files changed, 24 insertions(+), 5 deletions(-)

diffs (51 lines):

diff -r 69a04e6bd513 -r 98a45b4fe3a2 lib/galaxy/visualization/tracks/data/bam.py
--- a/lib/galaxy/visualization/tracks/data/bam.py Wed Jan 27 08:51:23 2010 -0500
+++ b/lib/galaxy/visualization/tracks/data/bam.py Wed Jan 27 12:15:43 2010 -0500
@@ -1,23 +1,42 @@
+"""
+Visualization data provider for BAM format.
+"""
+
 import pkg_resources; pkg_resources.require( "pysam" )
+
 from pysam import csamtools
 from math import floor, ceil, log
 import logging
 
 class BamDataProvider( object ):
+    """
+    Provides access to intervals from a sorted indexed BAM file.
+    """
     def __init__( self, index, original_dataset ):
-        self.log = logging.getLogger(__name__)
+        # self.log = logging.getLogger(__name__)
         self.index = index
         self.original_dataset = original_dataset
 
     def get_data( self, chrom, start, end, **kwargs ):
+        """
+        Fetch intervals in the region
+        """
         start, end = int(start), int(end)
-        bamfile = csamtools.Samfile(filename=self.original_dataset.file_name, mode='rb', index_filename=self.index.file_name)
-
-        data = bamfile.fetch(start=start, end=end, reference=chrom)
+        # Attempt to open the BAM file with index
+        bamfile = csamtools.Samfile( filename=self.original_dataset.file_name, mode='rb', index_filename=self.index.file_name )
+        try:
+            data = bamfile.fetch(start=start, end=end, reference=chrom)
+        except ValueError, e:
+            # Some BAM files do not prefix chromosome names with chr, try
+            # without
+            if chrom.startswith( 'chr' ):
+                data = bamfile.fetch( start=start, end=end, reference=chrom[3:] )
+            else:
+                raise
+        # Encode reads as list of dictionaries
         results = []
         for read in data:
             payload = { 'uid': str(read.pos) + str(read.seq), 'start': read.pos, 'end': read.pos + read.rlen, 'name': read.seq }
-
             results.append(payload)
         bamfile.close()
         return results