2 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/46df437c4059/ Changeset: 46df437c4059 User: jgoecks Date: 2014-07-14 16:13:44 Summary: Clean up for fetching genome reference data for visualization. Affected #: 3 files diff -r 018d08d81c1e58e9aa42e8c5b179c413f6fb509b -r 46df437c405989b2d93fcccc0937aa804f0d9555 lib/galaxy/visualization/genomes.py --- a/lib/galaxy/visualization/genomes.py +++ b/lib/galaxy/visualization/genomes.py @@ -31,10 +31,11 @@ A genomic region on an individual chromosome. """ - def __init__( self, chrom = None, start = 0, end = 0 ): + def __init__( self, chrom = None, start = 0, end = 0, sequence=None ): self.chrom = chrom self.start = int( start ) self.end = int( end ) + self.sequence = sequence def __str__( self ): return self.chrom + ":" + str( self.start ) + "-" + str( self.end ) @@ -356,6 +357,6 @@ twobit = TwoBitFile( open( twobit_file_name ) ) if chrom in twobit: seq_data = twobit[chrom].get( int(low), int(high) ) - return { 'dataset_type': 'refseq', 'data': seq_data } + return GenomeRegion( chrom=chrom, start=low, end=high, sequence=seq_data ) except IOError: return None diff -r 018d08d81c1e58e9aa42e8c5b179c413f6fb509b -r 46df437c405989b2d93fcccc0937aa804f0d9555 lib/galaxy/webapps/galaxy/api/datasets.py --- a/lib/galaxy/webapps/galaxy/api/datasets.py +++ b/lib/galaxy/webapps/galaxy/api/datasets.py @@ -199,9 +199,9 @@ if isinstance( data_provider, (SamDataProvider, BamDataProvider ) ): # Get reference sequence. if dataset.dbkey: - data_dict = self.app.genomes.reference( trans, dbkey=dataset.dbkey, chrom=chrom, low=low, high=high ) - if data_dict: - ref_seq = data_dict[ 'data' ] + region = self.app.genomes.reference( trans, dbkey=dataset.dbkey, chrom=chrom, low=low, high=high ) + if region: + ref_seq = region.sequence # Get mean depth. 
if not indexer: diff -r 018d08d81c1e58e9aa42e8c5b179c413f6fb509b -r 46df437c405989b2d93fcccc0937aa804f0d9555 lib/galaxy/webapps/galaxy/api/genomes.py --- a/lib/galaxy/webapps/galaxy/api/genomes.py +++ b/lib/galaxy/webapps/galaxy/api/genomes.py @@ -36,7 +36,8 @@ # Return info. rval = None if reference: - rval = self.app.genomes.reference( trans, dbkey=id, chrom=chrom, low=low, high=high ) + region = self.app.genomes.reference( trans, dbkey=id, chrom=chrom, low=low, high=high ) + rval = { 'dataset_type': 'refseq', 'data': region.sequence } else: rval = self.app.genomes.chroms( trans, dbkey=id, num=num, chrom=chrom, low=low ) return rval https://bitbucket.org/galaxy/galaxy-central/commits/d09b3a2bd7b2/ Changeset: d09b3a2bd7b2 User: jgoecks Date: 2014-07-14 17:35:14 Summary: Aligned read visualization data providers: do reference-based compression for complete read rather than read in requested region so that read representation is uniform. Affected #: 3 files diff -r 46df437c405989b2d93fcccc0937aa804f0d9555 -r d09b3a2bd7b2f0fe07f9894923b9a73d02176a82 lib/galaxy/visualization/data_providers/cigar.py --- a/lib/galaxy/visualization/data_providers/cigar.py +++ b/lib/galaxy/visualization/data_providers/cigar.py @@ -18,7 +18,7 @@ return read_seq, cigar # Set up position for reference, read. - ref_seq_pos = read_start - ref_seq_start + ref_seq_pos = read_start read_pos = 0 # Create new read sequence, cigar. @@ -27,42 +27,30 @@ cigar_ops = 'MIDNSHP=X' for op_tuple in cigar: op, op_len = op_tuple - + # Op is index into string 'MIDNSHP=X' if op == 0: # Match - # If region falls outside ref_seq data, leave as M. - if ref_seq_start - read_start > op_len: - # Region falls completely outside of reference. - new_cigar += '%iM' % ( op_len ) - else: - # Some of region overlap reference. 
- total_count = 0 - if read_start < ref_seq_start: - new_cigar += '%iM' % ( ref_seq_start - read_start ) - read_pos = ref_seq_start - read_start - ref_seq_pos = 0 - total_count = read_pos + # Transform Ms to =s and Xs using reference. + new_op = '' + total_count = 0 + while total_count < op_len and ref_seq_pos < len( ref_seq ): + match, count = _match_mismatch_counter( read_seq, read_pos, ref_seq, ref_seq_pos ) + # Use min because count cannot exceed remainder of operation. + count = min( count, op_len - total_count ) + if match: + new_op = '=' + else: + new_op = 'X' + # Include mismatched bases in new read sequence. + new_read_seq += read_seq[ read_pos:read_pos + count ] + new_cigar += '%i%s' % ( count, new_op ) + total_count += count + read_pos += count + ref_seq_pos += count - # Transform Ms to =s and Xs using reference. - new_op = '' - while total_count < op_len and ref_seq_pos < len( ref_seq ): - match, count = _match_mismatch_counter( read_seq, read_pos, ref_seq, ref_seq_pos ) - # Use min because count cannot exceed remainder of operation. - count = min( count, op_len - total_count ) - if match: - new_op = '=' - else: - new_op = 'X' - # Include mismatched bases in new read sequence. - new_read_seq += read_seq[ read_pos:read_pos + count ] - new_cigar += '%i%s' % ( count, new_op ) - total_count += count - read_pos += count - ref_seq_pos += count - - # If end of read falls outside of ref_seq data, leave as M. - if total_count < op_len: - new_cigar += '%iM' % ( op_len - total_count ) + # If end of read falls outside of ref_seq data, leave as M. + if total_count < op_len: + new_cigar += '%iM' % ( op_len - total_count ) elif op == 1: # Insertion new_cigar += '%i%s' % ( op_len, cigar_ops[ op ] ) # Include insertion bases in new read sequence. 
diff -r 46df437c405989b2d93fcccc0937aa804f0d9555 -r d09b3a2bd7b2f0fe07f9894923b9a73d02176a82 lib/galaxy/visualization/data_providers/genome.py --- a/lib/galaxy/visualization/data_providers/genome.py +++ b/lib/galaxy/visualization/data_providers/genome.py @@ -1068,8 +1068,8 @@ ''' read_seq, read_cigar = get_ref_based_read_seq_and_cigar( read[ seq_field ].upper(), read[ start_field ], - ref_seq, - start, + ref_seq.sequence, + ref_seq.start, read[ cigar_field ] ) read[ seq_field ] = read_seq read[ cigar_field ] = read_cigar @@ -1088,7 +1088,7 @@ # if possible. Otherwise, convert cigar. if ref_seq: # Uppercase for easy comparison. - ref_seq = ref_seq.upper() + ref_seq.sequence = ref_seq.sequence.upper() process_read = compress_seq_and_cigar else: process_read = convert_cigar diff -r 46df437c405989b2d93fcccc0937aa804f0d9555 -r d09b3a2bd7b2f0fe07f9894923b9a73d02176a82 lib/galaxy/webapps/galaxy/api/datasets.py --- a/lib/galaxy/webapps/galaxy/api/datasets.py +++ b/lib/galaxy/webapps/galaxy/api/datasets.py @@ -199,9 +199,11 @@ if isinstance( data_provider, (SamDataProvider, BamDataProvider ) ): # Get reference sequence. if dataset.dbkey: - region = self.app.genomes.reference( trans, dbkey=dataset.dbkey, chrom=chrom, low=low, high=high ) - if region: - ref_seq = region.sequence + # FIXME: increase region 500bp each way to provide sequence for overlapping reads. As reads + # get longer, this will need to be increased and/or a handle to the genomic data may be need + # to be given to the data provider. + region = self.app.genomes.reference( trans, dbkey=dataset.dbkey, chrom=chrom, + low=( int( low ) - 500 ), high=( int( high ) + 500 ) ) # Get mean depth. if not indexer: @@ -212,7 +214,7 @@ # Get and return data from data_provider. 
result = data_provider.get_data( chrom, int( low ), int( high ), int( start_val ), int( max_vals ), - ref_seq=ref_seq, mean_depth=mean_depth, **kwargs ) + ref_seq=region, mean_depth=mean_depth, **kwargs ) result.update( { 'dataset_type': data_provider.dataset_type, 'extra_info': extra_info } ) return result Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled and addressed to the recipient of this email.