commit/galaxy-central: jgoecks: Trackster: Implement and use reference-based read drawing when genome reference is available. This reduces the data transfer for read tracks by ~50%. Fix bugs in reference-based data provider code and remove client-side debugging statements.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/ecbaac8a8cce/ Changeset: ecbaac8a8cce User: jgoecks Date: 2013-04-10 16:34:30 Summary: Trackster: Implement and use reference-based read drawing when genome reference is available. This reduces the data transfer for read tracks by ~50%. Fix bugs in reference-based data provider code and remove client-side debugging statements. Affected #: 4 files diff -r cf2313ae788daca198be009c8c16ec4b27b551ed -r ecbaac8a8cce6a6a08efc12512adff54c5a8fa63 lib/galaxy/visualization/data_providers/cigar.py --- a/lib/galaxy/visualization/data_providers/cigar.py +++ b/lib/galaxy/visualization/data_providers/cigar.py @@ -23,54 +23,66 @@ # Create new read sequence, cigar. new_read_seq = '' - new_cigar = [] + new_cigar = '' + cigar_ops = 'MIDNSHP=X' for op_tuple in cigar: op, op_len = op_tuple # Op is index into string 'MIDNSHP=X' if op == 0: # Match - # Transform Ms to =s and Xs. - new_op = [] - total_count = 0 - while total_count < op_len and ref_seq_pos < len( ref_seq ): - match, count = _match_mismatch_counter( read_seq, read_pos, ref_seq, ref_seq_pos ) - # Use min because count cannot exceed remainder of operation. - count = min( count, op_len - total_count ) - if match: - new_op = 7 - else: - new_op = 8 - # Include mismatched bases in new read sequence. - new_read_seq += read_seq[ read_pos:read_pos + count ] - new_cigar.append( ( new_op, count ) ) - total_count += count - read_pos += count - ref_seq_pos += count - - # If part of read falls outside of ref_seq dat, then leave - # part as M. - if total_count < op_len: - new_cigar.append( ( 0, op_len - total_count ) ) + # If region falls outside ref_seq data, leave as M. + if ref_seq_start - read_start > op_len: + # Region falls completely outside of reference. + new_cigar += '%iM' % ( op_len ) + else: + # Some of region overlap reference. + total_count = 0 + if read_start < ref_seq_start: + new_cigar += '%iM' % ( ref_seq_start - read_start ) + read_pos = ref_seq_start - read_start + ref_seq_pos = 0 + total_count = read_pos + + # Transform Ms to =s and Xs using reference. + new_op = '' + while total_count < op_len and ref_seq_pos < len( ref_seq ): + match, count = _match_mismatch_counter( read_seq, read_pos, ref_seq, ref_seq_pos ) + # Use min because count cannot exceed remainder of operation. + count = min( count, op_len - total_count ) + if match: + new_op = '=' + else: + new_op = 'X' + # Include mismatched bases in new read sequence. + new_read_seq += read_seq[ read_pos:read_pos + count ] + new_cigar += '%i%s' % ( count, new_op ) + total_count += count + read_pos += count + ref_seq_pos += count + + # If end of read falls outside of ref_seq data, leave as M. + if total_count < op_len: + new_cigar += '%iM' % ( op_len - total_count ) elif op == 1: # Insertion - new_cigar.append( op_tuple ) + new_cigar += '%i%s' % ( op_len, cigar_ops[ op ] ) # Include insertion bases in new read sequence. new_read_seq += read_seq[ read_pos:read_pos + op_len ] read_pos += op_len elif op in [ 2, 3, 6 ]: # Deletion, Skip, or Padding ref_seq_pos += op_len - new_cigar.append( op_tuple ) + new_cigar += '%i%s' % ( op_len, cigar_ops[ op ] ) elif op == 4: # Soft clipping read_pos += op_len - new_cigar.append( op_tuple ) + new_cigar += '%i%s' % ( op_len, cigar_ops[ op ] ) elif op == 5: # Hard clipping - new_cigar.append( op_tuple ) + new_cigar += '%i%s' % ( op_len, cigar_ops[ op ] ) elif op in [ 7, 8 ]: # Match or mismatch if op == 8: # Include mismatched bases in new read sequence. new_read_seq += read_seq[ read_pos:read_pos + op_len ] read_pos += op_len ref_seq_pos += op_len - new_cigar.append( op_tuple ) + new_cigar += '%i%s' % ( op_len, cigar_ops[ op ] ) return ( new_read_seq, new_cigar ) diff -r cf2313ae788daca198be009c8c16ec4b27b551ed -r ecbaac8a8cce6a6a08efc12512adff54c5a8fa63 lib/galaxy/visualization/data_providers/genome.py --- a/lib/galaxy/visualization/data_providers/genome.py +++ b/lib/galaxy/visualization/data_providers/genome.py @@ -16,6 +16,7 @@ from galaxy.util.lrucache import LRUCache from galaxy.visualization.tracks.summary import summary_tree_from_file from galaxy.visualization.data_providers.basic import BaseDataProvider +from galaxy.visualization.data_providers.cigar import get_ref_based_read_seq_and_cigar from galaxy.datatypes.interval import Bed, Gff, Gtf from pysam import csamtools, ctabix @@ -935,12 +936,10 @@ return "+" else: return "-" - + # # Encode reads as list of lists. # - if ref_seq: - ref_seq = ref_seq.upper() results = [] paired_pending = {} unmapped = 0 @@ -1010,6 +1009,36 @@ # Clean up. TODO: is this needed? If so, we'll need a cleanup function after processing the data. # bamfile.close() + + # If there are results and reference data, transform read sequence and cigar. + if len( results ) != 0 and ref_seq: + def process_se_read( read ): + ''' + Process single-end read. + ''' + read_seq, read_cigar = get_ref_based_read_seq_and_cigar( read[ 6 ].upper(), read[ 1 ], + ref_seq, start, read[ 4 ] ) + read[ 6 ] = read_seq + read[ 4 ] = read_cigar + + def process_pe_read( read ): + ''' + Process paired-end read. + ''' + process_se_read( read[ 4 ] ) + process_se_read( read[ 5 ] ) + + # Uppercase for easy comparison. + ref_seq = ref_seq.upper() + + # Choose correct function for processing reads. + process_fn = process_se_read + if isinstance( results[ 0 ][ 5 ], list ): + process_fn = process_pe_read + + # Process reads. + for read in results: + process_fn( read ) max_low, max_high = get_bounds( results, 1, 2 ) diff -r cf2313ae788daca198be009c8c16ec4b27b551ed -r ecbaac8a8cce6a6a08efc12512adff54c5a8fa63 static/scripts/viz/trackster/painters.js --- a/static/scripts/viz/trackster/painters.js +++ b/static/scripts/viz/trackster/painters.js @@ -271,9 +271,7 @@ ctx.fillRect(x_scaled, 0, delta_x_px, height_px); } else { - // console.log(y, track.min_value, track.vertical_range, (y - track.min_value) / track.vertical_range * track.height_px); y = Math.round( height_px - (y - min_value) / vertical_range * height_px ); - // console.log(canvas.get(0).height, canvas.get(0).width); if (in_path) { ctx.lineTo(x_scaled, y); } @@ -730,6 +728,52 @@ } return height; }, + + /** + * Parse CIGAR string to get (a) a list of contiguous drawing blocks (MD=X) and + * (b) an array of [ op_index, op_len ] pairs where op_index is an index into the + * string 'MIDNSHP=X' Return value is a dictionary with two entries, blocks and cigar + */ + _parse_cigar: function(cigar_str) { + var cigar_ops = 'MIDNSHP=X'; + + // Parse cigar. + var blocks = [ [0, 0] ], + cur_block = blocks[0], + base_pos = 0, + + // Parse cigar operations out and update/create blocks as needed. + parsed_cigar = _.map(cigar_str.match(/[0-9]+[MIDNSHP=X]/g), function(op) { + // Get operation length, character. + var op_len = parseInt(op.slice(0, -1), 10), + op_char = op.slice(-1); + + // Update drawing block. + if (op_char === 'N') { + // At skip, so need to start new block if current block represents + // drawing area. + if (cur_block[1] !== 0) { + cur_block = [base_pos + op_len, base_pos + op_len]; + blocks.push(cur_block); + } + } + else if ('ISHP'.indexOf(op_char) === -1) { + // Operation is M,D,=,X. + cur_block[1] += op_len; + base_pos += op_len; + } + + // Return parsed cigar. + return [ cigar_ops.indexOf(op_char), op_len ]; + }); + + return { + blocks: blocks, + cigar: parsed_cigar + }; + }, + + // FIXME: extract common functionality from draw_read functions for ReadPainters. /** * Draw a single read. @@ -758,10 +802,6 @@ cig_op = "MIDNSHP=X"[ cig[0] ], cig_len = cig[1]; - if (cig_op === "H" || cig_op === "S") { - // Go left if it clips - base_offset -= cig_len; - } var seq_start = feature_start + base_offset, // -0.5 to offset sequence between bases. s_start = Math.floor( Math.max(-0.5 * w_scale, (seq_start - tile_low - 0.5) * w_scale) ), @@ -777,12 +817,14 @@ switch (cig_op) { case "H": // Hard clipping. - // TODO: draw anything? // Sequence not present, so do not increment seq_offset. break; case "S": // Soft clipping. - case "M": // Match. - case "=": // Equals. + seq_offset += cig_len; + break; + case "M": // Loose match with reference; can be match or mismatch. + case "=": // Strict match with reference. + case "X": // Strict mismatch with reference. if (is_overlap([seq_start, seq_start + cig_len], tile_region)) { // Draw read base as rectangle. ctx.fillStyle = block_color; @@ -794,15 +836,13 @@ // Draw sequence and/or variants. var seq = read_seq.slice(seq_offset, seq_offset + cig_len), ref_char, - read_char, - x_pos; + read_char; for (var c = 0, str_len = seq.length; c < str_len; c++) { // Draw base if it's on tile: if (seq_start + c >= tile_low && seq_start + c <= tile_high) { // Get reference and read character. ref_char = (this.ref_seq ? this.ref_seq[seq_start - tile_low + c] : null); read_char = seq[c]; - x_pos = Math.floor( Math.max(0, (seq_start + c - tile_low) * w_scale) ); // Draw base depending on (a) available reference data and (b) config options. if ( @@ -841,13 +881,11 @@ ctx.fillStyle = CONNECTOR_COLOR; ctx.fillRect(s_start, y_center + 5, s_end - s_start, 1); //ctx.dashedLine(s_start + this.left_offset, y_center + 5, this.left_offset + s_end, y_center + 5); - // No change in seq_offset because sequence not used when skipping. base_offset += cig_len; break; case "D": // Deletion. ctx.fillStyle = "black"; ctx.fillRect(s_start, y_center + 4, s_end - s_start, 3); - // TODO: is this true? No change in seq_offset because sequence not used when skipping. base_offset += cig_len; break; case "P": // TODO: No good way to draw insertions/padding right now, so ignore @@ -918,10 +956,6 @@ seq_offset += cig_len; // No change to base offset because insertions are drawn above sequence/read. break; - case "X": - // TODO: draw something? - seq_offset += cig_len; - break; } } @@ -960,6 +994,7 @@ f_end = Math.ceil( Math.min(width, Math.max(0, (feature_end - tile_low - 0.5) * w_scale)) ), y_center = (mode === "Dense" ? 0 : (0 + slot)) * y_scale, label_color = this.prefs.label_color; + // Draw read. if (feature[5] instanceof Array) { @@ -971,7 +1006,7 @@ connector = true; // Draw left/forward read. - if (feature[4][1] >= tile_low && feature[4][0] <= tile_high && feature[4][2]) { + if (feature[4][1] >= tile_low && feature[4][0] <= tile_high && feature[4][2]) { this.draw_read(ctx, mode, w_scale, y_center, tile_low, tile_high, feature[4][0], feature[4][2], feature[4][3], feature[4][4]); } else { @@ -1015,6 +1050,244 @@ } }); +/** + * Painter for reads encoded using reference-based compression. + */ +var RefBasedReadPainter = function(data, view_start, view_end, prefs, mode, alpha_scaler, height_scaler, ref_seq, base_color_fn) { + ReadPainter.call(this, data, view_start, view_end, prefs, mode, alpha_scaler, height_scaler, ref_seq, base_color_fn); +}; + +extend(RefBasedReadPainter.prototype, ReadPainter.prototype, FeaturePainter, { + + /** + * Draw a single read from reference-based read sequence and cigar. + */ + draw_read: function(ctx, mode, w_scale, y_center, tile_low, tile_high, feature_start, cigar, strand, read_seq) { + ctx.textAlign = "center"; + var tile_region = [tile_low, tile_high], + base_offset = 0, + seq_offset = 0, + gap = Math.round(w_scale/2), + char_width_px = ctx.canvas.manager.char_width_px, + block_color = (strand === "+" ? this.prefs.block_color : this.prefs.reverse_strand_color), + pack_mode = (mode === 'Pack'), + drawing_blocks = []; + + // Keep list of items that need to be drawn on top of initial drawing layer. + var draw_last = []; + + // Parse cigar and get drawing blocks. + var t = this._parse_cigar(cigar); + cigar = t.cigar; + drawing_blocks = t.blocks; + + // Draw blocks. + for (var i = 0; i < drawing_blocks.length; i++) { + var block = drawing_blocks[i]; + + if (is_overlap([feature_start + block[0], feature_start + block[1]], tile_region)) { + // -0.5 to offset sequence between bases. + var s_start = Math.floor( Math.max(-0.5 * w_scale, (feature_start + block[0] - tile_low - 0.5) * w_scale) ), + s_end = Math.floor( Math.max(0, (feature_start + block[1] - tile_low - 0.5) * w_scale) ); + + // Make sure that block is drawn even if it too small to be rendered officially; in this case, + // read is drawn at 1px. + // TODO: need to ensure that s_start, s_end are calcuated the same for both slotting + // and drawing. + if (s_start === s_end) { + s_end += 1; + } + + // Draw read base as rectangle. + ctx.fillStyle = block_color; + ctx.fillRect(s_start, + y_center + (pack_mode ? 1 : 4 ), + s_end - s_start, + (pack_mode ? PACK_FEATURE_HEIGHT : SQUISH_FEATURE_HEIGHT)); + } + } + + // Draw read features. + for (var cig_id = 0, len = cigar.length; cig_id < len; cig_id++) { + var cig = cigar[cig_id], + cig_op = "MIDNSHP=X"[ cig[0] ], + cig_len = cig[1]; + + var seq_start = feature_start + base_offset, + // -0.5 to offset sequence between bases. + s_start = Math.floor( Math.max(0, -0.5 * w_scale, (seq_start - tile_low - 0.5) * w_scale) ), + s_end = Math.floor( Math.max(0, (seq_start + cig_len - tile_low - 0.5) * w_scale) ); + + // Make sure that read is drawn even if it too small to be rendered officially; in this case, + // read is drawn at 1px. + // TODO: need to ensure that s_start, s_end are calcuated the same for both slotting + // and drawing. + if (s_start === s_end) { + s_end += 1; + } + + switch (cig_op) { + case "H": // Hard clipping. + case "S": // Soft clipping. + case "P": // Padding. + // Sequence not present and not related to alignment; do nothing. + break; + case "M": // "Match". + // Because it's not known whether there is a match, ignore. + base_offset += cig_len; + break; + case "=": // Match with reference. + case "X": // Mismatch with reference. + if (is_overlap([seq_start, seq_start + cig_len], tile_region)) { + // + // Draw sequence and/or variants. + // + + // Get sequence to draw. + var cur_seq = ''; + if (cig_op === 'X') { + // Get sequence from read_seq. + cur_seq = read_seq.slice(seq_offset, seq_offset + cig_len); + } + else if (this.ref_seq) { // && cig_op === '=' + // Use reference sequence. + cur_seq = this.ref_seq.slice( + // If read starts after tile start, slice at read start. + Math.max(0, seq_start - tile_low), + // If read ends before tile end, slice at read end. + Math.min(seq_start - tile_low + cig_len, tile_high - tile_low) + ); + } + + // Draw sequence. Because cur_seq starts and read/tile start, go to there to start writing. + var start_pos = Math.max(seq_start, tile_low); + for (var c = 0; c < cur_seq.length; c++) { + // Draw base if showing all (i.e. not showing differences) or there is a mismatch. + if (cur_seq && !this.prefs.show_differences || cig_op === 'X') { + // Draw base. + var c_start = Math.floor( Math.max(0, (start_pos + c - tile_low) * w_scale) ); + ctx.fillStyle = this.base_color_fn(cur_seq[c]); + if (pack_mode && w_scale > char_width_px) { + ctx.fillText(cur_seq[c], c_start, y_center + 9); + } + // Require a minimum w_scale so that variants are only drawn when somewhat zoomed in. + else if (w_scale > 0.05) { + ctx.fillRect(c_start, + y_center + (pack_mode ? 1 : 4), + Math.max( 1, Math.round(w_scale) ), + (pack_mode ? PACK_FEATURE_HEIGHT : SQUISH_FEATURE_HEIGHT)); + } + } + } + } + + // Move forward in sequence only if sequence used to get mismatches. + if (cig_op === 'X') { seq_offset += cig_len; } + base_offset += cig_len; + + break; + case "N": // Skipped bases. + ctx.fillStyle = CONNECTOR_COLOR; + ctx.fillRect(s_start, y_center + 5, s_end - s_start, 1); + //ctx.dashedLine(s_start + this.left_offset, y_center + 5, this.left_offset + s_end, y_center + 5); + // No change in seq_offset because sequence not used when skipping. + base_offset += cig_len; + break; + case "D": // Deletion. + ctx.fillStyle = "black"; + ctx.fillRect(s_start, y_center + 4, s_end - s_start, 3); + base_offset += cig_len; + break; + case "I": // Insertion. + // Check to see if sequence should be drawn at all by looking at the overlap between + // the sequence region and the tile region. + var insert_x_coord = s_start - gap; + + if (is_overlap([seq_start, seq_start + cig_len], tile_region)) { + var seq = read_seq.slice(seq_offset, seq_offset + cig_len); + // Insertion point is between the sequence start and the previous base: (-gap) moves + // back from sequence start to insertion point. + if (this.prefs.show_insertions) { + // + // Show inserted sequence above, centered on insertion point. + // + + // Draw sequence. + // X center is offset + start - <half_sequence_length> + var x_center = s_start - (s_end - s_start)/2; + if ( (mode === "Pack" || this.mode === "Auto") && read_seq !== undefined && w_scale > char_width_px) { + // Draw sequence container. + ctx.fillStyle = "yellow"; + ctx.fillRect(x_center - gap, y_center - 9, s_end - s_start, 9); + draw_last[draw_last.length] = {type: "triangle", data: [insert_x_coord, y_center + 4, 5]}; + ctx.fillStyle = CONNECTOR_COLOR; + // Based on overlap b/t sequence and tile, get sequence to be drawn. + switch( compute_overlap( [seq_start, seq_start + cig_len], tile_region ) ) { + case(OVERLAP_START): + seq = seq.slice(tile_low-seq_start); + break; + case(OVERLAP_END): + seq = seq.slice(0, seq_start-tile_high); + break; + case(CONTAINED_BY): + // All of sequence drawn. + break; + case(CONTAINS): + seq = seq.slice(tile_low-seq_start, seq_start-tile_high); + break; + } + // Draw sequence. + for (var c = 0, str_len = seq.length; c < str_len; c++) { + var c_start = Math.floor( Math.max(0, (seq_start + c - tile_low) * w_scale) ); + ctx.fillText(seq[c], c_start - (s_end - s_start)/2, y_center); + } + } + else { + // Draw block. + ctx.fillStyle = "yellow"; + // TODO: This is a pretty hack-ish way to fill rectangle based on mode. + ctx.fillRect(x_center, y_center + (this.mode !== "Dense" ? 2 : 5), + s_end - s_start, (mode !== "Dense" ? SQUISH_FEATURE_HEIGHT : DENSE_FEATURE_HEIGHT)); + } + } + else { + if ( (mode === "Pack" || this.mode === "Auto") && read_seq !== undefined && w_scale > char_width_px) { + // Show insertions with a single number at the insertion point. + draw_last.push( { type: "text", data: [seq.length, insert_x_coord, y_center + 9] } ); + } + else { + // TODO: probably can merge this case with code above. + } + } + } + seq_offset += cig_len; + // No change to base offset because insertions are drawn above sequence/read. + break; + } + } + + // + // Draw last items. + // + ctx.fillStyle = "yellow"; + var item, type, data; + for (var i = 0; i < draw_last.length; i++) { + item = draw_last[i]; + type = item.type; + data = item.data; + if (type === "text") { + ctx.save(); + ctx.font = "bold " + ctx.font; + ctx.fillText(data[0], data[1], data[2]); + ctx.restore(); + } + else if (type === "triangle") { + drawDownwardEquilateralTriangle(ctx, data[0], data[1], data[2]); + } + } + }, +}); + var ArcLinkedFeaturePainter = function(data, view_start, view_end, prefs, mode, alpha_scaler, height_scaler) { LinkedFeaturePainter.call(this, data, view_start, view_end, prefs, mode, alpha_scaler, height_scaler); // Need to know the longest feature length for adding spacing @@ -1263,8 +1536,6 @@ e2 = scale( d[5] ); value = d[6]; - //console.log( "!!!", value, ramp.map_value( value ) ); - ctx.fillStyle = ( ramp.map_value( value ) ); ctx.fillRect( s1, s2, ( e1 - s1 ), ( e2 - s2 ) ); @@ -1279,6 +1550,7 @@ LinePainter: LinePainter, LinkedFeaturePainter: LinkedFeaturePainter, ReadPainter: ReadPainter, + RefBasedReadPainter: RefBasedReadPainter, ArcLinkedFeaturePainter: ArcLinkedFeaturePainter, DiagonalHeatmapPainter: DiagonalHeatmapPainter }; diff -r cf2313ae788daca198be009c8c16ec4b27b551ed -r ecbaac8a8cce6a6a08efc12512adff54c5a8fa63 static/scripts/viz/trackster/tracks.js --- a/static/scripts/viz/trackster/tracks.js +++ b/static/scripts/viz/trackster/tracks.js @@ -887,7 +887,8 @@ { key: 'a_color', label: 'A Color', type: 'color', default_value: "#FF0000" }, { key: 'c_color', label: 'C Color', type: 'color', default_value: "#00FF00" }, { key: 'g_color', label: 'G Color', type: 'color', default_value: "#0000FF" }, - { key: 't_color', label: 'T Color', type: 'color', default_value: "#FF00FF" } + { key: 't_color', label: 'T Color', type: 'color', default_value: "#FF00FF" }, + { key: 'n_color', label: 'N Color', type: 'color', default_value: "#AAAAAA" }, ], saved_values: obj_dict.prefs, onchange: function() { @@ -1174,6 +1175,7 @@ /** * Load chrom data for the view. Returns a jQuery Deferred. */ + // FIXME: instead of loading chrom data, should load and store genome object. load_chroms: function(url_parms) { url_parms.num = MAX_CHROMS_SELECTABLE; @@ -4199,8 +4201,10 @@ } }); this.prefs = this.config.values; - - this.painter = painters.ReadPainter; + + // Choose painter based on whether there is reference data. + this.painter = (view.reference_track ? painters.RefBasedReadPainter : painters.ReadPainter); + this.update_icons(); }; extend(ReadTrack.prototype, Drawable.prototype, TiledTrack.prototype, FeatureTrack.prototype); Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
commits-noreply@bitbucket.org