[galaxy-commits] commit/galaxy-central: 2 new changesets

14 Jul 2014

2 new commits in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/46df437c4059/
Changeset:   46df437c4059
User:        jgoecks
Date:        2014-07-14 16:13:44
Summary:     Clean up for fetching genome reference data for visualization.
Affected #:  3 files

diff -r 018d08d81c1e58e9aa42e8c5b179c413f6fb509b -r 46df437c405989b2d93fcccc0937aa804f0d9555 lib/galaxy/visualization/genomes.py

--- a/lib/galaxy/visualization/genomes.py
+++ b/lib/galaxy/visualization/genomes.py
@@ -31,10 +31,11 @@
     A genomic region on an individual chromosome.
     """
 
-    def __init__( self, chrom = None, start = 0, end = 0 ):
+    def __init__( self, chrom = None, start = 0, end = 0, sequence=None ):
         self.chrom = chrom
         self.start = int( start )
         self.end = int( end )
+        self.sequence = sequence
 
     def __str__( self ):
         return self.chrom + ":" + str( self.start ) + "-" + str( self.end )
@@ -356,6 +357,6 @@
             twobit = TwoBitFile( open( twobit_file_name ) )
             if chrom in twobit:
                 seq_data = twobit[chrom].get( int(low), int(high) )
-                return { 'dataset_type': 'refseq', 'data': seq_data }
+                return GenomeRegion( chrom=chrom, start=low, end=high, sequence=seq_data )
         except IOError:
             return None

diff -r 018d08d81c1e58e9aa42e8c5b179c413f6fb509b -r 46df437c405989b2d93fcccc0937aa804f0d9555 lib/galaxy/webapps/galaxy/api/datasets.py
--- a/lib/galaxy/webapps/galaxy/api/datasets.py
+++ b/lib/galaxy/webapps/galaxy/api/datasets.py
@@ -199,9 +199,9 @@
         if isinstance( data_provider, (SamDataProvider, BamDataProvider ) ):
             # Get reference sequence.
             if dataset.dbkey:
-                data_dict = self.app.genomes.reference( trans, dbkey=dataset.dbkey, chrom=chrom, low=low, high=high )
-                if data_dict:
-                    ref_seq = data_dict[ 'data' ]
+                region = self.app.genomes.reference( trans, dbkey=dataset.dbkey, chrom=chrom, low=low, high=high )
+                if region:
+                    ref_seq = region.sequence
 
             # Get mean depth.
             if not indexer:

diff -r 018d08d81c1e58e9aa42e8c5b179c413f6fb509b -r 46df437c405989b2d93fcccc0937aa804f0d9555 lib/galaxy/webapps/galaxy/api/genomes.py
--- a/lib/galaxy/webapps/galaxy/api/genomes.py
+++ b/lib/galaxy/webapps/galaxy/api/genomes.py
@@ -36,7 +36,8 @@
         # Return info.
         rval = None
         if reference:
-            rval = self.app.genomes.reference( trans, dbkey=id, chrom=chrom, low=low, high=high )
+            region = self.app.genomes.reference( trans, dbkey=id, chrom=chrom, low=low, high=high )
+            rval = { 'dataset_type': 'refseq', 'data': region.sequence }
         else:
             rval = self.app.genomes.chroms( trans, dbkey=id, num=num, chrom=chrom, low=low )
         return rval


https://bitbucket.org/galaxy/galaxy-central/commits/d09b3a2bd7b2/
Changeset:   d09b3a2bd7b2
User:        jgoecks
Date:        2014-07-14 17:35:14
Summary:     Aligned read visualization data providers: do reference-based compression for complete read rather than read in requested region so that read representation is uniform.
Affected #:  3 files

diff -r 46df437c405989b2d93fcccc0937aa804f0d9555 -r d09b3a2bd7b2f0fe07f9894923b9a73d02176a82 lib/galaxy/visualization/data_providers/cigar.py
--- a/lib/galaxy/visualization/data_providers/cigar.py
+++ b/lib/galaxy/visualization/data_providers/cigar.py
@@ -18,7 +18,7 @@
         return read_seq, cigar
 
     # Set up position for reference, read.
-    ref_seq_pos = read_start - ref_seq_start
+    ref_seq_pos = read_start
     read_pos = 0
 
     # Create new read sequence, cigar.
@@ -27,42 +27,30 @@
     cigar_ops = 'MIDNSHP=X'
     for op_tuple in cigar:
         op, op_len = op_tuple
-
+        
         # Op is index into string 'MIDNSHP=X'
         if op == 0: # Match
-            # If region falls outside ref_seq data, leave as M.
-            if ref_seq_start - read_start > op_len:
-                # Region falls completely outside of reference.
-                new_cigar += '%iM' % ( op_len )
-            else:
-                # Some of region overlap reference.
-                total_count = 0
-                if read_start < ref_seq_start:
-                    new_cigar += '%iM' % ( ref_seq_start - read_start )
-                    read_pos = ref_seq_start - read_start
-                    ref_seq_pos = 0
-                    total_count = read_pos
+            # Transform Ms to =s and Xs using reference.
+            new_op = ''
+            total_count = 0
+            while total_count < op_len and ref_seq_pos < len( ref_seq ):
+                match, count = _match_mismatch_counter( read_seq, read_pos, ref_seq, ref_seq_pos )
+                # Use min because count cannot exceed remainder of operation.
+                count = min( count, op_len - total_count )
+                if match:
+                    new_op = '='
+                else:
+                    new_op = 'X'
+                    # Include mismatched bases in new read sequence.
+                    new_read_seq += read_seq[ read_pos:read_pos + count ]
+                new_cigar += '%i%s' % ( count, new_op )
+                total_count += count
+                read_pos += count
+                ref_seq_pos += count
 
-                # Transform Ms to =s and Xs using reference.
-                new_op = ''
-                while total_count < op_len and ref_seq_pos < len( ref_seq ):
-                    match, count = _match_mismatch_counter( read_seq, read_pos, ref_seq, ref_seq_pos )
-                    # Use min because count cannot exceed remainder of operation.
-                    count = min( count, op_len - total_count )
-                    if match:
-                        new_op = '='
-                    else:
-                        new_op = 'X'
-                        # Include mismatched bases in new read sequence.
-                        new_read_seq += read_seq[ read_pos:read_pos + count ]
-                    new_cigar += '%i%s' % ( count, new_op )
-                    total_count += count
-                    read_pos += count
-                    ref_seq_pos += count
-
-                # If end of read falls outside of ref_seq data, leave as M.
-                if total_count < op_len:
-                    new_cigar += '%iM' % ( op_len - total_count )
+            # If end of read falls outside of ref_seq data, leave as M.
+            if total_count < op_len:
+                new_cigar += '%iM' % ( op_len - total_count )
         elif op == 1: # Insertion
             new_cigar += '%i%s' % ( op_len, cigar_ops[ op ] )
             # Include insertion bases in new read sequence.

diff -r 46df437c405989b2d93fcccc0937aa804f0d9555 -r d09b3a2bd7b2f0fe07f9894923b9a73d02176a82 lib/galaxy/visualization/data_providers/genome.py
--- a/lib/galaxy/visualization/data_providers/genome.py
+++ b/lib/galaxy/visualization/data_providers/genome.py
@@ -1068,8 +1068,8 @@
             '''
             read_seq, read_cigar = get_ref_based_read_seq_and_cigar( read[ seq_field ].upper(),
                                                                      read[ start_field ],
-                                                                     ref_seq,
-                                                                     start,
+                                                                     ref_seq.sequence,
+                                                                     ref_seq.start,
                                                                      read[ cigar_field ] )
             read[ seq_field ] = read_seq
             read[ cigar_field ] = read_cigar
@@ -1088,7 +1088,7 @@
         # if possible. Otherwise, convert cigar.
         if ref_seq:
             # Uppercase for easy comparison.
-            ref_seq = ref_seq.upper()
+            ref_seq.sequence = ref_seq.sequence.upper()
             process_read = compress_seq_and_cigar
         else:
             process_read = convert_cigar

diff -r 46df437c405989b2d93fcccc0937aa804f0d9555 -r d09b3a2bd7b2f0fe07f9894923b9a73d02176a82 lib/galaxy/webapps/galaxy/api/datasets.py
--- a/lib/galaxy/webapps/galaxy/api/datasets.py
+++ b/lib/galaxy/webapps/galaxy/api/datasets.py
@@ -199,9 +199,11 @@
         if isinstance( data_provider, (SamDataProvider, BamDataProvider ) ):
             # Get reference sequence.
             if dataset.dbkey:
-                region = self.app.genomes.reference( trans, dbkey=dataset.dbkey, chrom=chrom, low=low, high=high )
-                if region:
-                    ref_seq = region.sequence
+                # FIXME: increase region 500bp each way to provide sequence for overlapping reads. As reads
+                # get longer, this will need to be increased and/or a handle to the genomic data may need
+                # to be given to the data provider.
+                region = self.app.genomes.reference( trans, dbkey=dataset.dbkey, chrom=chrom, 
+                                                     low=( int( low  ) - 500 ), high=( int( high ) + 500 ) )
 
             # Get mean depth.
             if not indexer:
@@ -212,7 +214,7 @@
 
         # Get and return data from data_provider.
         result = data_provider.get_data( chrom, int( low ), int( high ), int( start_val ), int( max_vals ),
-                                         ref_seq=ref_seq, mean_depth=mean_depth, **kwargs )
+                                         ref_seq=region, mean_depth=mean_depth, **kwargs )
         result.update( { 'dataset_type': data_provider.dataset_type, 'extra_info': extra_info } )
         return result

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.

    

[galaxy-commits] commit/galaxy-central: 2 new changesets

commits-noreply＠bitbucket.org