galaxy-commits
Threads by month
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
April 2014
- 1 participants
- 261 discussions
commit/galaxy-central: Jeremy Goecks: Genome data providers: (a) organize imports and (b) implement random and nth read iterators to downsample deep coverage regions. Use nth read iterator by default because it is the fastest.
by commits-noreply@bitbucket.org 01 Apr '14
by commits-noreply@bitbucket.org 01 Apr '14
01 Apr '14
1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/615d5040d90b/
Changeset: 615d5040d90b
User: Jeremy Goecks
Date: 2014-04-01 16:30:12
Summary: Genome data providers: (a) organize imports and (b) implement random and nth read iterators to downsample deep coverage regions. Use nth read iterator by default because it is the fastest.
Affected #: 3 files
diff -r abe1482b550884d58cda3ca1e46200afa6c09ee9 -r 615d5040d90b23a3476ff2e8c388c338fb031c09 lib/galaxy/visualization/data_providers/genome.py
--- a/lib/galaxy/visualization/data_providers/genome.py
+++ b/lib/galaxy/visualization/data_providers/genome.py
@@ -4,21 +4,24 @@
import os, sys, re
import pkg_resources
+import itertools
+import random
+
pkg_resources.require( "bx-python" )
pkg_resources.require( "pysam" )
pkg_resources.require( "numpy" )
import numpy
-from galaxy.datatypes.util.gff_util import convert_gff_coords_to_bed, GFFFeature, GFFInterval, GFFReaderWrapper, parse_gff_attributes
-from galaxy.util.json import from_json_string
from bx.interval_index_file import Indexes
from bx.bbi.bigwig_file import BigWigFile
from bx.bbi.bigbed_file import BigBedFile
+from pysam import csamtools, ctabix
+
+from galaxy.datatypes.util.gff_util import convert_gff_coords_to_bed, GFFFeature, GFFInterval, GFFReaderWrapper, parse_gff_attributes
+from galaxy.util.json import from_json_string
from galaxy.visualization.data_providers.basic import BaseDataProvider
from galaxy.visualization.data_providers.cigar import get_ref_based_read_seq_and_cigar
from galaxy.datatypes.interval import Bed, Gff, Gtf
-from pysam import csamtools, ctabix
-
#
# Utility functions.
#
@@ -887,8 +890,10 @@
except ValueError:
return None
return data
+
- def process_data( self, iterator, start_val=0, max_vals=None, ref_seq=None, start=0, **kwargs ):
+ def process_data( self, iterator, start_val=0, max_vals=None, ref_seq=None,
+ iterator_type='nth', mean_depth=None, start=0, end=0, **kwargs ):
"""
Returns a dict with the following attributes::
@@ -911,7 +916,6 @@
max_low - lowest coordinate for the returned reads
max_high - highest coordinate for the returned reads
message - error/informative message
-
"""
# No iterator indicates no reads.
if iterator is None:
@@ -921,14 +925,59 @@
# Helper functions.
#
- # Decode strand from read flag.
def decode_strand( read_flag, mask ):
+ """ Decode strand from read flag. """
+
strand_flag = ( read_flag & mask == 0 )
if strand_flag:
return "+"
else:
return "-"
+ def _random_read_iterator( read_iterator, threshold ):
+ """
+ An iterator that returns a random stream of reads from the read_iterator
+ as well as corresponding pairs for returned reads.
+ threshold is a value in [0,1] that denotes the percentage of reads
+ to return.
+ """
+ for e in read_iterator:
+ if e.qname in paired_pending or random.uniform( 0, 1 ) <= threshold:
+ yield e
+
+ def _nth_read_iterator( read_iterator, threshold ):
+ """
+ An iterator that returns every nth read.
+ """
+
+ # Convert threshold to N for stepping through iterator.
+ n = int( 1/threshold )
+ for e in itertools.islice( read_iterator, None, None, n ):
+ yield e
+
+ # Alternatate and much slower implementation that looks for pending pairs.
+ '''
+ for i, e in enumerate( read_iterator ):
+ if e.qname in paired_pending or ( i % n ) == 0:
+ yield e
+ '''
+
+ # -- Choose iterator. --
+
+ # Calculate threshold for non-sequential iterators based on mean_depth and read length.
+ first_read = next( iterator )
+ read_len = len( first_read.seq )
+ num_reads = ( end - start ) * mean_depth / float ( read_len )
+ threshold = float( max_vals )/ num_reads
+ iterator = itertools.chain( iter( [ first_read ] ), iterator )
+
+ if iterator_type == 'sequential':
+ read_iterator = iterator
+ elif iterator_type == 'random':
+ read_iterator = _random_read_iterator( iterator, threshold )
+ elif iterator_type == 'nth':
+ read_iterator = _nth_read_iterator( iterator, threshold )
+
#
# Encode reads as list of lists.
#
@@ -936,7 +985,8 @@
paired_pending = {}
unmapped = 0
message = None
- for count, read in enumerate( iterator ):
+ count = 0
+ for read in read_iterator:
if count < start_val:
continue
if ( count - start_val - unmapped ) >= max_vals:
@@ -958,7 +1008,8 @@
read_len = len(seq) # If no cigar, just use sequence length
if read.is_proper_pair:
- if qname in paired_pending: # one in dict is always first
+ if qname in paired_pending:
+ # Found pair.
pair = paired_pending[qname]
results.append( [ "%i_%s" % ( pair['start'], qname ),
pair['start'],
@@ -970,15 +1021,17 @@
] )
del paired_pending[qname]
else:
+ # Insert first of pair.
paired_pending[qname] = { 'start': read.pos, 'end': read.pos + read_len, 'seq': seq, 'mate_start': read.mpos,
'rlen': read_len, 'strand': strand, 'cigar': read.cigar, 'mapq': read.mapq }
+ count += 1
else:
results.append( [ "%i_%s" % ( read.pos, qname ),
read.pos, read.pos + read_len, qname,
read.cigar, strand, read.seq, read.mapq ] )
-
+ count += 1
+
# Take care of reads whose mates are out of range.
- # TODO: count paired reads when adhering to max_vals?
for qname, read in paired_pending.iteritems():
if read['mate_start'] < read['start']:
# Mate is before read.
diff -r abe1482b550884d58cda3ca1e46200afa6c09ee9 -r 615d5040d90b23a3476ff2e8c388c338fb031c09 lib/galaxy/webapps/galaxy/api/datasets.py
--- a/lib/galaxy/webapps/galaxy/api/datasets.py
+++ b/lib/galaxy/webapps/galaxy/api/datasets.py
@@ -2,7 +2,7 @@
API operations on the contents of a history dataset.
"""
from galaxy import web
-from galaxy.visualization.data_providers.genome import FeatureLocationIndexDataProvider
+from galaxy.visualization.data_providers.genome import FeatureLocationIndexDataProvider, SamDataProvider, BamDataProvider
from galaxy.web.base.controller import BaseAPIController, UsesVisualizationMixin, UsesHistoryDatasetAssociationMixin
from galaxy.web.base.controller import UsesHistoryMixin
from galaxy.web.framework.helpers import is_true
@@ -146,6 +146,7 @@
extra_info = None
mode = kwargs.get( "mode", "Auto" )
data_provider_registry = trans.app.data_provider_registry
+ indexer = None
# Coverage mode uses index data.
if mode == "Coverage":
@@ -192,16 +193,26 @@
if max_vals is None:
max_vals = data_provider.get_default_max_vals()
- # Get reference sequence for region; this is used by providers for aligned reads.
+ # Get reference sequence and mean depth for region; these is used by providers for aligned reads.
ref_seq = None
- if dataset.dbkey:
- data_dict = self.app.genomes.reference( trans, dbkey=dataset.dbkey, chrom=chrom, low=low, high=high )
- if data_dict:
- ref_seq = data_dict[ 'data' ]
+ mean_depth = None
+ if isinstance( data_provider, (SamDataProvider, BamDataProvider ) ):
+ # Get reference sequence.
+ if dataset.dbkey:
+ data_dict = self.app.genomes.reference( trans, dbkey=dataset.dbkey, chrom=chrom, low=low, high=high )
+ if data_dict:
+ ref_seq = data_dict[ 'data' ]
+
+ # Get mean depth.
+ if not indexer:
+ indexer = data_provider_registry.get_data_provider( trans, original_dataset=dataset, source='index' )
+ stats = indexer.get_data( chrom, low, high, stats=True )
+ mean_depth = stats[ 'data' ][ 'mean' ]
+
# Get and return data from data_provider.
result = data_provider.get_data( chrom, int( low ), int( high ), int( start_val ), int( max_vals ),
- ref_seq=ref_seq, **kwargs )
+ ref_seq=ref_seq, mean_depth=mean_depth, **kwargs )
result.update( { 'dataset_type': data_provider.dataset_type, 'extra_info': extra_info } )
return result
diff -r abe1482b550884d58cda3ca1e46200afa6c09ee9 -r 615d5040d90b23a3476ff2e8c388c338fb031c09 static/scripts/viz/trackster/tracks.js
--- a/static/scripts/viz/trackster/tracks.js
+++ b/static/scripts/viz/trackster/tracks.js
@@ -1952,6 +1952,9 @@
this.has_icons = false;
// Add message + action icons to tile's html.
+ /*
+ This does not work right now because a random set of reads is returned by the server.
+ When the server can respond with more data systematically, renable these icons.
if (message) {
this.has_icons = true;
@@ -1999,6 +2002,7 @@
e.stopPropagation();
});
}
+ */
};
extend(FeatureTrackTile.prototype, Tile.prototype);
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
1
0