commit/galaxy-central: 3 new changesets
3 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/6a85e1496b98/ Changeset: 6a85e1496b98 Branch: next-stable User: Jeremy Goecks Date: 2014-04-09 16:47:22 Summary: Genome data provider: (a) handle regions with no mapped reads and (b) clean up trailing whitespace. Affected #: 1 file diff -r 44f5b4a2091bc6e552e0c79a0a761cfc8759a36f -r 6a85e1496b98ac05cdfff0b6940877c19bc37cd8 lib/galaxy/visualization/data_providers/genome.py --- a/lib/galaxy/visualization/data_providers/genome.py +++ b/lib/galaxy/visualization/data_providers/genome.py @@ -965,18 +965,23 @@ # -- Choose iterator. -- # Calculate threshold for non-sequential iterators based on mean_depth and read length. - first_read = next( iterator ) + try: + first_read = next( iterator ) + except StopIteration: + # no reads. + return { 'data': [], 'message': None, 'max_low': start, 'max_high': start } + read_len = len( first_read.seq ) num_reads = ( end - start ) * mean_depth / float ( read_len ) threshold = float( max_vals )/ num_reads iterator = itertools.chain( iter( [ first_read ] ), iterator ) # Use specified iterator type, save for when threshold is >= 1. - # A threshold of >= 1 indicates all reads are to be returned, so no + # A threshold of >= 1 indicates all reads are to be returned, so no # sampling needed and seqential iterator will be used. if iterator_type == 'sequential' or threshold >= 1: read_iterator = iterator - elif iterator_type == 'random': + elif iterator_type == 'random': read_iterator = _random_read_iterator( iterator, threshold ) elif iterator_type == 'nth': read_iterator = _nth_read_iterator( iterator, threshold ) https://bitbucket.org/galaxy/galaxy-central/commits/376ffdb91bd1/ Changeset: 376ffdb91bd1 Branch: next-stable User: Jeremy Goecks Date: 2014-04-09 18:53:34 Summary: SAM/BAM data provider: fix bug and streamline code to support both genomes with reference data and those without. Affected #: 1 file diff -r 6a85e1496b98ac05cdfff0b6940877c19bc37cd8 -r 376ffdb91bd1c1a7881e72c5cb6da8dea0e0534f lib/galaxy/visualization/data_providers/genome.py --- a/lib/galaxy/visualization/data_providers/genome.py +++ b/lib/galaxy/visualization/data_providers/genome.py @@ -1063,45 +1063,48 @@ # Clean up. TODO: is this needed? If so, we'll need a cleanup function after processing the data. # bamfile.close() - # If there are results and reference data, transform read sequence and cigar. - if len( results ) != 0 and ref_seq: - def process_read( read, start_field, cigar_field, seq_field ): - ''' - Process a read using the designated fields. - ''' - read_seq, read_cigar = get_ref_based_read_seq_and_cigar( read[ seq_field ].upper(), - read[ start_field ], - ref_seq, - start, - read[ cigar_field ] ) - read[ seq_field ] = read_seq - read[ cigar_field ] = read_cigar + def compress_seq_and_cigar( read, start_field, cigar_field, seq_field ): + ''' + Use reference-based compression to compress read sequence and cigar. + ''' + read_seq, read_cigar = get_ref_based_read_seq_and_cigar( read[ seq_field ].upper(), + read[ start_field ], + ref_seq, + start, + read[ cigar_field ] ) + read[ seq_field ] = read_seq + read[ cigar_field ] = read_cigar - def process_se_read( read ): - ''' - Process single-end read. - ''' - process_read( read, 1, 4, 6) + def convert_cigar( read, start_field, cigar_field, seq_field ): + ''' + Convert read cigar from pysam format to string format. + ''' + cigar_ops = 'MIDNSHP=X' + read_cigar = '' + for op_tuple in read[ cigar_field ]: + read_cigar += '%i%s' % ( op_tuple[1], cigar_ops[ op_tuple[0] ] ) + read[ cigar_field ] = read_cigar - def process_pe_read( read ): - ''' - Process paired-end read. - ''' + # Choose method for processing reads. Use reference-based compression + # if possible. Otherwise, convert cigar. + if ref_seq: + # Uppercase for easy comparison. + ref_seq = ref_seq.upper() + process_read = compress_seq_and_cigar + else: + process_read = convert_cigar + + # Process reads. + for read in results: + if isinstance( read[ 5 ], list ): + # Paired-end read. if len( read[4] ) > 2: process_read( read[4], 0, 2, 4 ) if len( read[5] ) > 2: process_read( read[5], 0, 2, 4 ) - - # Uppercase for easy comparison. - ref_seq = ref_seq.upper() - - # Process reads. - for read in results: - # Use correct function for processing reads. - if isinstance( read[ 5 ], list ): - process_pe_read( read ) - else: - process_se_read( read ) + else: + # Single-end read. + process_read( read, 1, 4, 6) max_low, max_high = get_bounds( results, 1, 2 ) https://bitbucket.org/galaxy/galaxy-central/commits/fbe5f3970ac2/ Changeset: fbe5f3970ac2 User: Jeremy Goecks Date: 2014-04-09 18:53:57 Summary: Automated merge of next-stable to default. Affected #: 1 file diff -r 4aee689e69dac57676de7f9ceb0a31f227bc0dd2 -r fbe5f3970ac2f1fe32710a4962a175de7264c2be lib/galaxy/visualization/data_providers/genome.py --- a/lib/galaxy/visualization/data_providers/genome.py +++ b/lib/galaxy/visualization/data_providers/genome.py @@ -965,18 +965,23 @@ # -- Choose iterator. -- # Calculate threshold for non-sequential iterators based on mean_depth and read length. - first_read = next( iterator ) + try: + first_read = next( iterator ) + except StopIteration: + # no reads. + return { 'data': [], 'message': None, 'max_low': start, 'max_high': start } + read_len = len( first_read.seq ) num_reads = ( end - start ) * mean_depth / float ( read_len ) threshold = float( max_vals )/ num_reads iterator = itertools.chain( iter( [ first_read ] ), iterator ) # Use specified iterator type, save for when threshold is >= 1. - # A threshold of >= 1 indicates all reads are to be returned, so no + # A threshold of >= 1 indicates all reads are to be returned, so no # sampling needed and seqential iterator will be used. if iterator_type == 'sequential' or threshold >= 1: read_iterator = iterator - elif iterator_type == 'random': + elif iterator_type == 'random': read_iterator = _random_read_iterator( iterator, threshold ) elif iterator_type == 'nth': read_iterator = _nth_read_iterator( iterator, threshold ) @@ -1058,45 +1063,48 @@ # Clean up. TODO: is this needed? If so, we'll need a cleanup function after processing the data. # bamfile.close() - # If there are results and reference data, transform read sequence and cigar. - if len( results ) != 0 and ref_seq: - def process_read( read, start_field, cigar_field, seq_field ): - ''' - Process a read using the designated fields. - ''' - read_seq, read_cigar = get_ref_based_read_seq_and_cigar( read[ seq_field ].upper(), - read[ start_field ], - ref_seq, - start, - read[ cigar_field ] ) - read[ seq_field ] = read_seq - read[ cigar_field ] = read_cigar + def compress_seq_and_cigar( read, start_field, cigar_field, seq_field ): + ''' + Use reference-based compression to compress read sequence and cigar. + ''' + read_seq, read_cigar = get_ref_based_read_seq_and_cigar( read[ seq_field ].upper(), + read[ start_field ], + ref_seq, + start, + read[ cigar_field ] ) + read[ seq_field ] = read_seq + read[ cigar_field ] = read_cigar - def process_se_read( read ): - ''' - Process single-end read. - ''' - process_read( read, 1, 4, 6) + def convert_cigar( read, start_field, cigar_field, seq_field ): + ''' + Convert read cigar from pysam format to string format. + ''' + cigar_ops = 'MIDNSHP=X' + read_cigar = '' + for op_tuple in read[ cigar_field ]: + read_cigar += '%i%s' % ( op_tuple[1], cigar_ops[ op_tuple[0] ] ) + read[ cigar_field ] = read_cigar - def process_pe_read( read ): - ''' - Process paired-end read. - ''' + # Choose method for processing reads. Use reference-based compression + # if possible. Otherwise, convert cigar. + if ref_seq: + # Uppercase for easy comparison. + ref_seq = ref_seq.upper() + process_read = compress_seq_and_cigar + else: + process_read = convert_cigar + + # Process reads. + for read in results: + if isinstance( read[ 5 ], list ): + # Paired-end read. if len( read[4] ) > 2: process_read( read[4], 0, 2, 4 ) if len( read[5] ) > 2: process_read( read[5], 0, 2, 4 ) - - # Uppercase for easy comparison. - ref_seq = ref_seq.upper() - - # Process reads. - for read in results: - # Use correct function for processing reads. - if isinstance( read[ 5 ], list ): - process_pe_read( read ) - else: - process_se_read( read ) + else: + # Single-end read. + process_read( read, 1, 4, 6) max_low, max_high = get_bounds( results, 1, 2 ) Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
commits-noreply@bitbucket.org