commit/galaxy-central: richard_burhans: updated error handling for genome diversity select snps tool
1 new changeset in galaxy-central: http://bitbucket.org/galaxy/galaxy-central/changeset/7feab5786a19/ changeset: 7feab5786a19 user: richard_burhans date: 2011-06-17 18:16:53 summary: updated error handling for genome diversity select snps tool affected #: 2 files (3.1 KB) --- a/tools/genome_diversity/genome_diversity.py Fri Jun 17 10:53:41 2011 -0400 +++ b/tools/genome_diversity/genome_diversity.py Fri Jun 17 12:16:53 2011 -0400 @@ -1,5 +1,6 @@ #!/usr/bin/env python2.5 +import sys import cdblib def _openfile( filename=None, mode='r' ): @@ -21,7 +22,7 @@ return None -class SnpFile: +class SnpFile( object ): def __init__( self, filename=None, seq_col=1, pos_col=2, ref_seq_col=7, ref_pos_col=8 ): self.filename = filename self.fh = _openfile( filename ) @@ -63,7 +64,7 @@ return None, None -class IndexedFile: +class IndexedFile( object ): def __init__( self, data_file=None, index_file=None ): self.data_file = data_file @@ -176,3 +177,90 @@ else: return None + + +class LocationFile( object ): + def __init__(self, filename): + self.build_map(filename) + + def build_map(self, filename): + self.map = {} + self.open_file(filename) + for line in self.read_lines(): + elems = line.split('\t', 1) + if len(elems) == 2: + self.map[ elems[0].strip() ] = elems[1].strip() + self.close_file() + + def read_lines(self): + for line in self.fh: + if not line.startswith('#'): + line = line.rstrip('\r\n') + yield line + + def open_file(self, filename): + self.filename = filename + try: + self.fh = open(filename, 'r') + except IOError, err: + print >> sys.stderr, "Error opening location file '%s': %s" % (filename, str(err)) + sys.exit(1) + + def close_file(self): + self.fh.close() + + def loc_file( self, key ): + if key in self.map: + return self.map[key] + else: + print >> sys.stderr, "'%s' does not appear in location file '%s'" % (key, self.filename) + sys.exit(1) + +class ChrLens( object ): + def __init__( self, location_file, species ): + self.chrlen_loc = LocationFile( location_file ) + 
self.chrlen_filename = self.chrlen_loc.loc_file( species ) + self.build_map() + + def build_map(self): + self.map = {} + self.open_file(self.chrlen_filename) + for line in self.read_lines(): + elems = line.split('\t', 1) + if len(elems) == 2: + chrom = elems[0].strip() + chrom_len_text = elems[1].strip() + try: + chrom_len = int( chrom_len_text ) + except ValueError: + print >> sys.stderr, "Bad length '%s' for chromosome '%s' in '%s'" % (chrom_len_text, chrom, self.chrlen_filename) + self.map[ chrom ] = chrom_len + self.close_file() + + def read_lines(self): + for line in self.fh: + if not line.startswith('#'): + line = line.rstrip('\r\n') + yield line + + def open_file(self, filename): + self.filename = filename + try: + self.fh = open(filename, 'r') + except IOError, err: + print >> sys.stderr, "Error opening chromosome length file '%s': %s" % (filename, str(err)) + sys.exit(1) + + def close_file(self): + self.fh.close() + + def length( self, key ): + if key in self.map: + return self.map[key] + else: + return None + + def __iter__( self ): + for chrom in self.map: + yield chrom + --- a/tools/genome_diversity/select_snps.py Fri Jun 17 10:53:41 2011 -0400 +++ b/tools/genome_diversity/select_snps.py Fri Jun 17 12:16:53 2011 -0400 @@ -1,16 +1,9 @@ #!/usr/bin/env python -""" -basic version -need to add - o indexing - o better error handing -""" - import sys -import string import math from optparse import OptionParser +import genome_diversity as gd def main_function(parse_arguments=None): if parse_arguments is None: @@ -37,32 +30,19 @@ @main_function(parse_arguments) def main(options, arguments): - ref_chrom_idx = int( options.ref_chrom_col ) - 1 - ref_pos_idx = int( options.ref_pos_col ) - 1 - chrlens_fh = open( options.chrlens_loc, 'r' ) - for line in chrlens_fh: - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ): - elems = line.split( '\t' ) - if len(elems) >= 2 and elems[0] == options.species: - chrom_info_file = elems[1] + ref_chrom_idx = 
to_int( options.ref_chrom_col ) -1 + ref_pos_idx = to_int( options.ref_pos_col ) -1 - chrom_info = open( chrom_info_file, 'r' ) + if (ref_chrom_idx < 1) or (ref_pos_idx < 1) or (ref_chrom_idx == ref_pos_idx): + print >> sys.stderr, "Cannot locate reference genome sequence (ref) or reference genome position (rPos) column for this dataset." + sys.exit(1) + + chrlens = gd.ChrLens( options.chrlens_loc, options.species ) + total_len = 0 - for i, line in enumerate( chrom_info ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ): - elems = line.split() - if len(elems) == 2: - chrom = elems[0] - try: - chrom_len = int(elems[1]) - except ValueError: - sys.stderr.write( "bad chrom len in line %d column 2: %s\n" % ( i, elems[1] ) ) - sys.exit(1) - total_len += chrom_len; - chrom_info.close() + for chrom in chrlens: + total_len += chrlens.length(chrom) total_requested = int( options.num_snps ) lines, data, comments = get_snp_lines_data_and_comments( options.input, ref_chrom_idx, ref_pos_idx ) @@ -70,6 +50,13 @@ out_data = fix_selection_and_order_like_input(data, selected, total_requested) write_selected_snps( options.output, out_data, lines, comments ) +def to_int( value ): + try: + int_value = int( value ) + except ValueError: + int_value = 0 + return int_value + def get_snp_lines_data_and_comments( filename, chrom_idx, pos_idx ): fh = open( filename, 'r' ) if (chrom_idx >= pos_idx): Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
Participants (1): Bitbucket