1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/849fcfda099e/ changeset: 849fcfda099e user: natefoo date: 2012-04-09 19:13:44 summary: Miller Lab's Genome Diversity tools moved to the Tool Shed. affected #: 14 files diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample +++ b/datatypes_conf.xml.sample @@ -171,7 +171,6 @@ <display file="rviewer/vcf.xml" inherit="True"/></datatype><datatype extension="bcf" type="galaxy.datatypes.binary:Binary" subclass="True"/> - <datatype extension="wsf" type="galaxy.datatypes.wsf:SnpFile" display_in_upload="true"/><datatype extension="velvet" type="galaxy.datatypes.assembly:Velvet" display_in_upload="false"/><datatype extension="wig" type="galaxy.datatypes.interval:Wiggle" display_in_upload="true"><converter file="wig_to_bigwig_converter.xml" target_datatype="bigwig"/> diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e lib/galaxy/datatypes/registry.py --- a/lib/galaxy/datatypes/registry.py +++ b/lib/galaxy/datatypes/registry.py @@ -2,7 +2,7 @@ Provides mapping between extensions and datatypes, mime-types, etc. """ import os, sys, tempfile, threading, logging -import data, tabular, interval, images, sequence, qualityscore, genetics, xml, coverage, tracks, chrominfo, binary, assembly, ngsindex, wsf +import data, tabular, interval, images, sequence, qualityscore, genetics, xml, coverage, tracks, chrominfo, binary, assembly, ngsindex import galaxy.util from galaxy.util.odict import odict from display_applications.application import DisplayApplication diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e lib/galaxy/datatypes/wsf.py --- a/lib/galaxy/datatypes/wsf.py +++ /dev/null @@ -1,156 +0,0 @@ -""" -SnpFile datatype -""" - -import re -import data -from galaxy import util -from galaxy.datatypes.sniff import * -from galaxy.datatypes.tabular import Tabular -from galaxy.datatypes import metadata -from galaxy.datatypes.metadata import MetadataElement - -class SnpFile( Tabular ): - """ Webb's SNP file format """ - file_ext = 'wsf' - species_regex = re.compile('species=(\S+)') - MetadataElement( name="species", desc="species", default='', no_value='', visible=False, readonly=True ) - MetadataElement( name="scaffold", desc="scaffold column", param=metadata.ColumnParameter, default=0 ) - MetadataElement( name="pos", desc="pos column", param=metadata.ColumnParameter, default=0 ) - MetadataElement( name="ref", desc="ref column", param=metadata.ColumnParameter, default=0 ) - MetadataElement( name="rPos", desc="rPos column", param=metadata.ColumnParameter, default=0 ) - MetadataElement( name="labels", desc="Number of labels", default=0, no_value=0, visible=False, readonly=True ) - MetadataElement( name="label_for_column", desc="Mapping from column to label", default=[], no_value=[], visible=False, readonly=True ) - MetadataElement( name="columns_with_label", desc="Mapping from label to columns", param=metadata.DictParameter, default={}, no_value={}, visible=False, readonly=True ) - MetadataElement( name="column_headers", desc="Column headers", default=[], no_value=[], visible=False, readonly=True ) - - - def set_meta( self, dataset, overwrite = True, **kwd ): - Tabular.set_meta( self, dataset, overwrite=overwrite, max_data_lines=None, **kwd ) - # these two if statements work around a potential bug in metadata.py - if dataset.metadata.labels is None or 
dataset.metadata.labels == dataset.metadata.spec['labels'].no_value: - self._set_column_labels_metadata( dataset ) - if dataset.metadata.column_headers is None or dataset.metadata.column_headers == dataset.metadata.spec['column_headers'].no_value: - self._set_column_headers_metadata( dataset ) - self._set_columnParameter_metadata( dataset ) - - - def _set_column_labels_metadata( self, dataset ): - def build_map_from_label_to_comma_separated_column_list( labels ): - map = {} - for index, label in enumerate( labels ): - map.setdefault( label, [] ).append( index ) - - for label in map: - map[label] = ','.join( [ str( index + 1 ) for index in map[label] ] ) - return map - - def strip_list_elements( list ): - return [ element.strip() for element in list ] - - def initial_comment_lines_of_dataset( dataset ): - comment_lines = [] - if dataset.has_data(): - try: - fh = open( dataset.file_name, 'r' ) - for line in fh: - if not line.startswith('#'): - break - line = line[1:] - line = line.rstrip( '\r\n' ) - if line: - comment_lines.append( line ) - fh.close() - except: - pass - return comment_lines - - def set_metadata_from_comment_lines( dataset ): - labels = [] - comment_lines = initial_comment_lines_of_dataset( dataset ) - - for line in comment_lines: - match = SnpFile.species_regex.match( line ) - if match: - dataset.metadata.species = match.group(1) - continue - elems = line.split( '\t' ) - if len(elems) > 1: - labels = strip_list_elements( elems ) - - dataset.metadata.labels = len( labels ) - dataset.metadata.label_for_column = labels[:] - if labels: - dataset.metadata.label_for_column.insert(0, '') - dataset.metadata.columns_with_label = build_map_from_label_to_comma_separated_column_list( labels ) - - set_metadata_from_comment_lines( dataset ) - - - def _set_column_headers_metadata( self, dataset ): - if dataset.metadata.labels < dataset.metadata.columns: - column_headers = dataset.metadata.label_for_column[1:] + [ '' ] * ( dataset.metadata.columns - dataset.metadata.labels ) - else: - column_headers = dataset.metadata.label_for_column[1:dataset.metadata.columns+1] - - dataset.metadata.column_headers = column_headers - - - def _set_columnParameter_metadata( self, dataset ): - def unique_column_number_or_zero( string ): - try: - val = int( string ) - except: - val = 0 - return val - - for name in self._metadata_columnParameter_names( dataset ): - if name in dataset.metadata.columns_with_label: - if dataset.metadata.columns_with_label[name]: - column = unique_column_number_or_zero( dataset.metadata.columns_with_label[name] ) - if column: - setattr( dataset.metadata, name, column ) - - - def _metadata_columnParameter_names( self, dataset ): - for name, spec in dataset.metadata.spec.items(): - if isinstance( spec.param, metadata.ColumnParameter ): - yield name - - - def set_peek( self, dataset, line_count=None, is_multi_byte=False ): - super(Tabular, self).set_peek( dataset, line_count=line_count, is_multi_byte=is_multi_byte, skipchars=[ '#' ]) - - - def make_html_table( self, dataset, skipchars=[ '#' ] ): - """Create HTML table, used for displaying peek""" - def table_header_values( dataset ): - headers = dataset.metadata.column_headers[:] - for name in self._metadata_columnParameter_names( dataset ): - col = getattr( dataset.metadata, name ) - assert col <= dataset.metadata.columns, Exception( 'ColumnParameter %s %d > %d columns for dataset %s.' 
% ( name, col, dataset.metadata.columns, dataset.id ) ) - if col > 0: - headers[ col - 1 ] = name - return headers - - def table_headers( dataset ): - out = [ '<tr>' ] - headers = table_header_values( dataset ) - for index, header in enumerate( headers ): - column = index + 1 - if header: - out.append( "<th>%d.%s</th>" % ( column, header ) ) - else: - out.append( "<th>%d.</th>" % column ) - out.append( '</tr>' ) - return out - - try: - out = ['<table cellspacing="0" cellpadding="3">'] - out.extend( table_headers( dataset ) ) - out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) ) - out.append( '</table>' ) - out = "".join( out ) - except Exception, exc: - out = "Can't create peek %s" % exc - return out diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e tool_conf.xml.sample --- a/tool_conf.xml.sample +++ b/tool_conf.xml.sample @@ -482,12 +482,6 @@ <tool file="phenotype_association/vcf2pgSnp.xml" /><tool file="phenotype_association/dividePgSnpAlleles.xml" /></section> - <section name="Genome Diversity" id="gd"> - <tool file="genome_diversity/extract_primers.xml" /> - <tool file="genome_diversity/select_snps.xml" /> - <tool file="genome_diversity/select_restriction_enzymes.xml" /> - <tool file="genome_diversity/extract_flanking_dna.xml" /> - </section><section name="VCF Tools" id="vcf_tools"><tool file="vcf_tools/intersect.xml" /><tool file="vcf_tools/annotate.xml" /> diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e tools/genome_diversity/cdblib.py --- a/tools/genome_diversity/cdblib.py +++ /dev/null @@ -1,230 +0,0 @@ -#!/usr/bin/env python2.5 - -''' -Manipulate DJB's Constant Databases. These are 2 level disk-based hash tables -that efficiently handle many keys, while remaining space-efficient. - - http://cr.yp.to/cdb.html - -When generated databases are only used with Python code, consider using hash() -rather than djb_hash() for a tidy speedup. -''' - -from _struct import Struct -from itertools import chain - - -def py_djb_hash(s): - '''Return the value of DJB's hash function for the given 8-bit string.''' - h = 5381 - for c in s: - h = (((h << 5) + h) ^ ord(c)) & 0xffffffff - return h - -try: - from _cdblib import djb_hash -except ImportError: - djb_hash = py_djb_hash - -read_2_le4 = Struct('<LL').unpack -write_2_le4 = Struct('<LL').pack - - -class Reader(object): - '''A dictionary-like object for reading a Constant Database accessed - through a string or string-like sequence, such as mmap.mmap().''' - - def __init__(self, data, hashfn=djb_hash): - '''Create an instance reading from a sequence and using hashfn to hash - keys.''' - if len(data) < 2048: - raise IOError('CDB too small') - - self.data = data - self.hashfn = hashfn - - self.index = [read_2_le4(data[i:i+8]) for i in xrange(0, 2048, 8)] - self.table_start = min(p[0] for p in self.index) - # Assume load load factor is 0.5 like official CDB. - self.length = sum(p[1] >> 1 for p in self.index) - - def iteritems(self): - '''Like dict.iteritems(). 
Items are returned in insertion order.''' - pos = 2048 - while pos < self.table_start: - klen, dlen = read_2_le4(self.data[pos:pos+8]) - pos += 8 - - key = self.data[pos:pos+klen] - pos += klen - - data = self.data[pos:pos+dlen] - pos += dlen - - yield key, data - - def items(self): - '''Like dict.items().''' - return list(self.iteritems()) - - def iterkeys(self): - '''Like dict.iterkeys().''' - return (p[0] for p in self.iteritems()) - __iter__ = iterkeys - - def itervalues(self): - '''Like dict.itervalues().''' - return (p[1] for p in self.iteritems()) - - def keys(self): - '''Like dict.keys().''' - return [p[0] for p in self.iteritems()] - - def values(self): - '''Like dict.values().''' - return [p[1] for p in self.iteritems()] - - def __getitem__(self, key): - '''Like dict.__getitem__().''' - value = self.get(key) - if value is None: - raise KeyError(key) - return value - - def has_key(self, key): - '''Return True if key exists in the database.''' - return self.get(key) is not None - __contains__ = has_key - - def __len__(self): - '''Return the number of records in the database.''' - return self.length - - def gets(self, key): - '''Yield values for key in insertion order.''' - # Truncate to 32 bits and remove sign. - h = self.hashfn(key) & 0xffffffff - start, nslots = self.index[h & 0xff] - - if nslots: - end = start + (nslots << 3) - slot_off = start + (((h >> 8) % nslots) << 3) - - for pos in chain(xrange(slot_off, end, 8), - xrange(start, slot_off, 8)): - rec_h, rec_pos = read_2_le4(self.data[pos:pos+8]) - - if not rec_h: - break - elif rec_h == h: - klen, dlen = read_2_le4(self.data[rec_pos:rec_pos+8]) - rec_pos += 8 - - if self.data[rec_pos:rec_pos+klen] == key: - rec_pos += klen - yield self.data[rec_pos:rec_pos+dlen] - - def get(self, key, default=None): - '''Get the first value for key, returning default if missing.''' - # Avoid exception catch when handling default case; much faster. 
- return chain(self.gets(key), (default,)).next() - - def getint(self, key, default=None, base=0): - '''Get the first value for key converted it to an int, returning - default if missing.''' - value = self.get(key, default) - if value is not default: - return int(value, base) - return value - - def getints(self, key, base=0): - '''Yield values for key in insertion order after converting to int.''' - return (int(v, base) for v in self.gets(key)) - - def getstring(self, key, default=None, encoding='utf-8'): - '''Get the first value for key decoded as unicode, returning default if - not found.''' - value = self.get(key, default) - if value is not default: - return value.decode(encoding) - return value - - def getstrings(self, key, encoding='utf-8'): - '''Yield values for key in insertion order after decoding as - unicode.''' - return (v.decode(encoding) for v in self.gets(key)) - - -class Writer(object): - '''Object for building new Constant Databases, and writing them to a - seekable file-like object.''' - - def __init__(self, fp, hashfn=djb_hash): - '''Create an instance writing to a file-like object, using hashfn to - hash keys.''' - self.fp = fp - self.hashfn = hashfn - - fp.write('\x00' * 2048) - self._unordered = [[] for i in xrange(256)] - - def put(self, key, value=''): - '''Write a string key/value pair to the output file.''' - assert type(key) is str and type(value) is str - - pos = self.fp.tell() - self.fp.write(write_2_le4(len(key), len(value))) - self.fp.write(key) - self.fp.write(value) - - h = self.hashfn(key) & 0xffffffff - self._unordered[h & 0xff].append((h, pos)) - - def puts(self, key, values): - '''Write more than one value for the same key to the output file. - Equivalent to calling put() in a loop.''' - for value in values: - self.put(key, value) - - def putint(self, key, value): - '''Write an integer as a base-10 string associated with the given key - to the output file.''' - self.put(key, str(value)) - - def putints(self, key, values): - '''Write zero or more integers for the same key to the output file. - Equivalent to calling putint() in a loop.''' - self.puts(key, (str(value) for value in values)) - - def putstring(self, key, value, encoding='utf-8'): - '''Write a unicode string associated with the given key to the output - file after encoding it as UTF-8 or the given encoding.''' - self.put(key, unicode.encode(value, encoding)) - - def putstrings(self, key, values, encoding='utf-8'): - '''Write zero or more unicode strings to the output file. Equivalent to - calling putstring() in a loop.''' - self.puts(key, (unicode.encode(value, encoding) for value in values)) - - def finalize(self): - '''Write the final hash tables to the output file, and write out its - index. 
The output file remains open upon return.''' - index = [] - for tbl in self._unordered: - length = len(tbl) << 1 - ordered = [(0, 0)] * length - for pair in tbl: - where = (pair[0] >> 8) % length - for i in chain(xrange(where, length), xrange(0, where)): - if not ordered[i][0]: - ordered[i] = pair - break - - index.append((self.fp.tell(), length)) - for pair in ordered: - self.fp.write(write_2_le4(*pair)) - - self.fp.seek(0) - for pair in index: - self.fp.write(write_2_le4(*pair)) - self.fp = None # prevent double finalize() diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e tools/genome_diversity/extract_flanking_dna.py --- a/tools/genome_diversity/extract_flanking_dna.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env python2.5 - -import os -import sys -from optparse import OptionParser -import genome_diversity as gd - -def main_function( parse_arguments=None ): - if parse_arguments is None: - parse_arguments = lambda arguments: ( None, arguments ) - def main_decorator( to_decorate ): - def decorated_main( arguments=None ): - if arguments is None: - arguments = sys.argv - options, arguments = parse_arguments( arguments ) - rc = 1 - try: - rc = to_decorate( options, arguments ) - except Exception, err: - sys.stderr.write( 'ERROR: %s\n' % str( err ) ) - traceback.print_exc() - finally: - sys.exit( rc ) - return decorated_main - return main_decorator - -def parse_arguments( arguments ): - parser = OptionParser() - parser.add_option('--input', - type='string', dest='input', - help='file of selected SNPs') - parser.add_option('--output', - type='string', dest='output', - help='output file') - parser.add_option('--snps_loc', - type='string', dest='snps_loc', - help='snps .loc file') - parser.add_option('--scaffold_col', - type="int", dest='scaffold_col', - help='scaffold column in the input file') - parser.add_option('--pos_col', - type="int", dest='pos_col', - help='position column in the input file') - parser.add_option('--output_format', - type="string", dest='output_format', - help='output format, fasta or primer3') - parser.add_option('--species', - type="string", dest='species', - help='species') - return parser.parse_args( arguments[1:] ) - - -@main_function( parse_arguments ) -def main( options, arguments ): - if not options.input: - raise RuntimeError( 'missing --input option' ) - if not options.output: - raise RuntimeError( 'missing --output option' ) - if not options.snps_loc: - raise RuntimeError( 'missing --snps_loc option' ) - if not options.scaffold_col: - raise RuntimeError( 'missing --scaffold_col option' ) - if not options.pos_col: - raise RuntimeError( 'missing --pos_col option' ) - if not options.output_format: - raise RuntimeError( 'missing --output_format option' ) - if not options.species: - raise RuntimeError( 'missing --species option' ) - - snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) ) - - out_fh = gd._openfile( options.output, 'w' ) - - snpcalls_file = gd.get_filename_from_loc( options.species, options.snps_loc ) - file_root, file_ext = os.path.splitext( snpcalls_file ) - snpcalls_index_file = file_root + ".cdb" - snpcalls = gd.SnpcallsFile( data_file=snpcalls_file, index_file=snpcalls_index_file ) - - while snps.next(): - seq, pos = snps.get_seq_pos() - flanking_dna = snpcalls.get_flanking_dna( sequence=seq, position=pos, format=options.output_format ) - if flanking_dna: - out_fh.write( flanking_dna ) - - out_fh.close() - -if __name__ == "__main__": - main() - 
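The command-line tools removed in this changeset (extract_flanking_dna.py above, plus extract_primers.py and select_restriction_enzymes.py further down) share one lookup pattern: each species' data file is paired with a .cdb index of the same root name that maps a "scaffold position" key to a byte offset in the data file, and the IndexedFile class in genome_diversity.py (removed later in this diff) resolves those keys through cdblib.Reader. A minimal Python 2 sketch of that convention, assuming a snpcalls-style data file whose first two tab-separated columns are scaffold and position, and using made-up file names::

    import os
    import cdblib

    def build_offset_index(data_filename):
        '''Index a snpcalls-style file by a "scaffold position" key.'''
        index_filename = os.path.splitext(data_filename)[0] + '.cdb'
        data_fh = open(data_filename, 'r')
        index_fh = open(index_filename, 'wb')
        # IndexedFile reads these indexes with the builtin hash(), so the
        # same hash function is used here when writing.
        writer = cdblib.Writer(index_fh, hash)
        while True:
            offset = data_fh.tell()
            line = data_fh.readline()
            if not line:
                break
            if line.startswith('#'):
                continue
            elems = line.rstrip('\r\n').split('\t')
            if len(elems) >= 2:
                # Key format matches PrimersFile/SnpcallsFile: "seq pos".
                writer.putint('%s %s' % (elems[0], elems[1]), offset)
        writer.finalize()
        index_fh.close()
        data_fh.close()
        return index_filename

    def lookup_line(data_filename, index_filename, sequence, position):
        '''Return the data line for one SNP, or None if it is not indexed.'''
        reader = cdblib.Reader(open(index_filename, 'rb').read(), hash)
        key = '%s %s' % (str(sequence), str(position))
        if key not in reader:
            return None
        data_fh = open(data_filename, 'r')
        data_fh.seek(reader.getint(key))
        return data_fh.readline()

This only sketches the indexing convention; the per-species data files themselves are resolved through gd.snps.loc and gd.primers.loc, as shown in the tool wrappers below.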
diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e tools/genome_diversity/extract_flanking_dna.xml --- a/tools/genome_diversity/extract_flanking_dna.xml +++ /dev/null @@ -1,93 +0,0 @@ -<tool id="gd_extract_flanking_dna" name="Extract" version="1.0.0"> - <description>DNA flanking chosen SNPs</description> - - <command interpreter="python2.5"> - extract_flanking_dna.py "--input=$input" "--output=$output" "--snps_loc=${GALAXY_DATA_INDEX_DIR}/gd.snps.loc" - #if $override_metadata.choice == "0": - "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}" - #else - "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species" - #end if - "--output_format=$output_format" - </command> - - <inputs> - <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/> - <param name="output_format" type="select" format="integer" label="output format"> - <option value="fasta" selected="true">FastA format</option> - <option value="primer3">Primer3 input</option> - </param> - <conditional name="override_metadata"> - <param name="choice" type="select" format="integer" label="choose columns"> - <option value="0" selected="true">No, get columns from metadata</option> - <option value="1" >Yes, choose columns</option> - </param> - <when value="0"> - <!-- no options --> - </when> - <when value="1"> - <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/> - <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/> - <param name="species" type="select" label="Choose species"> - <options from_file="gd.species.txt"> - <column name="name" index="1"/> - <column name="value" index="0"/> - </options> - </param> - </when> - </conditional> - </inputs> - - <outputs> - <data format="txt" name="output"/> - </outputs> - - <tests> - <test> - <param name="input" value="gd.sample.wsf" ftype="wsf"/> - <param name="output_format" value="primer3"/> - <param name="choice" value="0"/> - <output name="output" file="gd.extract_flanking_dna.txt"/> - </test> - </tests> - - <help> -**What it does** - - It reports a DNA segment containing each SNP, with up to 200 nucleotides on - either side of the SNP position, which is indicated by "n". Fewer nucleotides - are reported if the SNP is near an end of the assembled genome fragment. - ------ - -**Example** - -- input file:: - - chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 - chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 - chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 - chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 - chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 - chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 - chr18_50154905_50155664 304 A G Y C chr18 50155208 A Y 4 2 17 5 1 22 Y 8 0.022 0.996 0.128 0 - chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 - chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 - chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 - etc. 
- -- output file:: - - > chr2_75111355_75112576 314 A C - TATCTTCATTTTTATTATAGACTCTCTGAACCAATTTGCCCTGAGGCAGACTTTTTAAAGTACTGTGTAATGTATGAAGTCCTTCTGCTCAAGCAAATCATTGGCATGAAAACAGTTGCAAACTTATTGTGAGAGAAGAGTCCAAGAGTTTTAACAGTCTGTAAGTATATAGCCTGTGAGTTTGATTTCCTTCTTGTTTTTnTTCCAGAAACATGATCAGGGGCAAGTTCTATTGGATATAGTCTTCAAGCATCTTGATTTGACTGAGCGTGACTATTTTGGTTTGCAGTTGACTGACGATTCCACTGATAACCCAGTAAGTTTAAGCTGTTGTCTTTCATTGTCATTGCAATTTTTCTGTCTTTATACTAGGTCCTTTCTGATTTACATTGTTCACTGATT - > chr8_93901796_93905612 2471 A C - GCTGCCGCTGGATTTACTTCTGCTTGGGTCGAGAGCGGGCTGGATGGGTGAAGAGTGGGCTCCCCGGCCCCTGACCAGGCAGGTGCAGACAAGTCGGAAGAAGGCCCGCCGCATCTCCTTGCTGGCCAGCGTGTAGATGACGGGGTTCATGGCAGAGTTGAGCACGGCCAGCACGATGAACCACTGGGCCTTGAACAGGATnGCGCACTCCTTCACCTTGCAGGCCACATCCACAAGGAAAAGGATGAAGAGTGGGGACCAGCAGGCGATGAACACGCTCACCACGATCACCACGGTCCGCAGCAGGGCCATGGACCGCTCTGAGTTGTGCGGGCTGGCCACCCTGCGGCTGCTGGACTTCACCAGGAAGTAGATGCGTGCGTACAGGATCACGATGGTCAC - > chr10_7434473_7435447 524 T C - ATTATTAACAGAAACATTTCTTTTTCATTACCCAGGGGTTACACTGGTCGTTGATGTTAATCAGTTTTTGGAGAAGGAGAAGCAAAGTGATATTTTGTCTGTTCTGAAGCCTGCCGTTGGTAATACAAATGACGTAATCCCTGAATGTGCTGACAGGTACCATGACGCCCTGGCAAAAGCAAAAGAGCAAAAATCTAGAAGnGGTAAGCATCTTCACTGTTTAGCACAAATTAAATAGCACTTTGAATATGATGATTTCTGTGGTATTGTGTTATCTTACTTTTGAGACAAATAATCGCTTTCAAATGAATATTTCTGAATGTTTGTCATCTCTGGCAAGGAAATTTTTTAGTGTTTCTTTTCCTTTTTTGTCTTTTGGAAATCTGTGATTAACTTGGTGGC - > chr14_80021455_80022064 138 G A - ACCCAGGGATCAAACCCAGGTCTCCCGCATTGCAGGCGGATTCTTTACTGTCTGAGCCTCCAGGGAAGCCCTCGGGGCTGAAGGGATGGTTATGAAGGTGAGAAACAGGGGCCACCTGTCCCCAAGGTACCTTGCGACnTGCCATCTGCGCTCCACCAGTAAATGGACGTCTTCGATCCTTCTGTTGTTGGCGTAGTGCAAACGTTTGGGAAGGTGCTGTTTCAAGTAAGGCTTAAAGTGCTGGTCTGGTTTTTTACACTGAAATATAAATGGACATTGGATTTTGCAATGGAGAGTCTTCTAGAAGAGTCCAAGACATTCTCTCCAGAAAGCTGAAGG - > chr15_64470252_64471048 89 G A - TGTGTGTGTGTGTGTGTGTGTGTGCCTGTGTCTGTACATGCACACCACGTGGCCTCACCCAGTGCCCTCAGCTCCATGGTGATGTCCACnTAGCCGTGCTCCGCGCTGTAGTACATGGCCTCCTGGAGGGCCTTGGTGCGCGTCCGGCTCAGGCGCATGGGCCCCTCGCTGCCGCTGCCCTGGCTGGATGCATCGCTCTCTTCCACGCCCTCAGCCAGGATCTCCTCCAGGGACAGCACATCTGCTTTGGCCTGCTGTGGCTGAGTCAGGAGCTTCCTCAGGACGTTCCT - etc. 
- </help> -</tool> diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e tools/genome_diversity/extract_primers.py --- a/tools/genome_diversity/extract_primers.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python2.5 - -import os -import sys -from optparse import OptionParser -import genome_diversity as gd - -def main_function( parse_arguments=None ): - if parse_arguments is None: - parse_arguments = lambda arguments: ( None, arguments ) - def main_decorator( to_decorate ): - def decorated_main( arguments=None ): - if arguments is None: - arguments = sys.argv - options, arguments = parse_arguments( arguments ) - rc = 1 - try: - rc = to_decorate( options, arguments ) - except Exception, err: - sys.stderr.write( 'ERROR: %s\n' % str( err ) ) - traceback.print_exc() - finally: - sys.exit( rc ) - return decorated_main - return main_decorator - -def parse_arguments( arguments ): - parser = OptionParser() - parser.add_option('--input', - type='string', dest='input', - help='file of selected SNPs') - parser.add_option('--output', - type='string', dest='output', - help='output file') - parser.add_option('--primers_loc', - type='string', dest='primers_loc', - help='primers .loc file') - parser.add_option('--scaffold_col', - type="int", dest='scaffold_col', - help='scaffold column in the input file') - parser.add_option('--pos_col', - type="int", dest='pos_col', - help='position column in the input file') - parser.add_option('--species', - type="string", dest='species', - help='species') - return parser.parse_args( arguments[1:] ) - - -@main_function( parse_arguments ) -def main( options, arguments ): - if not options.input: - raise RuntimeError( 'missing --input option' ) - if not options.output: - raise RuntimeError( 'missing --output option' ) - if not options.primers_loc: - raise RuntimeError( 'missing --primers_loc option' ) - if not options.scaffold_col: - raise RuntimeError( 'missing --scaffold_col option' ) - if not options.pos_col: - raise RuntimeError( 'missing --pos_col option' ) - if not options.species: - raise RuntimeError( 'missing --species option' ) - - snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) ) - - out_fh = gd._openfile( options.output, 'w' ) - - primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc ) - file_root, file_ext = os.path.splitext( primer_data_file ) - primer_index_file = file_root + ".cdb" - primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file ) - - while snps.next(): - seq, pos = snps.get_seq_pos() - primer = primers.get_entry( seq, pos ) - if primer: - out_fh.write( primer ) - - out_fh.close() - -if __name__ == "__main__": - main() - diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e tools/genome_diversity/extract_primers.xml --- a/tools/genome_diversity/extract_primers.xml +++ /dev/null @@ -1,90 +0,0 @@ -<tool id="gd_extract_primers" name="Extract primers" version="1.0.0"> - <description>for selected SNPs</description> - - <command interpreter="python2.5"> - extract_primers.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc" - #if $override_metadata.choice == "0": - "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}" - #else - "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species" - #end if - </command> - - <inputs> - <param format="tabular" 
name="input" type="data" label="Selected SNPS dataset"/> - <conditional name="override_metadata"> - <param name="choice" type="select" format="integer" label="choose columns"> - <option value="0" selected="true">No, get columns from metadata</option> - <option value="1" >Yes, choose columns</option> - </param> - <when value="0"> - <!-- no options --> - </when> - <when value="1"> - <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/> - <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/> - <param name="species" type="select" label="Choose species"> - <options from_file="gd.species.txt"> - <column name="name" index="1"/> - <column name="value" index="0"/> - </options> - </param> - </when> - </conditional> - </inputs> - - <outputs> - <data format="txt" name="output"/> - </outputs> - - <tests> - <test> - <param name="input" value="gd.sample.wsf" ftype="wsf"/> - <param name="choice" value="0"/> - <output name="output" file="gd.extract_primers.txt"/> - </test> - </tests> - - - <help> -**What it does** - - This tool extracts primers for SNPs in the dataset using the Primer3 program. - The first line of output for a given SNP reports the name of the assembled - contig, the SNP's position in the contig, the two variant nucleotides, and - Primer3's "pair penalty". The next line, if not blank, names restriction - enzymes (from the user-adjustable list) that differentially cut at that - site, but do not cut at any other position between and including the - primer positions. The next lines show the SNP's flanking regions, with - the SNP position indicated by "n", including the primer positions and an - additional 3 nucleotides. - ------ - -**Example** - -- input file:: - - chr5_30800874_30802049 734 G A chr5 30801606 A 24 0 99 4 11 97 Y 496 0.502 0.033 0.215 6 - chr8_55117827_55119487 994 A G chr8 55118815 G 25 0 102 4 11 96 Y 22 0.502 0.025 2.365 1 - chr9_100484836_100485311 355 C T chr9 100485200 T 27 0 108 6 17 100 Y 190 0.512 0.880 2.733 4 - chr12_3635530_3637738 2101 T C chr12 3637630 T 25 0 102 4 13 93 Y 169 0.554 0.024 0.366 4 - -- output file:: - - chr5_30800874_30802049 734 G A 0.352964 - BglII,MboI,Sau3AI,Tru9I,XhoII - 1 CTGAAGGTGAGCAGGATTCAGGAGACAGAAAACAAAGCCCAGGCCTGCCCAAGGTGGAAA - >>>>>>>>>>>>>>>>>>>> - - 61 AGTCTAACAACTCGCCCTCTGCTTAnATCTGAGACTCACAGGGATAATAACACACTTGGT - - - 21 CAAGGAATAAACTAGATATTATTCACTCCTCTAGAAGGCTGCCAGGAAAATTGCCTGACT - <<<<<<< - - 181 TGAACCTTGGCTCTGA - <<<<<<<<<<<<< - etc. 
- </help> -</tool> diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e tools/genome_diversity/genome_diversity.py --- a/tools/genome_diversity/genome_diversity.py +++ /dev/null @@ -1,266 +0,0 @@ -#!/usr/bin/env python2.5 - -import sys -import cdblib - -def _openfile( filename=None, mode='r' ): - try: - fh = open( filename, mode ) - except IOError, err: - raise RuntimeError( "can't open file: %s\n" % str( err ) ) - return fh - -def get_filename_from_loc( species=None, filename=None ): - fh = _openfile( filename ) - for line in fh: - if line and not line.startswith( '#' ): - line = line.rstrip( '\r\n' ) - if line: - elems = line.split( '\t' ) - if len( elems ) >= 2 and elems[0] == species: - return elems[1] - return None - - -class SnpFile( object ): - def __init__( self, filename=None, seq_col=1, pos_col=2, ref_seq_col=7, ref_pos_col=8 ): - self.filename = filename - self.fh = _openfile( filename ) - self.seq_col = seq_col - self.pos_col = pos_col - self.ref_seq_col = ref_seq_col - self.ref_pos_col = ref_pos_col - self.elems = None - self.line = None - self.comments = [] - - def next( self ): - while self.fh: - try: - self.line = self.fh.next() - except StopIteration: - self.line = None - self.elems = None - return None - if self.line: - self.line = self.line.rstrip( '\r\n' ) - if self.line: - if self.line.startswith( '#' ): - self.comments.append( self.line ) - else: - self.elems = self.line.split( '\t' ) - return 1 - - def get_seq_pos( self ): - if self.elems: - return self.elems[ self.seq_col - 1 ], self.elems[ self.pos_col - 1 ] - else: - return None, None - - def get_ref_seq_pos( self ): - if self.elems: - return self.elems[ self.ref_seq_seq - 1 ], self.elems[ self.ref_pos_col - 1 ] - else: - return None, None - - -class IndexedFile( object ): - - def __init__( self, data_file=None, index_file=None ): - self.data_file = data_file - self.index_file = index_file - self.data_fh = _openfile( data_file ) - self.index_fh = _openfile( index_file ) - self._reader = cdblib.Reader( self.index_fh.read(), hash ) - - def get_indexed_line( self, key=None ): - line = None - if key in self._reader: - offset = self._reader.getint( key ) - self.data_fh.seek( offset ) - try: - line = self.data_fh.next() - except StopIteration: - raise RuntimeError( 'index file out of sync for %s' % key ) - return line - -class PrimersFile( IndexedFile ): - def get_primer_header( self, sequence=None, position=None ): - key = "%s %s" % ( str( sequence ), str( position ) ) - header = self.get_indexed_line( key ) - if header: - if header.startswith( '>' ): - elems = header.split() - if len( elems ) < 3: - raise RuntimeError( 'short primers header for %s' % key ) - if sequence != elems[1] or str( position ) != elems[2]: - raise RuntimeError( 'primers index for %s finds %s %s' % ( key, elems[1], elems[2] ) ) - else: - raise RuntimeError( 'primers index out of sync for %s' % key ) - return header - - def get_entry( self, sequence=None, position=None ): - entry = self.get_primer_header( sequence, position ) - if entry: - while self.data_fh: - try: - line = self.data_fh.next() - except StopIteration: - break - if line.startswith( '>' ): - break - entry += line - return entry - - def get_enzymes( self, sequence=None, position=None ): - entry = self.get_primer_header( sequence, position ) - enzyme_list = [] - if entry: - try: - line = self.data_fh.next() - except StopIteration: - raise RuntimeError( 'primers entry for %s %s is truncated' % ( str( sequence ), str( position ) ) ) - if 
line.startswith( '>' ): - raise RuntimeError( 'primers entry for %s %s is truncated' % ( str( sequence ), str( position ) ) ) - line.rstrip( '\r\n' ) - if line: - enzymes = line.split( ',' ) - for enzyme in enzymes: - enzyme = enzyme.strip() - if enzyme: - enzyme_list.append( enzyme ) - return enzyme_list - -class SnpcallsFile( IndexedFile ): - def get_snp_seq( self, sequence=None, position=None ): - key = "%s %s" % ( str( sequence ), str( position ) ) - line = self.get_indexed_line( key ) - if line: - elems = line.split( '\t' ) - if len (elems) < 3: - raise RuntimeError( 'short snpcalls line for %s' % key ) - if sequence != elems[0] or str( position ) != elems[1]: - raise RuntimeError( 'snpcalls index for %s finds %s %s' % ( key, elems[0], elems[1] ) ) - return elems[2] - else: - return None - - def get_flanking_dna( self, sequence=None, position=None, format='fasta' ): - if format != 'fasta' and format != 'primer3': - raise RuntimeError( 'invalid format for flanking dna: %s' % str( format ) ) - seq = self.get_snp_seq( sequence, position ) - if seq: - p = seq.find('[') - if p == -1: - raise RuntimeError( 'snpcalls entry for %s %s missing left bracket: %s' % ( str( sequence ), str( position ), seq ) ) - q = seq.find(']', p + 1) - if q == -1: - raise RuntimeError( 'snpcalls entry for %s %s missing right bracket: %s' % ( str( sequence ), str( position ), seq ) ) - q += 1 - - if format == 'fasta': - flanking_seq = '> ' - else: - flanking_seq = 'SEQUENCE_ID=' - - flanking_seq += "%s %s %s %s\n" % ( str( sequence ), str( position ), seq[p+1], seq[p+3] ) - - if format == 'primer3': - flanking_seq += 'SEQUENCE_TEMPLATE=' - - flanking_seq += "%sn%s\n" % ( seq[0:p], seq[q:] ) - - if format == 'primer3': - flanking_seq += "SEQUENCE_TARGET=%d,11\n=\n" % ( p - 5 ) - - return flanking_seq - else: - return None - - - -class LocationFile( object ): - def __init__(self, filename): - self.build_map(filename) - - def build_map(self, filename): - self.map = {} - self.open_file(filename) - for line in self.read_lines(): - elems = line.split('\t', 1) - if len(elems) == 2: - self.map[ elems[0].strip() ] = elems[1].strip() - self.close_file() - - def read_lines(self): - for line in self.fh: - if not line.startswith('#'): - line = line.rstrip('\r\n') - yield line - - def open_file(self, filename): - self.filename = filename - try: - self.fh = open(filename, 'r') - except IOError, err: - print >> sys.stderr, "Error opening location file '%s': %s" % (filename, str(err)) - sys.exit(1) - - def close_file(self): - self.fh.close() - - def loc_file( self, key ): - if key in self.map: - return self.map[key] - else: - print >> sys.stderr, "'%s' does not appear in location file '%s'" % (key, self.filename) - sys.exit(1) - -class ChrLens( object ): - def __init__( self, location_file, species ): - self.chrlen_loc = LocationFile( location_file ) - self.chrlen_filename = self.chrlen_loc.loc_file( species ) - self.build_map() - - def build_map(self): - self.map = {} - self.open_file(self.chrlen_filename) - for line in self.read_lines(): - elems = line.split('\t', 1) - if len(elems) == 2: - chrom = elems[0].strip() - chrom_len_text = elems[1].strip() - try: - chrom_len = int( chrom_len_text ) - except ValueError: - print >> sys.stderr, "Bad length '%s' for chromosome '%s' in '%s'" % (chrom_len_text, chrom, self.chrlen_filename) - self.map[ chrom ] = chrom_len - self.close_file() - - def read_lines(self): - for line in self.fh: - if not line.startswith('#'): - line = line.rstrip('\r\n') - yield line - - def open_file(self, 
filename): - self.filename = filename - try: - self.fh = open(filename, 'r') - except IOError, err: - print >> sys.stderr, "Error opening chromosome length file '%s': %s" % (filename, str(err)) - sys.exit(1) - - def close_file(self): - self.fh.close() - - def length( self, key ): - if key in self.map: - return self.map[key] - else: - return None - - def __iter__( self ): - for chrom in self.map: - yield chrom - diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e tools/genome_diversity/select_restriction_enzymes.py --- a/tools/genome_diversity/select_restriction_enzymes.py +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env python2.5 - -import os -import sys -from optparse import OptionParser -import genome_diversity as gd - -def main_function( parse_arguments=None ): - if parse_arguments is None: - parse_arguments = lambda arguments: ( None, arguments ) - def main_decorator( to_decorate ): - def decorated_main( arguments=None ): - if arguments is None: - arguments = sys.argv - options, arguments = parse_arguments( arguments ) - rc = 1 - try: - rc = to_decorate( options, arguments ) - except Exception, err: - sys.stderr.write( 'ERROR: %s\n' % str( err ) ) - traceback.print_exc() - finally: - sys.exit( rc ) - return decorated_main - return main_decorator - -def parse_arguments( arguments ): - parser = OptionParser() - parser.add_option('--input', - type='string', dest='input', - help='file of selected SNPs') - parser.add_option('--output', - type='string', dest='output', - help='output file') - parser.add_option('--primers_loc', - type='string', dest='primers_loc', - help='primers .loc file') - parser.add_option('--scaffold_col', - type="int", dest='scaffold_col', - help='scaffold column in the input file') - parser.add_option('--pos_col', - type="int", dest='pos_col', - help='position column in the input file') - parser.add_option('--enzyme_list', - type="string", dest='enzyme_list_string', - help='comma separated list of enzymes') - parser.add_option('--species', - type="string", dest='species', - help='species') - return parser.parse_args( arguments[1:] ) - - -@main_function( parse_arguments ) -def main( options, arguments ): - if not options.input: - raise RuntimeError( 'missing --input option' ) - if not options.output: - raise RuntimeError( 'missing --output option' ) - if not options.primers_loc: - raise RuntimeError( 'missing --primers_loc option' ) - if not options.scaffold_col: - raise RuntimeError( 'missing --scaffold_col option' ) - if not options.pos_col: - raise RuntimeError( 'missing --pos_col option' ) - if not options.enzyme_list_string: - raise RuntimeError( 'missing --enzyme_list option' ) - if not options.species: - raise RuntimeError( 'missing --species option' ) - - snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) ) - - out_fh = gd._openfile( options.output, 'w' ) - - enzyme_dict = {} - for enzyme in options.enzyme_list_string.split( ',' ): - enzyme = enzyme.strip() - if enzyme: - enzyme_dict[enzyme] = 1 - - primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc ) - file_root, file_ext = os.path.splitext( primer_data_file ) - primer_index_file = file_root + ".cdb" - primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file ) - - comments_printed = False - - while snps.next(): - seq, pos = snps.get_seq_pos() - enzyme_list = primers.get_enzymes( seq, pos ) - for enzyme in enzyme_list: - if enzyme in enzyme_dict: - if not 
comments_printed: - for comment in snps.comments: - out_fh.write( "%s\n" % comment ) - comments_printed = True - out_fh.write( "%s\n" % snps.line ) - break - - out_fh.close() - -if __name__ == "__main__": - main() - diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e tools/genome_diversity/select_restriction_enzymes.xml --- a/tools/genome_diversity/select_restriction_enzymes.xml +++ /dev/null @@ -1,90 +0,0 @@ -<tool id="gd_select_restriction_enzymes" name="Specify" version="1.0.0"> - <description>a set of restriction enzymes</description> - - <command interpreter="python2.5"> - select_restriction_enzymes.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc" - #if $override_metadata.choice == "0": - "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}" - #else - "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species" - #end if - "--enzyme_list=$enzymes" - </command> - - <inputs> - <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/> - <conditional name="override_metadata"> - <param name="choice" type="select" format="integer" label="choose columns"> - <option value="0" selected="true">No, get columns from metadata</option> - <option value="1" >Yes, choose columns</option> - </param> - <when value="0"> - <!-- no options --> - </when> - <when value="1"> - <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/> - <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/> - <param name="species" type="select" label="Choose species"> - <options from_file="gd.species.txt"> - <column name="name" index="1"/> - <column name="value" index="0"/> - </options> - </param> - </when> - </conditional> - - <param name="enzymes" type="select" display="checkboxes" multiple="true" label="Choose enzymes"> - <options from_file="gd.restriction_enzymes.txt"> - <column name="name" index="0"/> - <column name="value" index="1"/> - </options> - </param> - </inputs> - - <outputs> - <data format="wsf" name="output" metadata_source="input"/> - </outputs> - - <tests> - <test> - <param name="input" value="gd.sample.wsf" ftype="wsf"/> - <param name="choice" value="0"/> - <param name="enzymes" value="BanI,BstOI,Hsp92II"/> - <output name="output" file="gd.select_restriction_enzymes.wsf"/> - </test> - </tests> - - <help> -**What it does** - - It selects the SNPs that are differentially cut by at least one of the - specified restriction enzymes. The enzymes are required to cut the amplified - segment (for the specified PCR primers) only at the SNP. 
- ------ - -**Example** - -- input file:: - - chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 - chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 - chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 - chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 - chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 - chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 - chr18_50154905_50155664 304 A G Y C chr18 50155208 A Y 4 2 17 5 1 22 Y 8 0.022 0.996 0.128 0 - chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 - chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 - chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 - etc. - -- output file:: - - chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 - chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 - chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 - chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 - etc. - </help> -</tool> diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e tools/genome_diversity/select_snps.py --- a/tools/genome_diversity/select_snps.py +++ /dev/null @@ -1,148 +0,0 @@ -#!/usr/bin/env python - -import sys -import math -from optparse import OptionParser -import genome_diversity as gd - -def main_function(parse_arguments=None): - if parse_arguments is None: - parse_arguments = lambda arguments: (None, arguments) - def main_decorator(to_decorate): - def decorated_main(arguments=None): - if arguments is None: - arguments = sys.argv - options, arguments = parse_arguments(arguments) - sys.exit(to_decorate(options, arguments)) - return decorated_main - return main_decorator - -def parse_arguments(arguments): - parser = OptionParser() - parser.add_option('--input', dest='input') - parser.add_option('--output', dest='output') - parser.add_option('--chrlens_loc', dest='chrlens_loc') - parser.add_option('--num_snps', dest='num_snps') - parser.add_option('--ref_chrom_col', dest='ref_chrom_col') - parser.add_option('--ref_pos_col', dest='ref_pos_col') - parser.add_option('--species', dest='species') - return parser.parse_args(arguments[1:]) - -@main_function(parse_arguments) -def main(options, arguments): - - ref_chrom_idx = to_int( options.ref_chrom_col ) -1 - ref_pos_idx = to_int( options.ref_pos_col ) -1 - - if (ref_chrom_idx < 1) or (ref_pos_idx < 1) or (ref_chrom_idx == ref_pos_idx): - print >> sys.stderr, "Cannot locate reference genome sequence (ref) or reference genome position (rPos) column for this dataset." 
- sys.exit(1) - - chrlens = gd.ChrLens( options.chrlens_loc, options.species ) - - total_len = 0 - for chrom in chrlens: - total_len += chrlens.length(chrom) - - total_requested = int( options.num_snps ) - lines, data, comments = get_snp_lines_data_and_comments( options.input, ref_chrom_idx, ref_pos_idx ) - selected = select_snps( data, total_len, total_requested ) - out_data = fix_selection_and_order_like_input(data, selected, total_requested) - write_selected_snps( options.output, out_data, lines, comments ) - -def to_int( value ): - try: - int_value = int( value ) - except ValueError: - int_value = 0 - return int_value - -def get_snp_lines_data_and_comments( filename, chrom_idx, pos_idx ): - fh = open( filename, 'r' ) - if (chrom_idx >= pos_idx): - needed = chrom_idx + 1 - else: - needed = pos_idx + 1 - lines = [] - data = [] - comments = [] - line_idx = 0 - line_num = 0 - for line in fh: - line_num += 1 - line = line.rstrip('\r\n') - if line: - if line.startswith('#'): - comments.append(line) - else: - elems = line.split('\t') - if len(elems) >= needed: - chrom = elems[chrom_idx] - try: - pos = int(elems[pos_idx]) - except ValueError: - sys.stderr.write( "bad reference position in line %d column %d: %s\n" % ( line_num, pos_idx+1, elems[pos_idx] ) ) - sys.exit(1) - lines.append(line) - chrom_sort = chrom.lstrip('chr') - data.append( [chrom_sort, chrom, pos, line_num, line_idx] ) - line_idx += 1 - fh.close() - data = sorted( data, key=lambda x: (x[0], x[2]) ) - return lines, data, comments - -def select_snps( data, total_len, requested ): - old_chrom = None - next_print = 0 - selected = [] - space = total_len / requested - for data_idx, datum in enumerate( data ): - chrom = datum[1] - pos = datum[2] - if chrom != old_chrom: - old_chrom = chrom - next_print = 0 - if pos >= next_print: - selected.append(data_idx) - next_print += space - return selected - -def fix_selection_and_order_like_input(data, selected, requested): - total_selected = len( selected ) - a = float( total_selected ) / requested - b = a / 2 - - idx_list = [] - for i in range( requested ): - idx = int( math.ceil( i * a + b ) - 1 ) - idx_list.append( idx ) - - out_data = [] - - for i, data_idx in enumerate(selected): - if total_selected > requested: - if i in idx_list: - out_data.append(data[data_idx]) - else: - out_data.append(data[data_idx]) - - out_data = sorted( out_data, key=lambda x: x[3] ) - - return out_data - -def write_selected_snps( filename, data, lines, comments ): - fh = open( filename, 'w' ) - - for comment in comments: - fh.write("%s\n" % comment ) - - for datum in data: - line_idx = datum[4] - fh.write("%s\n" % lines[line_idx]) - - fh.close() - -if __name__ == "__main__": - main() - - diff -r 0cab089a6ad7305c3ec968c39a7032b640376363 -r 849fcfda099e3fb55a0cad7e0e478142930d2e8e tools/genome_diversity/select_snps.xml --- a/tools/genome_diversity/select_snps.xml +++ /dev/null @@ -1,87 +0,0 @@ -<tool id="gd_select_snps" name="Select" version="1.0.0"> - <description>a specified number of SNPs</description> - - <command interpreter="python"> - select_snps.py "--input=$input" "--output=$output" "--chrlens_loc=${GALAXY_DATA_INDEX_DIR}/gd.chrlens.loc" "--num_snps=$num_snps" - #if $override_metadata.choice == "0": - "--ref_chrom_col=${input.metadata.ref}" "--ref_pos_col=${input.metadata.rPos}" "--species=${input.metadata.species}" - #else - "--ref_chrom_col=$ref_col" "--ref_pos_col=$rpos_col" "--species=$species" - #end if - </command> - - <inputs> - <param format="tabular" name="input" type="data" label="Selected 
SNPS dataset"/> - <param name="num_snps" type="integer" value="10" optional="false" min="1" label="Number of SNPs"/> - <conditional name="override_metadata"> - <param name="choice" type="select" format="integer" label="choose columns"> - <option value="0" selected="true">No, get columns from metadata</option> - <option value="1" >Yes, choose columns</option> - </param> - <when value="0"> - <!-- no options --> - </when> - <when value="1"> - <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome"/> - <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position"/> - <param name="species" type="select" label="Choose species"> - <options from_file="gd.species.txt"> - <column name="name" index="1"/> - <column name="value" index="0"/> - </options> - </param> - </when> - </conditional> - </inputs> - - <outputs> - <data format="wsf" name="output" metadata_source="input"/> - </outputs> - - <tests> - <test> - <param name="input" value="gd.sample.wsf" ftype="wsf"/> - <param name="num_snps" value="5"/> - <param name="choice" value="0"/> - <output name="output" file="gd.select_snps.wsf"/> - </test> - </tests> - - - <help> -**What it does** - - It attempts to select a specified number of SNPs from the dataset, making them - approximately uniformly spaced relative to the reference genome. The number - actually selected may be slightly more than the specified number. - ------ - -**Example** - -- input file:: - - chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 - chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 - chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 - chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 - chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 - chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 - chr18_50154905_50155664 304 A G Y C chr18 50155208 A Y 4 2 17 5 1 22 Y 8 0.022 0.996 0.128 0 - chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 - chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 - chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 - etc. - -- output file:: - - chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 - chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 - chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 - chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 - chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 - chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 - chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 - etc. - </help> -</tool> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.