commit/galaxy-central: 2 new changesets

4 Feb 2014

2 new commits in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/935942afdf70/
Changeset:   935942afdf70
User:        nsoranzo
Date:        2014-01-27 19:51:46
Summary:     Remove unused imports and unused variables. Fix spacing.
Affected #:  17 files

diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/WeightedAverage.py

--- a/tools/regVariation/WeightedAverage.py
+++ b/tools/regVariation/WeightedAverage.py
@@ -1,20 +1,16 @@
 #!/usr/bin/env python
 """
-
 usage: %prog bed_file_1 bed_file_2 out_file
     -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file
     -2, --cols2=N,N,N,N,N: Columns for chr, start, end, strand, name/value in second file
 """
-from galaxy import eggs
 
 import collections
-import sys, string
+import sys
 #import numpy
 from galaxy import eggs
 import pkg_resources
 pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
 from galaxy.tools.util.galaxyops import *
 from bx.cookbook import doc_optparse
 
@@ -27,77 +23,72 @@
     sys.exit()
 
 
-def FindRate(chromosome,start_stop,dictType):
-    OverlapList=[]
+def FindRate(chromosome, start_stop, dictType):
+    OverlapList = []
     for tempO in dictType[chromosome]:
-        DatabaseInterval=[tempO[0],tempO[1]]
-        Overlap=GetOverlap(start_stop,DatabaseInterval)
-        if Overlap>0:
-            OverlapList.append([Overlap,tempO[2]])
-                                 
-    if len(OverlapList)>0:
-   
-        SumRecomb=0
-        SumOverlap=0
+        DatabaseInterval = [tempO[0], tempO[1]]
+        Overlap = GetOverlap( start_stop, DatabaseInterval )
+        if Overlap > 0:
+            OverlapList.append([Overlap, tempO[2]])
+    
+    if len(OverlapList) > 0:
+        SumRecomb = 0
+        SumOverlap = 0
         for member in OverlapList:
-            SumRecomb+=member[0]*member[1]
-            SumOverlap+=member[0]    
-        averageRate=SumRecomb/SumOverlap
-                               
+            SumRecomb += member[0]*member[1]
+            SumOverlap += member[0]
+        averageRate = SumRecomb/SumOverlap
         return averageRate
-                               
     else:
         return 'NA'
-                                                       
-                           
-    
-def GetOverlap(a,b):
-    return min(a[1],b[1])-max(a[0],b[0])
+
+
+def GetOverlap(a, b):
+    return min(a[1], b[1])-max(a[0], b[0])
+
 
 options, args = doc_optparse.parse( __doc__ )
 
 try:
     chr_col_1, start_col_1, end_col_1, strand_col1 = parse_cols_arg( options.cols1 )
-    chr_col_2, start_col_2, end_col_2, strand_col2, name_col_2 = parse_cols_arg( options.cols2 )      
+    chr_col_2, start_col_2, end_col_2, strand_col2, name_col_2 = parse_cols_arg( options.cols2 )
     input1, input2, input3 = args
 except Exception, eee:
     print eee
     stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
-    
 
+fd2 = open(input2)
+lines2 = fd2.readlines()
+RecombChrDict = collections.defaultdict(list)
 
-fd2=open(input2)
-lines2=fd2.readlines()
-RecombChrDict=collections.defaultdict(list)
-
-skipped=0
+skipped = 0
 for line in lines2:
-    temp=line.strip().split()
+    temp = line.strip().split()
     try:
         assert float(temp[int(name_col_2)])
     except:
-        skipped+=1
+        skipped += 1
         continue
-    tempIndex=[int(temp[int(start_col_2)]),int(temp[int(end_col_2)]),float(temp[int(name_col_2)])]
+    tempIndex = [int(temp[int(start_col_2)]), int(temp[int(end_col_2)]), float(temp[int(name_col_2)])]
     RecombChrDict[temp[int(chr_col_2)]].append(tempIndex)
 
-print "Skipped %d features with invalid values" %(skipped)
+print "Skipped %d features with invalid values" % (skipped)
 
-fd1=open(input1)
-lines=fd1.readlines()
-finalProduct=''
+fd1 = open(input1)
+lines = fd1.readlines()
+finalProduct = ''
 for line in lines:
-    temp=line.strip().split('\t')
-    chromosome=temp[int(chr_col_1)]
-    start=int(temp[int(start_col_1)])
-    stop=int(temp[int(end_col_1)])
-    start_stop=[start,stop]
-    RecombRate=FindRate(chromosome,start_stop,RecombChrDict)
+    temp = line.strip().split('\t')
+    chromosome = temp[int(chr_col_1)]
+    start = int(temp[int(start_col_1)])
+    stop = int(temp[int(end_col_1)])
+    start_stop = [start, stop]
+    RecombRate = FindRate( chromosome, start_stop, RecombChrDict )
     try:
-        RecombRate="%.4f" %(float(RecombRate))
+        RecombRate = "%.4f" % (float(RecombRate))
     except:
-        RecombRate=RecombRate
-    finalProduct+=line.strip()+'\t'+str(RecombRate)+'\n'
-fdd=open(input3,'w')
+        RecombRate = RecombRate
+    finalProduct += line.strip()+'\t'+str(RecombRate)+'\n'
+fdd = open(input3, 'w')
 fdd.writelines(finalProduct)
 fdd.close()

diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/best_regression_subsets.py
--- a/tools/regVariation/best_regression_subsets.py
+++ b/tools/regVariation/best_regression_subsets.py
@@ -2,7 +2,7 @@
 
 from galaxy import eggs
 
-import sys, string
+import sys
 from rpy import *
 import numpy
 
@@ -10,19 +10,20 @@
     sys.stderr.write(msg)
     sys.exit()
 
+
 infile = sys.argv[1]
 y_col = int(sys.argv[2])-1
 x_cols = sys.argv[3].split(',')
 outfile = sys.argv[4]
 outfile2 = sys.argv[5]
-print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
+print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 )
 fout = open(outfile,'w')
 
 for i, line in enumerate( file ( infile )):
     line = line.rstrip('\r\n')
     if len( line )>0 and not line.startswith( '#' ):
         elems = line.split( '\t' )
-        break 
+        break
     if i == 30:
         break # Hopefully we'll never get here...
 
@@ -32,12 +33,12 @@
 y_vals = []
 x_vals = []
 
-for k,col in enumerate(x_cols):
+for k, col in enumerate(x_cols):
     x_cols[k] = int(col)-1
     x_vals.append([])
     
 NA = 'NA'
-for ind,line in enumerate( file( infile )):
+for ind, line in enumerate( file( infile ) ):
     if line and not line.startswith( '#' ):
         try:
             fields = line.split("\t")
@@ -46,7 +47,7 @@
             except Exception, ey:
                 yval = r('NA')
             y_vals.append(yval)
-            for k,col in enumerate(x_cols):
+            for k, col in enumerate(x_cols):
                 try:
                     xval = float(fields[col])
                 except Exception, ex:
@@ -59,10 +60,10 @@
 
 x_vals1 = numpy.asarray(x_vals).transpose()
 
-dat= r.list(x=array(x_vals1), y=y_vals)
+dat = r.list(x=array(x_vals1), y=y_vals)
 
 r.library("leaps")
- 
+
 set_default_mode(NO_CONVERSION)
 try:
     leaps = r.regsubsets(r("y ~ x"), data= r.na_exclude(dat))
@@ -75,10 +76,10 @@
 pattern = "["
 for i in range(tot):
     pattern = pattern + 'c' + str(int(x_cols[int(i)]) + 1) + ' '
-pattern = pattern.strip() + ']'  
-print >>fout, "#Vars\t%s\tR-sq\tAdj. R-sq\tC-p\tbic" %(pattern)
-for ind,item in enumerate(summary['outmat']):
-    print >>fout, "%s\t%s\t%s\t%s\t%s\t%s" %(str(item).count('*'), item, summary['rsq'][ind], summary['adjr2'][ind], summary['cp'][ind], summary['bic'][ind])
+pattern = pattern.strip() + ']'
+print >> fout, "#Vars\t%s\tR-sq\tAdj. R-sq\tC-p\tbic" % (pattern)
+for ind, item in enumerate(summary['outmat']):
+    print >> fout, "%s\t%s\t%s\t%s\t%s\t%s" % (str(item).count('*'), item, summary['rsq'][ind], summary['adjr2'][ind], summary['cp'][ind], summary['bic'][ind])
 
 
 r.pdf( outfile2, 8, 8 )

diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/featureCounter.py
--- a/tools/regVariation/featureCounter.py
+++ b/tools/regVariation/featureCounter.py
@@ -11,8 +11,7 @@
 from galaxy import eggs
 import pkg_resources
 pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
+import sys, fileinput
 from bx.intervals.io import *
 from bx.cookbook import doc_optparse
 from bx.intervals.operations import quicksect
@@ -33,7 +32,7 @@
             partial += 1
         if node.left and node.left.maxend > start:
             counter(node.left, start, end)
-        if node.right: 
+        if node.right:
             counter(node.right, start, end)
     elif start < node.start < end:
         if node.end <= end:
@@ -42,10 +41,10 @@
             partial += 1
         if node.left and node.left.maxend > start:
             counter(node.left, start, end)
-        if node.right: 
+        if node.right:
             counter(node.right, start, end)
     else:
-        if node.left: 
+        if node.left:
             counter(node.left, start, end)
 
 def count_coverage( readers, comments=True ):
@@ -58,8 +57,8 @@
         if type( item ) is GenomicInterval:
             rightTree.insert( item, secondary.linenum, item.fields )
     
-    bitsets = secondary_copy.binned_bitsets() 
-        
+    bitsets = secondary_copy.binned_bitsets()
+    
     global full, partial
     
     for interval in primary:
@@ -82,7 +81,7 @@
                 bases_covered = bitsets[ chrom ].count_range( start, end-start )
                 if (end - start) == 0:
                     percent = 0
-                else: 
+                else:
                     percent = float(bases_covered) / float(end - start)
                 if bases_covered:
                     root = rightTree.chroms[chrom]    #root node for the chrom tree
@@ -92,13 +91,14 @@
             interval.fields.append(str(full))
             interval.fields.append(str(partial))
             yield interval
-    
+
+
 def main():
     options, args = doc_optparse.parse( __doc__ )
     
     try:
         chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
-        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )      
+        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )
         in1_fname, in2_fname, out_fname = args
     except:
         stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
@@ -126,7 +126,7 @@
     out_file = open( out_fname, "w" )
 
     try:
-        for line in count_coverage([g1,g2,g2_copy]):
+        for line in count_coverage([g1, g2, g2_copy]):
             if type( line ) is GenomicInterval:
                 out_file.write( "%s\n" % "\t".join( line.fields ) )
             else:
@@ -143,6 +143,7 @@
         print skipped( g2, filedesc=" of 2nd dataset" )
     elif g2_copy.skipped > 0:
         print skipped( g2_copy, filedesc=" of 2nd dataset" )
-        
+
+
 if __name__ == "__main__":
     main()

diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/getIndelRates_3way.py
--- a/tools/regVariation/getIndelRates_3way.py
+++ b/tools/regVariation/getIndelRates_3way.py
@@ -6,7 +6,6 @@
 pkg_resources.require( "bx-python" )
 
 import sys, os, tempfile
-import traceback
 import fileinput
 from warnings import warn
 
@@ -18,7 +17,8 @@
 def stop_err(msg):
     sys.stderr.write(msg)
     sys.exit()
-    
+
+
 def counter(node, start, end, sort_col):
     global full, blk_len, blk_list
     if node.start < start:
@@ -31,14 +31,14 @@
             blk_len += int(node.other[sort_col+2])
         if node.left and node.left.maxend > start:
             counter(node.left, start, end, sort_col)
-        if node.right: 
+        if node.right:
             counter(node.right, start, end, sort_col)
     elif node.start > end:
-        if node.left: 
+        if node.left:
             counter(node.left, start, end, sort_col)
-            
 
-infile = sys.argv[1]  
+
+infile = sys.argv[1]
 fout = open(sys.argv[2],'w')
 int_file = sys.argv[3]
 if int_file != "None": #User has specified an interval file
@@ -48,9 +48,9 @@
         chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[5] )
     except:
         stop_err("Unable to open input Interval file")
-        
+
+
 def main():
-
     for i, line in enumerate( file ( infile )):
         line = line.rstrip('\r\n')
         if len( line )>0 and not line.startswith( '#' ):
@@ -86,8 +86,7 @@
             break
         except:
             continue
-        
-        
+    
     fin = open(infile, 'r')
     skipped = 0
     
@@ -98,7 +97,7 @@
             os.system(cmdline)
         except:
             stop_err("Encountered error while sorting the input file.")
-        print >>fout, "#Block\t%s_InsRate\t%s_InsRate\t%s_InsRate\t%s_DelRate\t%s_DelRate\t%s_DelRate" %(species[0],species[1],species[2],species[0],species[1],species[2])
+        print >> fout, "#Block\t%s_InsRate\t%s_InsRate\t%s_InsRate\t%s_DelRate\t%s_DelRate\t%s_DelRate" % ( species[0], species[1], species[2], species[0], species[1], species[2] )
         prev_bnum = -1
         sorted_infile.seek(0)
         for line in sorted_infile.readlines():
@@ -112,16 +111,16 @@
                     if prev_bnum != -1:
                         irate = []
                         drate = []
-                        for i,elem in enumerate(inserts):
+                        for i, elem in enumerate(inserts):
                             try:
-                                irate.append(str("%.2e" %(inserts[i]/blen[i])))
+                                irate.append(str("%.2e" % (inserts[i]/blen[i])))
                             except:
                                 irate.append('0')
                             try:
-                                drate.append(str("%.2e" %(deletes[i]/blen[i])))
+                                drate.append(str("%.2e" % (deletes[i]/blen[i])))
                             except:
                                 drate.append('0')
-                        print >>fout, "%s\t%s\t%s" %(prev_bnum, '\t'.join(irate) , '\t'.join(drate))
+                        print >> fout, "%s\t%s\t%s" % ( prev_bnum, '\t'.join(irate) , '\t'.join(drate) )
                     inserts = [0.0, 0.0, 0.0]
                     deletes = [0.0, 0.0, 0.0]
                     blen = []
@@ -134,25 +133,24 @@
                     inserts[sp_ind] += 1
                 elif elems[1].endswith('delete'):
                     deletes[sp_ind] += 1
-                prev_bnum = new_bnum 
+                prev_bnum = new_bnum
             except Exception, ei:
                 #print >>sys.stderr, ei
                 continue
         irate = []
         drate = []
-        for i,elem in enumerate(inserts):
+        for i, elem in enumerate(inserts):
             try:
-                irate.append(str("%.2e" %(inserts[i]/blen[i])))
+                irate.append(str("%.2e" % (inserts[i]/blen[i])))
             except:
                 irate.append('0')
             try:
-                drate.append(str("%.2e" %(deletes[i]/blen[i])))
+                drate.append(str("%.2e" % (deletes[i]/blen[i])))
             except:
                 drate.append('0')
-        print >>fout, "%s\t%s\t%s" %(prev_bnum, '\t'.join(irate) , '\t'.join(drate))
+        print >> fout, "%s\t%s\t%s" % ( prev_bnum, '\t'.join(irate) , '\t'.join(drate) )
         sys.exit()
     
-    
     inf = open(infile, 'r')
     start_met = False
     end_met = False
@@ -163,14 +161,14 @@
         try:
             assert int(elems[0])
             assert len(elems) == 18
-            if dbkey_i not in elems[1]: 
-                if not(start_met):   
+            if dbkey_i not in elems[1]:
+                if not(start_met):
                     continue
                 else:
                     sp_end = n
                     break
             else:
-                print >>sp_file, line
+                print >> sp_file, line
                 if not(start_met):
                     start_met = True
                     sp_start = n
@@ -201,7 +199,7 @@
     for item in indel:
         if type( item ) is GenomicInterval:
             indelTree.insert( item, indel.linenum, item.fields )
-    result=[]
+    result = []
     
     global full, blk_len, blk_list
     for interval in win:
@@ -213,14 +211,14 @@
             chrom = interval.chrom
             start = int(interval.start)
             end = int(interval.end)
-            if start > end: 
+            if start > end:
                 warn( "Interval start after end!" )
-            ins_chr = "%s.%s_insert" %(dbkey_i,chrom)
-            del_chr = "%s.%s_delete" %(dbkey_i,chrom)
+            ins_chr = "%s.%s_insert" % ( dbkey_i, chrom )
+            del_chr = "%s.%s_delete" % ( dbkey_i, chrom )
             irate = 0
             drate = 0
             if ins_chr not in indelTree.chroms and del_chr not in indelTree.chroms:
-                pass    
+                pass
             else:
                 if ins_chr in indelTree.chroms:
                     full = 0.0
@@ -242,8 +240,9 @@
                 
             interval.fields.append(str("%.2e" %irate))
             interval.fields.append(str("%.2e" %drate))
-            print >>fout, "\t".join(interval.fields)
+            print >> fout, "\t".join(interval.fields)
             fout.flush()
 
+
 if __name__ == "__main__":
-    main()    
\ No newline at end of file
+    main()

diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/getIndels.py
--- a/tools/regVariation/getIndels.py
+++ b/tools/regVariation/getIndels.py
@@ -8,13 +8,12 @@
 
 from __future__ import division
 from galaxy import eggs
-import pkg_resources 
+import pkg_resources
 pkg_resources.require( "bx-python" )
 try:
     pkg_resources.require("numpy")
 except:
     pass
-import psyco_full
 import sys
 from bx.cookbook import doc_optparse
 from galaxy.tools.exception_handling import *
@@ -22,24 +21,24 @@
 
 assert sys.version_info[:2] >= ( 2, 4 )
 
-def main():   
+def main():
     # Parsing Command Line here
     options, args = doc_optparse.parse( __doc__ )
     
     try:
-        inp_file, out_file1 = args    
+        inp_file, out_file1 = args
     except:
         print >> sys.stderr, "Tool initialization error."
         sys.exit()
     
     try:
-        fin = open(inp_file,'r')
+        open(inp_file, 'r')
     except:
         print >> sys.stderr, "Unable to open input file"
         sys.exit()
     try:
-        fout1 = open(out_file1,'w')
-        #fout2 = open(out_file2,'w')
+        fout1 = open(out_file1, 'w')
+        #fout2 = open(out_file2, 'w')
     except:
         print >> sys.stderr, "Unable to open output file"
         sys.exit()
@@ -47,11 +46,10 @@
     try:
         maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
     except:
-        print >>sys.stderr, "Your MAF file appears to be malformed."
+        print >> sys.stderr, "Your MAF file appears to be malformed."
         sys.exit()
-    maf_count = 0
     
-    print >>fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length"
+    print >> fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length"
     for block_ind, block in enumerate(maf_reader):
         if len(block.components) < 2:
             continue
@@ -84,19 +82,19 @@
                         #write 2
                         if prev_pos_gap2 == 1:
                             prev_pos_gap2 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1,nt_pos1+1,nt_pos2-1,nt_pos2-1+gaplen2,gaplen2)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1, nt_pos1+1, nt_pos2-1, nt_pos2-1+gaplen2, gaplen2 )
                         if pos == len(seq1)-1:
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1,nt_pos1+1,nt_pos2+1-gaplen1,nt_pos2+1,gaplen1)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1, nt_pos1+1, nt_pos2+1-gaplen1, nt_pos2+1, gaplen1 )
                     else:
                         prev_pos_gap1 = 0
                         prev_pos_gap2 = 0
                         """
                         if prev_pos_gap1 == 1:
                             prev_pos_gap1 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,gaplen1)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, gaplen1 )
                         elif prev_pos_gap2 == 1:
                             prev_pos_gap2 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos2-1,nt_pos2,gaplen2)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos2-1, nt_pos2, gaplen2 )
                         """
                 else:
                     nt_pos1 += 1
@@ -105,19 +103,21 @@
                         #write both
                         if prev_pos_gap1 == 1:
                             prev_pos_gap1 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2-gaplen1,nt_pos2,gaplen1)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2-gaplen1, nt_pos2, gaplen1 )
                         elif prev_pos_gap2 == 1:
                             prev_pos_gap2 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1-gaplen2,nt_pos1,nt_pos2-1,nt_pos2,gaplen2)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1-gaplen2, nt_pos1, nt_pos2-1, nt_pos2, gaplen2 )
                     else:
                         gaplen2 += 1
                         prev_pos_gap2 = 1
                         #write 1
                         if prev_pos_gap1 == 1:
                             prev_pos_gap1 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2,nt_pos2+gaplen1,gaplen1)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2, nt_pos2+gaplen1, gaplen1 )
                         if pos == len(seq1)-1:
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1+1-gaplen2,nt_pos1+1,nt_pos2,nt_pos2+1,gaplen2)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1+1-gaplen2, nt_pos1+1, nt_pos2, nt_pos2+1, gaplen2 )
                 pos += 1
+
+
 if __name__ == "__main__":
     main()

diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/linear_regression.py
--- a/tools/regVariation/linear_regression.py
+++ b/tools/regVariation/linear_regression.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 from galaxy import eggs
-import sys, string
+import sys
 from rpy import *
 import numpy
 
@@ -15,14 +15,14 @@
 outfile = sys.argv[4]
 outfile2 = sys.argv[5]
 
-print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
+print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 )
 fout = open(outfile,'w')
 elems = []
 for i, line in enumerate( file ( infile )):
     line = line.rstrip('\r\n')
     if len( line )>0 and not line.startswith( '#' ):
         elems = line.split( '\t' )
-        break 
+        break
     if i == 30:
         break # Hopefully we'll never get here...
 
@@ -32,12 +32,12 @@
 y_vals = []
 x_vals = []
 
-for k,col in enumerate(x_cols):
+for k, col in enumerate(x_cols):
     x_cols[k] = int(col)-1
     x_vals.append([])
 
 NA = 'NA'
-for ind,line in enumerate( file( infile )):
+for ind, line in enumerate( file( infile )):
     if line and not line.startswith( '#' ):
         try:
             fields = line.split("\t")
@@ -46,7 +46,7 @@
             except:
                 yval = r('NA')
             y_vals.append(yval)
-            for k,col in enumerate(x_cols):
+            for k, col in enumerate(x_cols):
                 try:
                     xval = float(fields[col])
                 except:
@@ -57,7 +57,7 @@
 
 x_vals1 = numpy.asarray(x_vals).transpose()
 
-dat= r.list(x=array(x_vals1), y=y_vals)
+dat = r.list(x=array(x_vals1), y=y_vals)
 
 set_default_mode(NO_CONVERSION)
 try:
@@ -66,8 +66,8 @@
     stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain only non-numeric or invalid values.")
 set_default_mode(BASIC_CONVERSION)
 
-coeffs=linear_model.as_py()['coefficients']
-yintercept= coeffs['(Intercept)']
+coeffs = linear_model.as_py()['coefficients']
+yintercept = coeffs['(Intercept)']
 summary = r.summary(linear_model)
 
 co = summary.get('coefficients', 'NA')
@@ -82,8 +82,8 @@
 except:
     pass
 
-print >>fout, "Y-intercept\t%s" %(yintercept)
-print >>fout, "p-value (Y-intercept)\t%s" %(pvaly)
+print >> fout, "Y-intercept\t%s" % (yintercept)
+print >> fout, "p-value (Y-intercept)\t%s" % (pvaly)
 
 if len(x_vals) == 1:    #Simple linear  regression case with 1 predictor variable
     try:
@@ -94,22 +94,22 @@
         pval = r.round(float(co[1][3]), digits=10)
     except:
         pval = 'NA'
-    print >>fout, "Slope (c%d)\t%s" %(x_cols[0]+1,slope)
-    print >>fout, "p-value (c%d)\t%s" %(x_cols[0]+1,pval)
+    print >> fout, "Slope (c%d)\t%s" % ( x_cols[0]+1, slope )
+    print >> fout, "p-value (c%d)\t%s" % ( x_cols[0]+1, pval )
 else:    #Multiple regression case with >1 predictors
-    ind=1
+    ind = 1
     while ind < len(coeffs.keys()):
         try:
             slope = r.round(float(coeffs['x'+str(ind)]), digits=10)
         except:
             slope = 'NA'
-        print >>fout, "Slope (c%d)\t%s" %(x_cols[ind-1]+1,slope)
+        print >> fout, "Slope (c%d)\t%s" % ( x_cols[ind-1]+1, slope )
         try:
             pval = r.round(float(co[ind][3]), digits=10)
         except:
             pval = 'NA'
-        print >>fout, "p-value (c%d)\t%s" %(x_cols[ind-1]+1,pval)
-        ind+=1
+        print >> fout, "p-value (c%d)\t%s" % ( x_cols[ind-1]+1, pval )
+        ind += 1
 
 rsq = summary.get('r.squared','NA')
 adjrsq = summary.get('adj.r.squared','NA')
@@ -125,14 +125,14 @@
 except:
     pass
 
-print >>fout, "R-squared\t%s" %(rsq)
-print >>fout, "Adjusted R-squared\t%s" %(adjrsq)
-print >>fout, "F-statistic\t%s" %(fstat)
-print >>fout, "Sigma\t%s" %(sigma)
+print >> fout, "R-squared\t%s" % (rsq)
+print >> fout, "Adjusted R-squared\t%s" % (adjrsq)
+print >> fout, "F-statistic\t%s" % (fstat)
+print >> fout, "Sigma\t%s" % (sigma)
 
 r.pdf( outfile2, 8, 8 )
 if len(x_vals) == 1:    #Simple linear  regression case with 1 predictor variable
-    sub_title =  "Slope = %s; Y-int = %s" %(slope,yintercept)
+    sub_title =  "Slope = %s; Y-int = %s" % ( slope, yintercept )
     try:
         r.plot(x=x_vals[0], y=y_vals, xlab="X", ylab="Y", sub=sub_title, main="Scatterplot with regression")
         r.abline(a=yintercept, b=slope, col="red")

diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/logistic_regression_vif.py
--- a/tools/regVariation/logistic_regression_vif.py
+++ b/tools/regVariation/logistic_regression_vif.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 from galaxy import eggs
-import sys, string
+import sys
 from rpy import *
 import numpy
 
@@ -15,14 +15,14 @@
 outfile = sys.argv[4]
 
 
-print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
+print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 )
 fout = open(outfile,'w')
 elems = []
-for i, line in enumerate( file ( infile )):
+for i, line in enumerate( file( infile ) ):
     line = line.rstrip('\r\n')
     if len( line )>0 and not line.startswith( '#' ):
         elems = line.split( '\t' )
-        break 
+        break
     if i == 30:
         break # Hopefully we'll never get here...
 
@@ -32,12 +32,12 @@
 y_vals = []
 x_vals = []
 
-for k,col in enumerate(x_cols):
+for k, col in enumerate(x_cols):
     x_cols[k] = int(col)-1
     x_vals.append([])
 
 NA = 'NA'
-for ind,line in enumerate( file( infile )):
+for ind, line in enumerate( file( infile )):
     if line and not line.startswith( '#' ):
         try:
             fields = line.split("\t")
@@ -46,7 +46,7 @@
             except:
                 yval = r('NA')
             y_vals.append(yval)
-            for k,col in enumerate(x_cols):
+            for k, col in enumerate(x_cols):
                 try:
                     xval = float(fields[col])
                 except:
@@ -57,46 +57,45 @@
 
 x_vals1 = numpy.asarray(x_vals).transpose()
 
-check1=0
-check0=0
+check1 = 0
+check0 = 0
 for i in y_vals:
     if i == 1:
-        check1=1
+        check1 = 1
     if i == 0:
-        check0=1
-if check1==0 or check0==0:
+        check0 = 1
+if check1 == 0 or check0 == 0:
     sys.exit("Warning: logistic regression must have at least two classes")
 
 for i in y_vals:
-    if i not in [1,0,r('NA')]:
-        print >>fout, str(i)
+    if i not in [1, 0, r('NA')]:
+        print >> fout, str(i)
         sys.exit("Warning: the current version of this tool can run only with two classes and need to be labeled as 0 and 1.")
-    
-    
-dat= r.list(x=array(x_vals1), y=y_vals)
-novif=0
+
+dat = r.list(x=array(x_vals1), y=y_vals)
+novif = 0
 set_default_mode(NO_CONVERSION)
 try:
-    linear_model = r.glm(r("y ~ x"), data = r.na_exclude(dat),family="binomial")
+    linear_model = r.glm(r("y ~ x"), data=r.na_exclude(dat), family="binomial")
 except RException, rex:
     stop_err("Error performing logistic regression on the input data.\nEither the response column or one of the predictor columns contain only non-numeric or invalid values.")
 if len(x_cols)>1:
     try:
         r('suppressPackageStartupMessages(library(car))')
-        r.assign('dat',dat)
-        r.assign('ncols',len(x_cols))
-        vif=r.vif(r('glm(dat$y ~ ., data = na.exclude(data.frame(as.matrix(dat$x,ncol=ncols))->datx),family="binomial")'))
+        r.assign('dat', dat)
+        r.assign('ncols', len(x_cols))
+        vif = r.vif(r('glm(dat$y ~ ., data = na.exclude(data.frame(as.matrix(dat$x,ncol=ncols))->datx), family="binomial")'))
     except RException, rex:
         print rex
 else:
-    novif=1
-    
+    novif = 1
+
 set_default_mode(BASIC_CONVERSION)
 
-coeffs=linear_model.as_py()['coefficients']
-null_deviance=linear_model.as_py()['null.deviance']
-residual_deviance=linear_model.as_py()['deviance']
-yintercept= coeffs['(Intercept)']
+coeffs = linear_model.as_py()['coefficients']
+null_deviance = linear_model.as_py()['null.deviance']
+residual_deviance = linear_model.as_py()['deviance']
+yintercept = coeffs['(Intercept)']
 summary = r.summary(linear_model)
 co = summary.get('coefficients', 'NA')
 """
@@ -109,14 +108,14 @@
     pvaly = r.round(float(co[0][3]), digits=10)
 except:
     pass
-print >>fout, "response column\tc%d" %(y_col+1)
-tempP=[]
+print >> fout, "response column\tc%d" % (y_col+1)
+tempP = []
 for i in x_cols:
     tempP.append('c'+str(i+1))
-tempP=','.join(tempP)
-print >>fout, "predictor column(s)\t%s" %(tempP)
-print >>fout, "Y-intercept\t%s" %(yintercept)
-print >>fout, "p-value (Y-intercept)\t%s" %(pvaly)
+tempP = ','.join(tempP)
+print >> fout, "predictor column(s)\t%s" % (tempP)
+print >> fout, "Y-intercept\t%s" % (yintercept)
+print >> fout, "p-value (Y-intercept)\t%s" % (pvaly)
 
 if len(x_vals) == 1:    #Simple linear  regression case with 1 predictor variable
     try:
@@ -127,44 +126,43 @@
         pval = r.round(float(co[1][3]), digits=10)
     except:
         pval = 'NA'
-    print >>fout, "Slope (c%d)\t%s" %(x_cols[0]+1,slope)
-    print >>fout, "p-value (c%d)\t%s" %(x_cols[0]+1,pval)
+    print >> fout, "Slope (c%d)\t%s" % ( x_cols[0]+1, slope )
+    print >> fout, "p-value (c%d)\t%s" % ( x_cols[0]+1, pval )
 else:    #Multiple regression case with >1 predictors
-    ind=1
+    ind = 1
     while ind < len(coeffs.keys()):
         try:
             slope = r.round(float(coeffs['x'+str(ind)]), digits=10)
         except:
             slope = 'NA'
-        print >>fout, "Slope (c%d)\t%s" %(x_cols[ind-1]+1,slope)
+        print >> fout, "Slope (c%d)\t%s" % ( x_cols[ind-1]+1, slope )
         try:
             pval = r.round(float(co[ind][3]), digits=10)
         except:
             pval = 'NA'
-        print >>fout, "p-value (c%d)\t%s" %(x_cols[ind-1]+1,pval)
-        ind+=1
+        print >> fout, "p-value (c%d)\t%s" % ( x_cols[ind-1]+1, pval )
+        ind += 1
 
 rsq = summary.get('r.squared','NA')
 
-
 try:
-    rsq= r.round(float((null_deviance-residual_deviance)/null_deviance), digits=5)
-    null_deviance= r.round(float(null_deviance), digits=5)
-    residual_deviance= r.round(float(residual_deviance), digits=5)
+    rsq = r.round(float((null_deviance-residual_deviance)/null_deviance), digits=5)
+    null_deviance = r.round(float(null_deviance), digits=5)
+    residual_deviance = r.round(float(residual_deviance), digits=5)
 except:
     pass
 
-print >>fout, "Null deviance\t%s" %(null_deviance)
-print >>fout, "Residual deviance\t%s" %(residual_deviance)
-print >>fout, "pseudo R-squared\t%s" %(rsq)
-print >>fout, "\n"
-print >>fout, 'vif'
+print >> fout, "Null deviance\t%s" % (null_deviance)
+print >> fout, "Residual deviance\t%s" % (residual_deviance)
+print >> fout, "pseudo R-squared\t%s" % (rsq)
+print >> fout, "\n"
+print >> fout, 'vif'
 
-if novif==0:
-    py_vif=vif.as_py()
-    count=0
+if novif == 0:
+    py_vif = vif.as_py()
+    count = 0
     for i in sorted(py_vif.keys()):
-        print >>fout,'c'+str(x_cols[count]+1) ,str(py_vif[i])
-        count+=1
-elif novif==1:
-    print >>fout, "vif can calculate only when model have more than 1 predictor"
+        print >> fout, 'c'+str(x_cols[count]+1), str(py_vif[i])
+        count += 1
+elif novif == 1:
+    print >> fout, "vif can calculate only when model have more than 1 predictor"

diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/maf_cpg_filter.py
--- a/tools/regVariation/maf_cpg_filter.py
+++ b/tools/regVariation/maf_cpg_filter.py
@@ -10,7 +10,7 @@
 """
 
 from galaxy import eggs
-import pkg_resources 
+import pkg_resources
 pkg_resources.require( "bx-python" )
 try:
     pkg_resources.require( "numpy" )
@@ -54,7 +54,7 @@
         defn = "non-CpG"
     cpgfilter.run( reader, writer.write )
     
-    print "%2.2f percent bases masked; Mask character = %s, Definition = %s" %(float(cpgfilter.masked)/float(cpgfilter.total) * 100, mask, defn)
+    print "%2.2f percent bases masked; Mask character = %s, Definition = %s" % ( float(cpgfilter.masked)/float(cpgfilter.total) * 100, mask, defn )
 
 if __name__ == "__main__":
     main()

diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/microsats_alignment_level.py
--- a/tools/regVariation/microsats_alignment_level.py
+++ b/tools/regVariation/microsats_alignment_level.py
@@ -4,7 +4,11 @@
 Uses SPUTNIK to fetch microsatellites and extracts orthologous repeats from the sputnik output.
 """
 from galaxy import eggs
-import sys, os, tempfile, string, math, re
+import os
+import re
+import string
+import sys
+import tempfile
 
 def reverse_complement(text):
     DNA_COMP = string.maketrans( "ACGTacgt", "TGCAtgca" )
@@ -12,31 +16,26 @@
     comp.reverse()
     return "".join(comp)
 
+
 def main():
     if len(sys.argv) != 8:
-        print >>sys.stderr, "Insufficient number of arguments."
+        print >> sys.stderr, "Insufficient number of arguments."
         sys.exit()
     
     infile = open(sys.argv[1],'r')
     separation = int(sys.argv[2])
     outfile = sys.argv[3]
-    align_type = sys.argv[4]
-    if align_type == "2way":
-        align_type_len = 2
-    elif align_type == "3way":
-        align_type_len = 3
     mono_threshold = int(sys.argv[5])
     non_mono_threshold = int(sys.argv[6])
     allow_different_units = int(sys.argv[7])
     
-    print "Min distance = %d bp; Min threshold for mono repeats = %d; Min threshold for non-mono repeats = %d; Allow different motifs = %s" %(separation, mono_threshold, non_mono_threshold, allow_different_units==1)
+    print "Min distance = %d bp; Min threshold for mono repeats = %d; Min threshold for non-mono repeats = %d; Allow different motifs = %s" % ( separation, mono_threshold, non_mono_threshold, allow_different_units==1 )
     try:
         fout = open(outfile, "w")
-        print >>fout, "#Block\tSeq1_Name\tSeq1_Start\tSeq1_End\tSeq1_Type\tSeq1_Length\tSeq1_RepeatNumber\tSeq1_Unit\tSeq2_Name\tSeq2_Start\tSeq2_End\tSeq2_Type\tSeq2_Length\tSeq2_RepeatNumber\tSeq2_Unit"
+        print >> fout, "#Block\tSeq1_Name\tSeq1_Start\tSeq1_End\tSeq1_Type\tSeq1_Length\tSeq1_RepeatNumber\tSeq1_Unit\tSeq2_Name\tSeq2_Start\tSeq2_End\tSeq2_Type\tSeq2_Length\tSeq2_RepeatNumber\tSeq2_Unit"
         #sputnik_cmd = os.path.join(os.path.split(sys.argv[0])[0], "sputnik")
         sputnik_cmd = "sputnik"
         input = infile.read()
-        skipped = 0
         block_num = 0
         input = input.replace('\r','\n')
         for block in input.split('\n\n'):
@@ -44,26 +43,24 @@
             tmpin = tempfile.NamedTemporaryFile()
             tmpout = tempfile.NamedTemporaryFile()
             tmpin.write(block.strip())
-            blk = tmpin.read()
             cmdline = sputnik_cmd + " " + tmpin.name + "  > /dev/null 2>&1 >> " + tmpout.name
             try:
                 os.system(cmdline)
-            except Exception, es:
+            except Exception:
                 continue
             sputnik_out = tmpout.read()
             tmpin.close()
             tmpout.close()
             if sputnik_out != "":
                 if len(block.split('>')[1:]) != 2:        #len(sputnik_out.split('>')):
-                    skipped += 1
                     continue
                 align_block = block.strip().split('>')
                 
                 lendict = {'mononucleotide':1, 'dinucleotide':2, 'trinucleotide':3, 'tetranucleotide':4, 'pentanucleotide':5, 'hexanucleotide':6}
-                blockdict={}
-                r=0
-                namelist=[]
-                for k,sput_block in enumerate(sputnik_out.split('>')[1:]):
+                blockdict = {}
+                r = 0
+                namelist = []
+                for k, sput_block in enumerate(sputnik_out.split('>')[1:]):
                     whole_seq = ''.join(align_block[k+1].split('\n')[1:]).replace('\n','').strip()
                     p = re.compile('\n(\S*nucleotide)')
                     repeats = p.split(sput_block.strip())
@@ -71,13 +68,12 @@
                     j = 1
                     name = repeats[0].strip()
                     try:
-                        coords = re.search('\d+[-_:]\d+',name).group()
-                        coords = coords.replace('_','-').replace(':','-')
-                    except Exception, e:
+                        coords = re.search('\d+[-_:]\d+', name).group()
+                        coords = coords.replace('_', '-').replace(':', '-')
+                    except Exception:
                         coords = '0-0'
-                        pass
                     r += 1
-                    blockdict[r]={}
+                    blockdict[r] = {}
                     try:
                         sp_name = name[:name.index('.')]
                         chr_name = name[name.index('.'):name.index('(')]
@@ -91,11 +87,10 @@
                                 continue
                             
                             if blockdict[r].has_key('types'):
-                                blockdict[r]['types'].append(repeats[j].strip())        #type of microsat     
+                                blockdict[r]['types'].append(repeats[j].strip())  #type of microsat
                             else:
-                                blockdict[r]['types'] = [repeats[j].strip()]               #type of microsat  
+                                blockdict[r]['types'] = [repeats[j].strip()]  #type of microsat
                             
-                            sequence = ''.join(align_block[r].split('\n')[1:]).replace('\n','').strip()
                             start = int(repeats[j+1].split('--')[0].split(':')[0].strip())
                             #check to see if there are gaps before the start of the repeat, and change the start accordingly
                             sgaps = 0
@@ -107,7 +102,7 @@
                                     break    #break at the 1st non-gap character
                                 ch_pos -= 1
                             if blockdict[r].has_key('starts'):
-                                blockdict[r]['starts'].append(start+sgaps)        #start co-ords adjusted with alignment co-ords to include GAPS    
+                                blockdict[r]['starts'].append(start+sgaps)  #start co-ords adjusted with alignment co-ords to include GAPS
                             else:
                                 blockdict[r]['starts'] = [start+sgaps]
                             
@@ -120,7 +115,7 @@
                                 else:
                                     break    #break at the 1st non-gap character
                             if blockdict[r].has_key('ends'):
-                                blockdict[r]['ends'].append(end+egaps)        #end co-ords adjusted with alignment co-ords to include GAPS    
+                                blockdict[r]['ends'].append(end+egaps)  #end co-ords adjusted with alignment co-ords to include GAPS
                             else:
                                 blockdict[r]['ends'] = [end+egaps]
                                 
@@ -134,20 +129,20 @@
                             gaps_before_start = whole_seq[:rel_start].count('-')
                             
                             if blockdict[r].has_key('gaps_before_start'):
-                                blockdict[r]['gaps_before_start'].append(gaps_before_start)  #lengths  
+                                blockdict[r]['gaps_before_start'].append(gaps_before_start)  #lengths
                             else:
-                                blockdict[r]['gaps_before_start'] = [gaps_before_start]       #lengths 
+                                blockdict[r]['gaps_before_start'] = [gaps_before_start]  #lengths
                             
-                            whole_seq_start= int(coords.split('-')[0])
+                            whole_seq_start = int(coords.split('-')[0])
                             if blockdict[r].has_key('whole_seq_start'):
-                                blockdict[r]['whole_seq_start'].append(whole_seq_start)  #lengths  
+                                blockdict[r]['whole_seq_start'].append(whole_seq_start)  #lengths
                             else:
-                                blockdict[r]['whole_seq_start'] = [whole_seq_start]       #lengths 
+                                blockdict[r]['whole_seq_start'] = [whole_seq_start]  #lengths
                                 
                             if blockdict[r].has_key('lengths'):
-                                blockdict[r]['lengths'].append(repeat_len)  #lengths  
+                                blockdict[r]['lengths'].append(repeat_len)  #lengths
                             else:
-                                blockdict[r]['lengths'] = [repeat_len]       #lengths 
+                                blockdict[r]['lengths'] = [repeat_len]  #lengths
                             
                             if blockdict[r].has_key('counts'):
                                 blockdict[r]['counts'].append(str(int(repeat_len)/lendict[repeats[j].strip()]))  #Repeat Unit
@@ -159,10 +154,10 @@
                             else:
                                 blockdict[r]['units'] = [repeat_seq[:lendict[repeats[j].strip()]]]         #Repeat Unit
                             
-                        except Exception, eh:
+                        except Exception:
                             pass
-                        j+=2
-                    #check the co-ords of all repeats corresponding to a sequence and remove adjacent repeats separated by less than the user-specified 'separation'. 
+                        j += 2
+                    #check the co-ords of all repeats corresponding to a sequence and remove adjacent repeats separated by less than the user-specified 'separation'.
                     delete_index_list = []
                     for ind, item in enumerate(blockdict[r]['ends']):
                         try:
@@ -171,7 +166,7 @@
                                     delete_index_list.append(ind)
                                 if ind+1 not in delete_index_list:
                                     delete_index_list.append(ind+1)
-                        except Exception, ek:
+                        except Exception:
                             pass
                     for index in delete_index_list:    #mark them for deletion
                         try:
@@ -183,7 +178,7 @@
                             blockdict[r]['lengths'][index] = 'marked'
                             blockdict[r]['counts'][index] = 'marked'
                             blockdict[r]['units'][index] = 'marked'
-                        except Exception, ej:
+                        except Exception:
                             pass
                     #remove 'marked' elements from all the lists
                     """
@@ -192,19 +187,19 @@
                             if elem == 'marked':
                                 blockdict[r][key].remove(elem)
                     """
-                    #print blockdict    
+                    #print blockdict
                 
-                #make sure that the blockdict has keys for both the species   
+                #make sure that the blockdict has keys for both the species
                 if (1 not in blockdict) or (2 not in blockdict):
                     continue
                 
                 visited_2 = [0 for x in range(len(blockdict[2]['starts']))]
-                for ind1,coord_s1 in enumerate(blockdict[1]['starts']):
+                for ind1, coord_s1 in enumerate(blockdict[1]['starts']):
                     if coord_s1 == 'marked':
                         continue
                     coord_e1 = blockdict[1]['ends'][ind1]
                     out = []
-                    for ind2,coord_s2 in enumerate(blockdict[2]['starts']):
+                    for ind2, coord_s2 in enumerate(blockdict[2]['starts']):
                         if coord_s2 == 'marked':
                             visited_2[ind2] = 1
                             continue
@@ -216,7 +211,7 @@
                             else:
                                 if (blockdict[1]['units'][ind1] not in blockdict[2]['units'][ind2]*2) and (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2):
                                     continue
-                        #print >>sys.stderr, (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2)
+                        #print >> sys.stderr, (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2)
                         #skip if the repeat number thresholds are not met
                         if blockdict[1]['types'][ind1] == 'mononucleotide':
                             if (int(blockdict[1]['counts'][ind1]) < mono_threshold):
@@ -231,12 +226,12 @@
                         else:
                             if (int(blockdict[2]['counts'][ind2]) < non_mono_threshold):
                                 continue
-                        #print "s1,e1=%s,%s; s2,e2=%s,%s" %(coord_s1,coord_e1,coord_s2,coord_e2)
-                        if (coord_s1 in range(coord_s2,coord_e2)) or (coord_e1 in range(coord_s2,coord_e2)):
+                        #print "s1,e1=%s,%s; s2,e2=%s,%s" % ( coord_s1, coord_e1, coord_s2, coord_e2 )
+                        if (coord_s1 in range(coord_s2, coord_e2)) or (coord_e1 in range(coord_s2, coord_e2)):
                             out.append(str(block_num))
                             out.append(namelist[0])
                             rel_start = blockdict[1]['whole_seq_start'][ind1] + coord_s1 - blockdict[1]['gaps_before_start'][ind1]
-                            rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) 
+                            rel_end = rel_start + int(blockdict[1]['lengths'][ind1])
                             out.append(str(rel_start))
                             out.append(str(rel_end))
                             out.append(blockdict[1]['types'][ind1])
@@ -245,16 +240,16 @@
                             out.append(blockdict[1]['units'][ind1])
                             out.append(namelist[1])
                             rel_start = blockdict[2]['whole_seq_start'][ind2] + coord_s2 - blockdict[2]['gaps_before_start'][ind2]
-                            rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) 
+                            rel_end = rel_start + int(blockdict[2]['lengths'][ind2])
                             out.append(str(rel_start))
                             out.append(str(rel_end))
                             out.append(blockdict[2]['types'][ind2])
                             out.append(blockdict[2]['lengths'][ind2])
                             out.append(blockdict[2]['counts'][ind2])
                             out.append(blockdict[2]['units'][ind2])
-                            print >>fout, '\t'.join(out)
+                            print >> fout, '\t'.join(out)
                             visited_2[ind2] = 1
-                            out=[]
+                            out = []
                 
                 if 0 in visited_2:    #there are still some elements in 2nd set which haven't found orthologs yet.
                     for ind2, coord_s2 in enumerate(blockdict[2]['starts']):
@@ -264,7 +259,7 @@
                             continue
                         coord_e2 = blockdict[2]['ends'][ind2]
                         out = []
-                        for ind1,coord_s1 in enumerate(blockdict[1]['starts']):
+                        for ind1, coord_s1 in enumerate(blockdict[1]['starts']):
                             if coord_s1 == 'marked':
                                 continue
                             coord_e1 = blockdict[1]['ends'][ind1]
@@ -290,11 +285,11 @@
                                 if (int(blockdict[2]['counts'][ind2]) < non_mono_threshold):
                                     continue
                             
-                            if (coord_s2 in range(coord_s1,coord_e1)) or (coord_e2 in range(coord_s1,coord_e1)):
-                                out.append(str(block_num)) 
+                            if (coord_s2 in range(coord_s1, coord_e1)) or (coord_e2 in range(coord_s1, coord_e1)):
+                                out.append(str(block_num))
                                 out.append(namelist[0])
                                 rel_start = blockdict[1]['whole_seq_start'][ind1] + coord_s1 - blockdict[1]['gaps_before_start'][ind1]
-                                rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) 
+                                rel_end = rel_start + int(blockdict[1]['lengths'][ind1])
                                 out.append(str(rel_start))
                                 out.append(str(rel_end))
                                 out.append(blockdict[1]['types'][ind1])
@@ -303,21 +298,21 @@
                                 out.append(blockdict[1]['units'][ind1])
                                 out.append(namelist[1])
                                 rel_start = blockdict[2]['whole_seq_start'][ind2] + coord_s2 - blockdict[2]['gaps_before_start'][ind2]
-                                rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) 
+                                rel_end = rel_start + int(blockdict[2]['lengths'][ind2])
                                 out.append(str(rel_start))
                                 out.append(str(rel_end))
                                 out.append(blockdict[2]['types'][ind2])
                                 out.append(blockdict[2]['lengths'][ind2])
                                 out.append(blockdict[2]['counts'][ind2])
                                 out.append(blockdict[2]['units'][ind2])
-                                print >>fout, '\t'.join(out)
+                                print >> fout, '\t'.join(out)
                                 visited_2[ind2] = 1
-                                out=[]
+                                out = []
                             
-                    #print >>fout, blockdict
+                    #print >> fout, blockdict
     except Exception, exc:
-        print >>sys.stderr, "type(exc),args,exc: %s, %s, %s" %(type(exc), exc.args, exc)
+        print >> sys.stderr, "type(exc),args,exc: %s, %s, %s" % ( type(exc), exc.args, exc )
+
 
 if __name__ == "__main__":
     main()
-    

diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/microsats_mutability.py
--- a/tools/regVariation/microsats_mutability.py
+++ b/tools/regVariation/microsats_mutability.py
@@ -4,7 +4,10 @@
 This tool computes microsatellite mutability for the orthologous microsatellites fetched from  'Extract Orthologous Microsatellites from pair-wise alignments' tool.
 """
 from galaxy import eggs
-import sys, string, re, commands, tempfile, os, fileinput
+import fileinput
+import string
+import sys
+import tempfile
 from galaxy.tools.util.galaxyops import *
 from bx.intervals.io import *
 from bx.intervals.operations import quicksect
@@ -19,7 +22,7 @@
 p_group_cols = [p_group, p_group+7]
 s_group_cols = [s_group, s_group+7]
 num_generations = int(sys.argv[7])
-region = sys.argv[8] 
+region = sys.argv[8]
 int_file = sys.argv[9]
 if int_file != "None": #User has specified an interval file
     try:
@@ -28,31 +31,35 @@
         chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[11] )
     except:
         stop_err("Unable to open input Interval file")
-    
+
+
 def stop_err(msg):
     sys.stderr.write(msg)
     sys.exit()
 
+
 def reverse_complement(text):
     DNA_COMP = string.maketrans( "ACGTacgt", "TGCAtgca" )
     comp = [ch for ch in text.translate(DNA_COMP)]
     comp.reverse()
     return "".join(comp)
 
+
 def get_unique_elems(elems):
-    seen=set()
+    seen = set()
     return[x for x in elems if x not in seen and not seen.add(x)]
 
+
 def get_binned_lists(uniqlist, binsize):
-    binnedlist=[]
+    binnedlist = []
     uniqlist.sort()
     start = int(uniqlist[0])
-    bin_ind=0
-    l_ind=0
+    bin_ind = 0
+    l_ind = 0
     binnedlist.append([])
     while l_ind < len(uniqlist):
         elem = int(uniqlist[l_ind])
-        if elem in range(start,start+binsize):
+        if elem in range(start, start+binsize):
             binnedlist[bin_ind].append(elem)
         else:
             start += binsize
@@ -62,39 +69,38 @@
         l_ind += 1
     return binnedlist
 
-def fetch_weight(H,C,t):
+
+def fetch_weight(H, C, t):
     if (H-(C-H)) < t:
         return 2.0
     else:
         return 1.0
 
-def mutabilityEstimator(repeats1,repeats2,thresholds):
+
+def mutabilityEstimator(repeats1, repeats2, thresholds):
     mut_num = 0.0    #Mutability Numerator
     mut_den = 0.0    #Mutability denominator
-    for ind,H in enumerate(repeats1):
+    for ind, H in enumerate(repeats1):
         C = repeats2[ind]
         t = thresholds[ind]
-        w = fetch_weight(H,C,t)
+        w = fetch_weight(H, C, t)
         mut_num += ((H-C)*(H-C)*w)
         mut_den += w
     return [mut_num, mut_den]
 
+
 def output_writer(blk, blk_lines):
     global winspecies, speciesind
-    all_elems_1=[]
-    all_elems_2=[]
-    all_s_elems_1=[]
-    all_s_elems_2=[]
+    all_elems_1 = []
+    all_elems_2 = []
+    all_s_elems_1 = []
+    all_s_elems_2 = []
     for bline in blk_lines:
         if not(bline):
             continue
         items = bline.split('\t')
         seq1 = items[1]
-        start1 = items[2]
-        end1 = items[3]
         seq2 = items[8]
-        start2 = items[9]
-        end2 = items[10] 
         if p_group_cols[0] == 6:
             items[p_group_cols[0]] = int(items[p_group_cols[0]])
             items[p_group_cols[1]] = int(items[p_group_cols[1]])
@@ -111,8 +117,8 @@
     if s_group_cols[0] != -1:
         uniq_s_elems_1 = get_unique_elems(all_s_elems_1)
         uniq_s_elems_2 = get_unique_elems(all_s_elems_2)
-    mut1={}
-    mut2={}
+    mut1 = {}
+    mut2 = {}
     count1 = {}
     count2 = {}
     """
@@ -120,12 +126,12 @@
         uniq_elems_1 = get_unique_units(j.sort(lambda x, y: len(x)-len(y)))
     """
     if p_group_cols[0] == 6:    #i.e. the option chosen is group-by repeat number.
-        uniq_elems_1 = get_binned_lists(uniq_elems_1,p_bin_size)
-        uniq_elems_2 = get_binned_lists(uniq_elems_2,p_bin_size)
+        uniq_elems_1 = get_binned_lists( uniq_elems_1, p_bin_size )
+        uniq_elems_2 = get_binned_lists( uniq_elems_2, p_bin_size )
         
     if s_group_cols[0] == 6:    #i.e. the option chosen is subgroup-by repeat number.
-        uniq_s_elems_1 = get_binned_lists(uniq_s_elems_1,s_bin_size)
-        uniq_s_elems_2 = get_binned_lists(uniq_s_elems_2,s_bin_size)
+        uniq_s_elems_1 = get_binned_lists( uniq_s_elems_1, s_bin_size )
+        uniq_s_elems_2 = get_binned_lists( uniq_s_elems_2, s_bin_size )
 
     for pitem1 in uniq_elems_1:
         #repeats1 = []
@@ -143,61 +149,61 @@
                         if p_group_cols[0] == 6:
                             belems[p_group_cols[0]] = int(belems[p_group_cols[0]])
                         if belems[p_group_cols[0]] in pitem1:
-                            if belems[s_group_cols[0]]==sitem1:
+                            if belems[s_group_cols[0]] == sitem1:
                                 repeats1.append(int(belems[6]))
                                 repeats2.append(int(belems[13]))
                                 if belems[4] == 'mononucleotide':
                                     thresholds.append(mono_threshold)
                                 else:
                                     thresholds.append(non_mono_threshold)
-                                mut1[str(pitem1)+'\t'+str(sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds)
+                                mut1[str(pitem1)+'\t'+str(sitem1)] = mutabilityEstimator( repeats1, repeats2, thresholds )
                                 if region == 'align':
-                                    count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2))
-                                else:    
+                                    count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) )
+                                else:
                                     if winspecies == 1:
-                                        count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats1)
+                                        count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats1)
                                     elif winspecies == 2:
-                                        count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats2)
+                                        count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats2)
                     else:
                         if type(sitem1) == list:
                             if s_group_cols[0] == 6:
                                 belems[s_group_cols[0]] = int(belems[s_group_cols[0]])
-                            if belems[p_group_cols[0]]==pitem1 and belems[s_group_cols[0]] in sitem1:
+                            if belems[p_group_cols[0]] == pitem1 and belems[s_group_cols[0]] in sitem1:
                                 repeats1.append(int(belems[6]))
                                 repeats2.append(int(belems[13]))
                                 if belems[4] == 'mononucleotide':
                                     thresholds.append(mono_threshold)
                                 else:
                                     thresholds.append(non_mono_threshold)
-                                mut1["%s\t%s" %(pitem1,sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds)
+                                mut1["%s\t%s" % ( pitem1, sitem1 )] = mutabilityEstimator( repeats1, repeats2, thresholds )
                                 if region == 'align':
-                                    count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2))
-                                else:    
+                                    count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) )
+                                else:
                                     if winspecies == 1:
-                                        count1[str(pitem1)+'\t'+str(sitem1)]=sum(repeats1)
+                                        count1[str(pitem1)+'\t'+str(sitem1)] = sum(repeats1)
                                     elif winspecies == 2:
-                                        count1[str(pitem1)+'\t'+str(sitem1)]=sum(repeats2)
+                                        count1[str(pitem1)+'\t'+str(sitem1)] = sum(repeats2)
                         else:
-                            if belems[p_group_cols[0]]==pitem1 and belems[s_group_cols[0]]==sitem1:
+                            if belems[p_group_cols[0]] == pitem1 and belems[s_group_cols[0]] == sitem1:
                                 repeats1.append(int(belems[6]))
                                 repeats2.append(int(belems[13]))
                                 if belems[4] == 'mononucleotide':
                                     thresholds.append(mono_threshold)
                                 else:
                                     thresholds.append(non_mono_threshold)
-                                mut1["%s\t%s" %(pitem1,sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds)
+                                mut1["%s\t%s" % ( pitem1, sitem1 )] = mutabilityEstimator( repeats1, repeats2, thresholds )
                                 if region == 'align':
-                                    count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2))
-                                else:    
+                                    count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) )
+                                else:
                                     if winspecies == 1:
-                                        count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats1)
+                                        count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats1)
                                     elif winspecies == 2:
-                                        count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats2)
+                                        count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats2)
         else:   #Sub-group by feature is None
             for bline in blk_lines:
                 belems = bline.split('\t')
                 if type(pitem1) == list:
-                    #print >>sys.stderr, "item: " + str(item1)
+                    #print >> sys.stderr, "item: " + str(item1)
                     if p_group_cols[0] == 6:
                         belems[p_group_cols[0]] = int(belems[p_group_cols[0]])
                     if belems[p_group_cols[0]] in pitem1:
@@ -208,21 +214,21 @@
                         else:
                             thresholds.append(non_mono_threshold)
                 else:
-                    if belems[p_group_cols[0]]==pitem1:
+                    if belems[p_group_cols[0]] == pitem1:
                         repeats1.append(int(belems[6]))
                         repeats2.append(int(belems[13]))
                         if belems[4] == 'mononucleotide':
                             thresholds.append(mono_threshold)
                         else:
                             thresholds.append(non_mono_threshold)
-            mut1["%s" %(pitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds)
+            mut1["%s" % (pitem1)] = mutabilityEstimator( repeats1, repeats2, thresholds )
             if region == 'align':
-                count1["%s" %(pitem1)]=min(sum(repeats1),sum(repeats2))
-            else:            
+                count1["%s" % (pitem1)] = min( sum(repeats1), sum(repeats2) )
+            else:
                 if winspecies == 1:
-                    count1[str(pitem1)]=sum(repeats1)
+                    count1[str(pitem1)] = sum(repeats1)
                 elif winspecies == 2:
-                    count1[str(pitem1)]=sum(repeats2)
+                    count1[str(pitem1)] = sum(repeats2)
                 
     for pitem2 in uniq_elems_2:
         #repeats1 = []
@@ -239,57 +245,57 @@
                     if type(pitem2) == list:
                         if p_group_cols[0] == 6:
                             belems[p_group_cols[1]] = int(belems[p_group_cols[1]])
-                        if belems[p_group_cols[1]] in pitem2 and belems[s_group_cols[1]]==sitem2:
+                        if belems[p_group_cols[1]] in pitem2 and belems[s_group_cols[1]] == sitem2:
                             repeats2.append(int(belems[13]))
                             repeats1.append(int(belems[6]))
                             if belems[4] == 'mononucleotide':
                                 thresholds.append(mono_threshold)
                             else:
                                 thresholds.append(non_mono_threshold)
-                            mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds)
+                            mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds )
                             #count2[str(pitem2)+'\t'+str(sitem2)]=len(repeats2)
                             if region == 'align':
-                                count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2))
-                            else: 
+                                count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) )
+                            else:
                                 if winspecies == 1:
-                                    count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2)
+                                    count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2)
                                 elif winspecies == 2:
-                                    count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1)
+                                    count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1)
                     else:
                         if type(sitem2) == list:
                             if s_group_cols[0] == 6:
                                 belems[s_group_cols[1]] = int(belems[s_group_cols[1]])
-                            if belems[p_group_cols[1]]==pitem2 and belems[s_group_cols[1]] in sitem2:
+                            if belems[p_group_cols[1]] == pitem2 and belems[s_group_cols[1]] in sitem2:
                                 repeats2.append(int(belems[13]))
                                 repeats1.append(int(belems[6]))
                                 if belems[4] == 'mononucleotide':
                                     thresholds.append(mono_threshold)
                                 else:
                                     thresholds.append(non_mono_threshold)
-                                mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds)
+                                mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds )
                                 if region == 'align':
-                                    count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2))
-                                else: 
+                                    count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) )
+                                else:
                                     if winspecies == 1:
-                                        count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2)
+                                        count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2)
                                     elif winspecies == 2:
-                                        count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1)
+                                        count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1)
                         else:
-                            if belems[p_group_cols[1]]==pitem2 and belems[s_group_cols[1]]==sitem2:
+                            if belems[p_group_cols[1]] == pitem2 and belems[s_group_cols[1]] == sitem2:
                                 repeats1.append(int(belems[13]))
                                 repeats2.append(int(belems[6]))
                                 if belems[4] == 'mononucleotide':
                                     thresholds.append(mono_threshold)
                                 else:
                                     thresholds.append(non_mono_threshold)
-                                mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds)
+                                mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds )
                                 if region == 'align':
-                                    count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2))
-                                else: 
+                                    count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) )
+                                else:
                                     if winspecies == 1:
-                                        count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2)
+                                        count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2)
                                     elif winspecies == 2:
-                                        count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1)
+                                        count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1)
         else:   #Sub-group by feature is None
             for bline in blk_lines:
                 belems = bline.split('\t')
@@ -304,21 +310,21 @@
                         else:
                             thresholds.append(non_mono_threshold)
                 else:
-                    if belems[p_group_cols[1]]==pitem2:
+                    if belems[p_group_cols[1]] == pitem2:
                         repeats2.append(int(belems[13]))
                         repeats1.append(int(belems[6]))
                         if belems[4] == 'mononucleotide':
                             thresholds.append(mono_threshold)
                         else:
                             thresholds.append(non_mono_threshold)
-            mut2["%s" %(pitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds)
+            mut2["%s" % (pitem2)] = mutabilityEstimator( repeats2, repeats1, thresholds )
             if region == 'align':
-                count2["%s" %(pitem2)]=min(sum(repeats1),sum(repeats2))
+                count2["%s" % (pitem2)] = min( sum(repeats1), sum(repeats2) )
             else:
                 if winspecies == 1:
-                    count2["%s" %(pitem2)]=sum(repeats2)
+                    count2["%s" % (pitem2)] = sum(repeats2)
                 elif winspecies == 2:
-                    count2["%s" %(pitem2)]=sum(repeats1)
+                    count2["%s" % (pitem2)] = sum(repeats1)
     for key in mut1.keys():
         if key in mut2.keys():
             mut = (mut1[key][0]+mut2[key][0])/(mut1[key][1]+mut2[key][1])
@@ -328,9 +334,9 @@
             unit_found = False
             if p_group_cols[0] == 7 or s_group_cols[0] == 7: #if it is Repeat Unit (AG, GCT etc.) check for reverse-complements too
                 if p_group_cols[0] == 7:
-                    this,other = 0,1
+                    this, other = 0, 1
                 else:
-                    this,other = 1,0
+                    this, other = 1, 0
                 groups1 = key.split('\t')
                 mutn = mut1[key][0]
                 mutd = mut1[key][1]
@@ -351,28 +357,29 @@
             else:
                 mut = mut1[key][0]/mut1[key][1]
                 count = count1[key]
-        mut = "%.2e" %(mut/num_generations)
+        mut = "%.2e" % (mut/num_generations)
         if region == 'align':
-            print >>fout, str(blk) + '\t'+seq1 + '\t' + seq2 + '\t' +key.strip()+ '\t'+str(mut) + '\t'+ str(count)
+            print >> fout, str(blk) + '\t'+seq1 + '\t' + seq2 + '\t' +key.strip()+ '\t'+str(mut) + '\t'+ str(count)
         elif region == 'win':
-            fout.write("%s\t%s\t%s\t%s\n" %(blk,key.strip(),mut,count))
+            fout.write("%s\t%s\t%s\t%s\n" % ( blk, key.strip(), mut, count ))
             fout.flush()
             
     #catch any remaining repeats, for instance if the orthologous position contained different repeat units
     for remaining_key in mut2.keys():
         mut = mut2[remaining_key][0]/mut2[remaining_key][1]
-        mut = "%.2e" %(mut/num_generations)
+        mut = "%.2e" % (mut/num_generations)
         count = count2[remaining_key]
         if region == 'align':
-            print >>fout, str(blk) + '\t'+seq1 + '\t'+seq2 + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count)
+            print >> fout, str(blk) + '\t'+seq1 + '\t'+seq2 + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count)
         elif region == 'win':
-            fout.write("%s\t%s\t%s\t%s\n" %(blk,remaining_key.strip(),mut,count))
+            fout.write("%s\t%s\t%s\t%s\n" % ( blk, remaining_key.strip(), mut, count ))
             fout.flush()
-            #print >>fout, blk + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count)
+            #print >> fout, blk + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count)
+
 
 def counter(node, start, end, report_func):
     if start <= node.start < end and start < node.end <= end:
-        report_func(node) 
+        report_func(node)
         if node.right:
             counter(node.right, start, end, report_func)
         if node.left:
@@ -381,8 +388,8 @@
         counter(node.right, start, end, report_func)
     elif node.start >= end and node.left and node.left.maxend > start:
         counter(node.left, start, end, report_func)
-            
-        
+
+
 def main():
     infile = sys.argv[1]
     
@@ -400,21 +407,18 @@
     if region == 'win':
         if dbkey_i in elems[1]:
             winspecies = 1
-            speciesind = 1 
+            speciesind = 1
         elif dbkey_i in elems[8]:
             winspecies = 2
             speciesind = 8
         else:
-            stop_err("The species build corresponding to your interval file is not present in the Microsatellite file.") 
+            stop_err("The species build corresponding to your interval file is not present in the Microsatellite file.")
         
     fin = open(infile, 'r')
     skipped = 0
-    blk=0
-    win=0
-    linestr=""
+    linestr = ""
     
     if region == 'win':
-        
         msats = NiceReaderWrapper( fileinput.FileInput( infile ),
                                 chrom_col = speciesind,
                                 start_col = speciesind+1,
@@ -435,7 +439,7 @@
                 ichr = ielems[chr_col_i]
                 istart = int(ielems[start_col_i])
                 iend = int(ielems[end_col_i])
-                isrc = "%s.%s" %(dbkey_i,ichr)
+                isrc = "%s.%s" % ( dbkey_i, ichr )
                 if isrc not in msatTree.chroms:
                     continue
                 result = []
@@ -450,14 +454,14 @@
                 tmpfile1.seek(0)
                 output_writer(iline, tmpfile1.readlines())
             except:
-                skipped+=1
+                skipped += 1
         if skipped:
-            print "Skipped %d intervals as invalid." %(skipped)
+            print "Skipped %d intervals as invalid." % (skipped)
     elif region == 'align':
         if s_group_cols[0] != -1:
-            print >>fout, "#Window\tSpecies_1\tSpecies_2\tGroupby_Feature\tSubGroupby_Feature\tMutability\tCount"
+            print >> fout, "#Window\tSpecies_1\tSpecies_2\tGroupby_Feature\tSubGroupby_Feature\tMutability\tCount"
         else:
-            print >>fout, "#Window\tSpecies_1\tWindow_Start\tWindow_End\tSpecies_2\tGroupby_Feature\tMutability\tCount"
+            print >> fout, "#Window\tSpecies_1\tWindow_Start\tWindow_End\tSpecies_2\tGroupby_Feature\tMutability\tCount"
         prev_bnum = -1
         try:
             for line in fin:
@@ -481,9 +485,11 @@
                 prev_bnum = new_bnum
             output_writer(prev_bnum, linestr.strip().replace('\r','\n').split('\n'))
         except Exception, ea:
-            print >>sys.stderr, ea
+            print >> sys.stderr, ea
             skipped += 1
         if skipped:
-            print "Skipped %d lines as invalid." %(skipped)
+            print "Skipped %d lines as invalid." % (skipped)
+
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()

diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/partialR_square.py
--- a/tools/regVariation/partialR_square.py
+++ b/tools/regVariation/partialR_square.py
@@ -2,7 +2,7 @@
 
 from galaxy import eggs
 
-import sys, string
+import sys
 from rpy import *
 import numpy
 
@@ -13,6 +13,7 @@
     sys.stderr.write(msg)
     sys.exit()
 
+
 def sscombs(s):
     if len(s) == 1:
         return [s]
@@ -26,14 +27,14 @@
 x_cols = sys.argv[3].split(',')
 outfile = sys.argv[4]
 
-print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
+print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 )
 fout = open(outfile,'w')
 
 for i, line in enumerate( file ( infile )):
     line = line.rstrip('\r\n')
     if len( line )>0 and not line.startswith( '#' ):
         elems = line.split( '\t' )
-        break 
+        break
     if i == 30:
         break # Hopefully we'll never get here...
 
@@ -43,7 +44,7 @@
 y_vals = []
 x_vals = []
 
-for k,col in enumerate(x_cols):
+for k, col in enumerate(x_cols):
     x_cols[k] = int(col)-1
     x_vals.append([])
     """
@@ -51,13 +52,13 @@
         float( elems[x_cols[k]] )
     except:
         try:
-            msg = "This operation cannot be performed on non-numeric column %d containing value '%s'." %( col, elems[x_cols[k]] )
+            msg = "This operation cannot be performed on non-numeric column %d containing value '%s'." % ( col, elems[x_cols[k]] )
         except:
             msg = "This operation cannot be performed on non-numeric data."
         stop_err( msg )
     """
 NA = 'NA'
-for ind,line in enumerate( file( infile )):
+for ind, line in enumerate( file( infile )):
     if line and not line.startswith( '#' ):
         try:
             fields = line.split("\t")
@@ -65,20 +66,20 @@
                 yval = float(fields[y_col])
             except Exception, ey:
                 yval = r('NA')
-                #print >>sys.stderr, "ey = %s" %ey
+                #print >> sys.stderr, "ey = %s" %ey
             y_vals.append(yval)
-            for k,col in enumerate(x_cols):
+            for k, col in enumerate(x_cols):
                 try:
                     xval = float(fields[col])
                 except Exception, ex:
                     xval = r('NA')
-                    #print >>sys.stderr, "ex = %s" %ex
+                    #print >> sys.stderr, "ex = %s" %ex
                 x_vals[k].append(xval)
         except:
             pass
 
 x_vals1 = numpy.asarray(x_vals).transpose()
-dat= r.list(x=array(x_vals1), y=y_vals)
+dat = r.list(x=array(x_vals1), y=y_vals)
 
 set_default_mode(NO_CONVERSION)
 try:
@@ -91,7 +92,7 @@
 fullr2 = summary.get('r.squared','NA')
 
 if fullr2 == 'NA':
-    stop_error("Error in linear regression")
+    stop_err("Error in linear regression")
 
 if len(x_vals) < 10:
     s = ""
@@ -100,10 +101,10 @@
 else:
     stop_err("This tool only works with less than 10 predictors.")
 
-print >>fout, "#Model\tR-sq\tpartial_R_Terms\tpartial_R_Value"
+print >> fout, "#Model\tR-sq\tpartial_R_Terms\tpartial_R_Value"
 all_combos = sorted(sscombs(s), key=len)
 all_combos.reverse()
-for j,cols in enumerate(all_combos):
+for j, cols in enumerate(all_combos):
     #if len(cols) == len(s):    #Same as the full model above
     #    continue
     if len(cols) == 1:
@@ -113,7 +114,7 @@
         for col in cols:
             x_v.append(x_vals[int(col)])
         x_vals1 = numpy.asarray(x_v).transpose()
-    dat= r.list(x=array(x_vals1), y=y_vals)
+    dat = r.list(x=array(x_vals1), y=y_vals)
     set_default_mode(NO_CONVERSION)
     red = r.lm(r("y ~ x"), data= dat)    #Reduced model
     set_default_mode(BASIC_CONVERSION)
@@ -136,11 +137,11 @@
         partial_R_col_str = "-"
         partial_R = "-"
     try:
-        redr2 = "%.4f" %(float(redr2))
+        redr2 = "%.4f" % (float(redr2))
     except:
         pass
     try:
-        partial_R = "%.4f" %(float(partial_R))
+        partial_R = "%.4f" % (float(partial_R))
     except:
         pass
-    print >>fout, "%s\t%s\t%s\t%s" %(col_str,redr2,partial_R_col_str,partial_R)
+    print >> fout, "%s\t%s\t%s\t%s" % ( col_str, redr2, partial_R_col_str, partial_R )

This diff is so big that we needed to truncate the remainder.

https://bitbucket.org/galaxy/galaxy-central/commits/82d2c2109792/
Changeset:   82d2c2109792
User:        dannon
Date:        2014-02-04 16:33:40
Summary:     Merged in nsoranzo/galaxy-central (pull request #312)

Remove unused imports and unused variables. Fix spacing.
Affected #:  17 files

diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/WeightedAverage.py
--- a/tools/regVariation/WeightedAverage.py
+++ b/tools/regVariation/WeightedAverage.py
@@ -1,20 +1,16 @@
 #!/usr/bin/env python
 """
-
 usage: %prog bed_file_1 bed_file_2 out_file
     -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file
     -2, --cols2=N,N,N,N,N: Columns for chr, start, end, strand, name/value in second file
 """
-from galaxy import eggs
 
 import collections
-import sys, string
+import sys
 #import numpy
 from galaxy import eggs
 import pkg_resources
 pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
 from galaxy.tools.util.galaxyops import *
 from bx.cookbook import doc_optparse
 
@@ -27,77 +23,72 @@
     sys.exit()
 
 
-def FindRate(chromosome,start_stop,dictType):
-    OverlapList=[]
+def FindRate(chromosome, start_stop, dictType):
+    OverlapList = []
     for tempO in dictType[chromosome]:
-        DatabaseInterval=[tempO[0],tempO[1]]
-        Overlap=GetOverlap(start_stop,DatabaseInterval)
-        if Overlap>0:
-            OverlapList.append([Overlap,tempO[2]])
-                                 
-    if len(OverlapList)>0:
-   
-        SumRecomb=0
-        SumOverlap=0
+        DatabaseInterval = [tempO[0], tempO[1]]
+        Overlap = GetOverlap( start_stop, DatabaseInterval )
+        if Overlap > 0:
+            OverlapList.append([Overlap, tempO[2]])
+    
+    if len(OverlapList) > 0:
+        SumRecomb = 0
+        SumOverlap = 0
         for member in OverlapList:
-            SumRecomb+=member[0]*member[1]
-            SumOverlap+=member[0]    
-        averageRate=SumRecomb/SumOverlap
-                               
+            SumRecomb += member[0]*member[1]
+            SumOverlap += member[0]
+        averageRate = SumRecomb/SumOverlap
         return averageRate
-                               
     else:
         return 'NA'
-                                                       
-                           
-    
-def GetOverlap(a,b):
-    return min(a[1],b[1])-max(a[0],b[0])
+
+
+def GetOverlap(a, b):
+    return min(a[1], b[1])-max(a[0], b[0])
+
 
 options, args = doc_optparse.parse( __doc__ )
 
 try:
     chr_col_1, start_col_1, end_col_1, strand_col1 = parse_cols_arg( options.cols1 )
-    chr_col_2, start_col_2, end_col_2, strand_col2, name_col_2 = parse_cols_arg( options.cols2 )      
+    chr_col_2, start_col_2, end_col_2, strand_col2, name_col_2 = parse_cols_arg( options.cols2 )
     input1, input2, input3 = args
 except Exception, eee:
     print eee
     stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
-    
 
+fd2 = open(input2)
+lines2 = fd2.readlines()
+RecombChrDict = collections.defaultdict(list)
 
-fd2=open(input2)
-lines2=fd2.readlines()
-RecombChrDict=collections.defaultdict(list)
-
-skipped=0
+skipped = 0
 for line in lines2:
-    temp=line.strip().split()
+    temp = line.strip().split()
     try:
         assert float(temp[int(name_col_2)])
     except:
-        skipped+=1
+        skipped += 1
         continue
-    tempIndex=[int(temp[int(start_col_2)]),int(temp[int(end_col_2)]),float(temp[int(name_col_2)])]
+    tempIndex = [int(temp[int(start_col_2)]), int(temp[int(end_col_2)]), float(temp[int(name_col_2)])]
     RecombChrDict[temp[int(chr_col_2)]].append(tempIndex)
 
-print "Skipped %d features with invalid values" %(skipped)
+print "Skipped %d features with invalid values" % (skipped)
 
-fd1=open(input1)
-lines=fd1.readlines()
-finalProduct=''
+fd1 = open(input1)
+lines = fd1.readlines()
+finalProduct = ''
 for line in lines:
-    temp=line.strip().split('\t')
-    chromosome=temp[int(chr_col_1)]
-    start=int(temp[int(start_col_1)])
-    stop=int(temp[int(end_col_1)])
-    start_stop=[start,stop]
-    RecombRate=FindRate(chromosome,start_stop,RecombChrDict)
+    temp = line.strip().split('\t')
+    chromosome = temp[int(chr_col_1)]
+    start = int(temp[int(start_col_1)])
+    stop = int(temp[int(end_col_1)])
+    start_stop = [start, stop]
+    RecombRate = FindRate( chromosome, start_stop, RecombChrDict )
     try:
-        RecombRate="%.4f" %(float(RecombRate))
+        RecombRate = "%.4f" % (float(RecombRate))
     except:
-        RecombRate=RecombRate
-    finalProduct+=line.strip()+'\t'+str(RecombRate)+'\n'
-fdd=open(input3,'w')
+        RecombRate = RecombRate
+    finalProduct += line.strip()+'\t'+str(RecombRate)+'\n'
+fdd = open(input3, 'w')
 fdd.writelines(finalProduct)
 fdd.close()

diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/best_regression_subsets.py
--- a/tools/regVariation/best_regression_subsets.py
+++ b/tools/regVariation/best_regression_subsets.py
@@ -2,7 +2,7 @@
 
 from galaxy import eggs
 
-import sys, string
+import sys
 from rpy import *
 import numpy
 
@@ -10,19 +10,20 @@
     sys.stderr.write(msg)
     sys.exit()
 
+
 infile = sys.argv[1]
 y_col = int(sys.argv[2])-1
 x_cols = sys.argv[3].split(',')
 outfile = sys.argv[4]
 outfile2 = sys.argv[5]
-print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
+print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 )
 fout = open(outfile,'w')
 
 for i, line in enumerate( file ( infile )):
     line = line.rstrip('\r\n')
     if len( line )>0 and not line.startswith( '#' ):
         elems = line.split( '\t' )
-        break 
+        break
     if i == 30:
         break # Hopefully we'll never get here...
 
@@ -32,12 +33,12 @@
 y_vals = []
 x_vals = []
 
-for k,col in enumerate(x_cols):
+for k, col in enumerate(x_cols):
     x_cols[k] = int(col)-1
     x_vals.append([])
     
 NA = 'NA'
-for ind,line in enumerate( file( infile )):
+for ind, line in enumerate( file( infile ) ):
     if line and not line.startswith( '#' ):
         try:
             fields = line.split("\t")
@@ -46,7 +47,7 @@
             except Exception, ey:
                 yval = r('NA')
             y_vals.append(yval)
-            for k,col in enumerate(x_cols):
+            for k, col in enumerate(x_cols):
                 try:
                     xval = float(fields[col])
                 except Exception, ex:
@@ -59,10 +60,10 @@
 
 x_vals1 = numpy.asarray(x_vals).transpose()
 
-dat= r.list(x=array(x_vals1), y=y_vals)
+dat = r.list(x=array(x_vals1), y=y_vals)
 
 r.library("leaps")
- 
+
 set_default_mode(NO_CONVERSION)
 try:
     leaps = r.regsubsets(r("y ~ x"), data= r.na_exclude(dat))
@@ -75,10 +76,10 @@
 pattern = "["
 for i in range(tot):
     pattern = pattern + 'c' + str(int(x_cols[int(i)]) + 1) + ' '
-pattern = pattern.strip() + ']'  
-print >>fout, "#Vars\t%s\tR-sq\tAdj. R-sq\tC-p\tbic" %(pattern)
-for ind,item in enumerate(summary['outmat']):
-    print >>fout, "%s\t%s\t%s\t%s\t%s\t%s" %(str(item).count('*'), item, summary['rsq'][ind], summary['adjr2'][ind], summary['cp'][ind], summary['bic'][ind])
+pattern = pattern.strip() + ']'
+print >> fout, "#Vars\t%s\tR-sq\tAdj. R-sq\tC-p\tbic" % (pattern)
+for ind, item in enumerate(summary['outmat']):
+    print >> fout, "%s\t%s\t%s\t%s\t%s\t%s" % (str(item).count('*'), item, summary['rsq'][ind], summary['adjr2'][ind], summary['cp'][ind], summary['bic'][ind])
 
 
 r.pdf( outfile2, 8, 8 )

diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/featureCounter.py
--- a/tools/regVariation/featureCounter.py
+++ b/tools/regVariation/featureCounter.py
@@ -11,8 +11,7 @@
 from galaxy import eggs
 import pkg_resources
 pkg_resources.require( "bx-python" )
-import sys, traceback, fileinput
-from warnings import warn
+import sys, fileinput
 from bx.intervals.io import *
 from bx.cookbook import doc_optparse
 from bx.intervals.operations import quicksect
@@ -33,7 +32,7 @@
             partial += 1
         if node.left and node.left.maxend > start:
             counter(node.left, start, end)
-        if node.right: 
+        if node.right:
             counter(node.right, start, end)
     elif start < node.start < end:
         if node.end <= end:
@@ -42,10 +41,10 @@
             partial += 1
         if node.left and node.left.maxend > start:
             counter(node.left, start, end)
-        if node.right: 
+        if node.right:
             counter(node.right, start, end)
     else:
-        if node.left: 
+        if node.left:
             counter(node.left, start, end)
 
 def count_coverage( readers, comments=True ):
@@ -58,8 +57,8 @@
         if type( item ) is GenomicInterval:
             rightTree.insert( item, secondary.linenum, item.fields )
     
-    bitsets = secondary_copy.binned_bitsets() 
-        
+    bitsets = secondary_copy.binned_bitsets()
+    
     global full, partial
     
     for interval in primary:
@@ -82,7 +81,7 @@
                 bases_covered = bitsets[ chrom ].count_range( start, end-start )
                 if (end - start) == 0:
                     percent = 0
-                else: 
+                else:
                     percent = float(bases_covered) / float(end - start)
                 if bases_covered:
                     root = rightTree.chroms[chrom]    #root node for the chrom tree
@@ -92,13 +91,14 @@
             interval.fields.append(str(full))
             interval.fields.append(str(partial))
             yield interval
-    
+
+
 def main():
     options, args = doc_optparse.parse( __doc__ )
     
     try:
         chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
-        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )      
+        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )
         in1_fname, in2_fname, out_fname = args
     except:
         stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
@@ -126,7 +126,7 @@
     out_file = open( out_fname, "w" )
 
     try:
-        for line in count_coverage([g1,g2,g2_copy]):
+        for line in count_coverage([g1, g2, g2_copy]):
             if type( line ) is GenomicInterval:
                 out_file.write( "%s\n" % "\t".join( line.fields ) )
             else:
@@ -143,6 +143,7 @@
         print skipped( g2, filedesc=" of 2nd dataset" )
     elif g2_copy.skipped > 0:
         print skipped( g2_copy, filedesc=" of 2nd dataset" )
-        
+
+
 if __name__ == "__main__":
     main()

diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/getIndelRates_3way.py
--- a/tools/regVariation/getIndelRates_3way.py
+++ b/tools/regVariation/getIndelRates_3way.py
@@ -6,7 +6,6 @@
 pkg_resources.require( "bx-python" )
 
 import sys, os, tempfile
-import traceback
 import fileinput
 from warnings import warn
 
@@ -18,7 +17,8 @@
 def stop_err(msg):
     sys.stderr.write(msg)
     sys.exit()
-    
+
+
 def counter(node, start, end, sort_col):
     global full, blk_len, blk_list
     if node.start < start:
@@ -31,14 +31,14 @@
             blk_len += int(node.other[sort_col+2])
         if node.left and node.left.maxend > start:
             counter(node.left, start, end, sort_col)
-        if node.right: 
+        if node.right:
             counter(node.right, start, end, sort_col)
     elif node.start > end:
-        if node.left: 
+        if node.left:
             counter(node.left, start, end, sort_col)
-            
 
-infile = sys.argv[1]  
+
+infile = sys.argv[1]
 fout = open(sys.argv[2],'w')
 int_file = sys.argv[3]
 if int_file != "None": #User has specified an interval file
@@ -48,9 +48,9 @@
         chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[5] )
     except:
         stop_err("Unable to open input Interval file")
-        
+
+
 def main():
-
     for i, line in enumerate( file ( infile )):
         line = line.rstrip('\r\n')
         if len( line )>0 and not line.startswith( '#' ):
@@ -86,8 +86,7 @@
             break
         except:
             continue
-        
-        
+    
     fin = open(infile, 'r')
     skipped = 0
     
@@ -98,7 +97,7 @@
             os.system(cmdline)
         except:
             stop_err("Encountered error while sorting the input file.")
-        print >>fout, "#Block\t%s_InsRate\t%s_InsRate\t%s_InsRate\t%s_DelRate\t%s_DelRate\t%s_DelRate" %(species[0],species[1],species[2],species[0],species[1],species[2])
+        print >> fout, "#Block\t%s_InsRate\t%s_InsRate\t%s_InsRate\t%s_DelRate\t%s_DelRate\t%s_DelRate" % ( species[0], species[1], species[2], species[0], species[1], species[2] )
         prev_bnum = -1
         sorted_infile.seek(0)
         for line in sorted_infile.readlines():
@@ -112,16 +111,16 @@
                     if prev_bnum != -1:
                         irate = []
                         drate = []
-                        for i,elem in enumerate(inserts):
+                        for i, elem in enumerate(inserts):
                             try:
-                                irate.append(str("%.2e" %(inserts[i]/blen[i])))
+                                irate.append(str("%.2e" % (inserts[i]/blen[i])))
                             except:
                                 irate.append('0')
                             try:
-                                drate.append(str("%.2e" %(deletes[i]/blen[i])))
+                                drate.append(str("%.2e" % (deletes[i]/blen[i])))
                             except:
                                 drate.append('0')
-                        print >>fout, "%s\t%s\t%s" %(prev_bnum, '\t'.join(irate) , '\t'.join(drate))
+                        print >> fout, "%s\t%s\t%s" % ( prev_bnum, '\t'.join(irate) , '\t'.join(drate) )
                     inserts = [0.0, 0.0, 0.0]
                     deletes = [0.0, 0.0, 0.0]
                     blen = []
@@ -134,25 +133,24 @@
                     inserts[sp_ind] += 1
                 elif elems[1].endswith('delete'):
                     deletes[sp_ind] += 1
-                prev_bnum = new_bnum 
+                prev_bnum = new_bnum
             except Exception, ei:
                 #print >>sys.stderr, ei
                 continue
         irate = []
         drate = []
-        for i,elem in enumerate(inserts):
+        for i, elem in enumerate(inserts):
             try:
-                irate.append(str("%.2e" %(inserts[i]/blen[i])))
+                irate.append(str("%.2e" % (inserts[i]/blen[i])))
             except:
                 irate.append('0')
             try:
-                drate.append(str("%.2e" %(deletes[i]/blen[i])))
+                drate.append(str("%.2e" % (deletes[i]/blen[i])))
             except:
                 drate.append('0')
-        print >>fout, "%s\t%s\t%s" %(prev_bnum, '\t'.join(irate) , '\t'.join(drate))
+        print >> fout, "%s\t%s\t%s" % ( prev_bnum, '\t'.join(irate) , '\t'.join(drate) )
         sys.exit()
     
-    
     inf = open(infile, 'r')
     start_met = False
     end_met = False
@@ -163,14 +161,14 @@
         try:
             assert int(elems[0])
             assert len(elems) == 18
-            if dbkey_i not in elems[1]: 
-                if not(start_met):   
+            if dbkey_i not in elems[1]:
+                if not(start_met):
                     continue
                 else:
                     sp_end = n
                     break
             else:
-                print >>sp_file, line
+                print >> sp_file, line
                 if not(start_met):
                     start_met = True
                     sp_start = n
@@ -201,7 +199,7 @@
     for item in indel:
         if type( item ) is GenomicInterval:
             indelTree.insert( item, indel.linenum, item.fields )
-    result=[]
+    result = []
     
     global full, blk_len, blk_list
     for interval in win:
@@ -213,14 +211,14 @@
             chrom = interval.chrom
             start = int(interval.start)
             end = int(interval.end)
-            if start > end: 
+            if start > end:
                 warn( "Interval start after end!" )
-            ins_chr = "%s.%s_insert" %(dbkey_i,chrom)
-            del_chr = "%s.%s_delete" %(dbkey_i,chrom)
+            ins_chr = "%s.%s_insert" % ( dbkey_i, chrom )
+            del_chr = "%s.%s_delete" % ( dbkey_i, chrom )
             irate = 0
             drate = 0
             if ins_chr not in indelTree.chroms and del_chr not in indelTree.chroms:
-                pass    
+                pass
             else:
                 if ins_chr in indelTree.chroms:
                     full = 0.0
@@ -242,8 +240,9 @@
                 
             interval.fields.append(str("%.2e" %irate))
             interval.fields.append(str("%.2e" %drate))
-            print >>fout, "\t".join(interval.fields)
+            print >> fout, "\t".join(interval.fields)
             fout.flush()
 
+
 if __name__ == "__main__":
-    main()    
\ No newline at end of file
+    main()

diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/getIndels.py
--- a/tools/regVariation/getIndels.py
+++ b/tools/regVariation/getIndels.py
@@ -8,13 +8,12 @@
 
 from __future__ import division
 from galaxy import eggs
-import pkg_resources 
+import pkg_resources
 pkg_resources.require( "bx-python" )
 try:
     pkg_resources.require("numpy")
 except:
     pass
-import psyco_full
 import sys
 from bx.cookbook import doc_optparse
 from galaxy.tools.exception_handling import *
@@ -22,24 +21,24 @@
 
 assert sys.version_info[:2] >= ( 2, 4 )
 
-def main():   
+def main():
     # Parsing Command Line here
     options, args = doc_optparse.parse( __doc__ )
     
     try:
-        inp_file, out_file1 = args    
+        inp_file, out_file1 = args
     except:
         print >> sys.stderr, "Tool initialization error."
         sys.exit()
     
     try:
-        fin = open(inp_file,'r')
+        open(inp_file, 'r')
     except:
         print >> sys.stderr, "Unable to open input file"
         sys.exit()
     try:
-        fout1 = open(out_file1,'w')
-        #fout2 = open(out_file2,'w')
+        fout1 = open(out_file1, 'w')
+        #fout2 = open(out_file2, 'w')
     except:
         print >> sys.stderr, "Unable to open output file"
         sys.exit()
@@ -47,11 +46,10 @@
     try:
         maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
     except:
-        print >>sys.stderr, "Your MAF file appears to be malformed."
+        print >> sys.stderr, "Your MAF file appears to be malformed."
         sys.exit()
-    maf_count = 0
     
-    print >>fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length"
+    print >> fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length"
     for block_ind, block in enumerate(maf_reader):
         if len(block.components) < 2:
             continue
@@ -84,19 +82,19 @@
                         #write 2
                         if prev_pos_gap2 == 1:
                             prev_pos_gap2 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1,nt_pos1+1,nt_pos2-1,nt_pos2-1+gaplen2,gaplen2)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1, nt_pos1+1, nt_pos2-1, nt_pos2-1+gaplen2, gaplen2 )
                         if pos == len(seq1)-1:
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1,nt_pos1+1,nt_pos2+1-gaplen1,nt_pos2+1,gaplen1)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1, nt_pos1+1, nt_pos2+1-gaplen1, nt_pos2+1, gaplen1 )
                     else:
                         prev_pos_gap1 = 0
                         prev_pos_gap2 = 0
                         """
                         if prev_pos_gap1 == 1:
                             prev_pos_gap1 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,gaplen1)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, gaplen1 )
                         elif prev_pos_gap2 == 1:
                             prev_pos_gap2 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos2-1,nt_pos2,gaplen2)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos2-1, nt_pos2, gaplen2 )
                         """
                 else:
                     nt_pos1 += 1
@@ -105,19 +103,21 @@
                         #write both
                         if prev_pos_gap1 == 1:
                             prev_pos_gap1 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2-gaplen1,nt_pos2,gaplen1)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2-gaplen1, nt_pos2, gaplen1 )
                         elif prev_pos_gap2 == 1:
                             prev_pos_gap2 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1-gaplen2,nt_pos1,nt_pos2-1,nt_pos2,gaplen2)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1-gaplen2, nt_pos1, nt_pos2-1, nt_pos2, gaplen2 )
                     else:
                         gaplen2 += 1
                         prev_pos_gap2 = 1
                         #write 1
                         if prev_pos_gap1 == 1:
                             prev_pos_gap1 = 0
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2,nt_pos2+gaplen1,gaplen1)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2, nt_pos2+gaplen1, gaplen1 )
                         if pos == len(seq1)-1:
-                            print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1+1-gaplen2,nt_pos1+1,nt_pos2,nt_pos2+1,gaplen2)
+                            print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1+1-gaplen2, nt_pos1+1, nt_pos2, nt_pos2+1, gaplen2 )
                 pos += 1
+
+
 if __name__ == "__main__":
     main()

diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/linear_regression.py
--- a/tools/regVariation/linear_regression.py
+++ b/tools/regVariation/linear_regression.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 from galaxy import eggs
-import sys, string
+import sys
 from rpy import *
 import numpy
 
@@ -15,14 +15,14 @@
 outfile = sys.argv[4]
 outfile2 = sys.argv[5]
 
-print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
+print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 )
 fout = open(outfile,'w')
 elems = []
 for i, line in enumerate( file ( infile )):
     line = line.rstrip('\r\n')
     if len( line )>0 and not line.startswith( '#' ):
         elems = line.split( '\t' )
-        break 
+        break
     if i == 30:
         break # Hopefully we'll never get here...
 
@@ -32,12 +32,12 @@
 y_vals = []
 x_vals = []
 
-for k,col in enumerate(x_cols):
+for k, col in enumerate(x_cols):
     x_cols[k] = int(col)-1
     x_vals.append([])
 
 NA = 'NA'
-for ind,line in enumerate( file( infile )):
+for ind, line in enumerate( file( infile )):
     if line and not line.startswith( '#' ):
         try:
             fields = line.split("\t")
@@ -46,7 +46,7 @@
             except:
                 yval = r('NA')
             y_vals.append(yval)
-            for k,col in enumerate(x_cols):
+            for k, col in enumerate(x_cols):
                 try:
                     xval = float(fields[col])
                 except:
@@ -57,7 +57,7 @@
 
 x_vals1 = numpy.asarray(x_vals).transpose()
 
-dat= r.list(x=array(x_vals1), y=y_vals)
+dat = r.list(x=array(x_vals1), y=y_vals)
 
 set_default_mode(NO_CONVERSION)
 try:
@@ -66,8 +66,8 @@
     stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain only non-numeric or invalid values.")
 set_default_mode(BASIC_CONVERSION)
 
-coeffs=linear_model.as_py()['coefficients']
-yintercept= coeffs['(Intercept)']
+coeffs = linear_model.as_py()['coefficients']
+yintercept = coeffs['(Intercept)']
 summary = r.summary(linear_model)
 
 co = summary.get('coefficients', 'NA')
@@ -82,8 +82,8 @@
 except:
     pass
 
-print >>fout, "Y-intercept\t%s" %(yintercept)
-print >>fout, "p-value (Y-intercept)\t%s" %(pvaly)
+print >> fout, "Y-intercept\t%s" % (yintercept)
+print >> fout, "p-value (Y-intercept)\t%s" % (pvaly)
 
 if len(x_vals) == 1:    #Simple linear  regression case with 1 predictor variable
     try:
@@ -94,22 +94,22 @@
         pval = r.round(float(co[1][3]), digits=10)
     except:
         pval = 'NA'
-    print >>fout, "Slope (c%d)\t%s" %(x_cols[0]+1,slope)
-    print >>fout, "p-value (c%d)\t%s" %(x_cols[0]+1,pval)
+    print >> fout, "Slope (c%d)\t%s" % ( x_cols[0]+1, slope )
+    print >> fout, "p-value (c%d)\t%s" % ( x_cols[0]+1, pval )
 else:    #Multiple regression case with >1 predictors
-    ind=1
+    ind = 1
     while ind < len(coeffs.keys()):
         try:
             slope = r.round(float(coeffs['x'+str(ind)]), digits=10)
         except:
             slope = 'NA'
-        print >>fout, "Slope (c%d)\t%s" %(x_cols[ind-1]+1,slope)
+        print >> fout, "Slope (c%d)\t%s" % ( x_cols[ind-1]+1, slope )
         try:
             pval = r.round(float(co[ind][3]), digits=10)
         except:
             pval = 'NA'
-        print >>fout, "p-value (c%d)\t%s" %(x_cols[ind-1]+1,pval)
-        ind+=1
+        print >> fout, "p-value (c%d)\t%s" % ( x_cols[ind-1]+1, pval )
+        ind += 1
 
 rsq = summary.get('r.squared','NA')
 adjrsq = summary.get('adj.r.squared','NA')
@@ -125,14 +125,14 @@
 except:
     pass
 
-print >>fout, "R-squared\t%s" %(rsq)
-print >>fout, "Adjusted R-squared\t%s" %(adjrsq)
-print >>fout, "F-statistic\t%s" %(fstat)
-print >>fout, "Sigma\t%s" %(sigma)
+print >> fout, "R-squared\t%s" % (rsq)
+print >> fout, "Adjusted R-squared\t%s" % (adjrsq)
+print >> fout, "F-statistic\t%s" % (fstat)
+print >> fout, "Sigma\t%s" % (sigma)
 
 r.pdf( outfile2, 8, 8 )
 if len(x_vals) == 1:    #Simple linear  regression case with 1 predictor variable
-    sub_title =  "Slope = %s; Y-int = %s" %(slope,yintercept)
+    sub_title =  "Slope = %s; Y-int = %s" % ( slope, yintercept )
     try:
         r.plot(x=x_vals[0], y=y_vals, xlab="X", ylab="Y", sub=sub_title, main="Scatterplot with regression")
         r.abline(a=yintercept, b=slope, col="red")

diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/logistic_regression_vif.py
--- a/tools/regVariation/logistic_regression_vif.py
+++ b/tools/regVariation/logistic_regression_vif.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 from galaxy import eggs
-import sys, string
+import sys
 from rpy import *
 import numpy
 
@@ -15,14 +15,14 @@
 outfile = sys.argv[4]
 
 
-print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
+print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 )
 fout = open(outfile,'w')
 elems = []
-for i, line in enumerate( file ( infile )):
+for i, line in enumerate( file( infile ) ):
     line = line.rstrip('\r\n')
     if len( line )>0 and not line.startswith( '#' ):
         elems = line.split( '\t' )
-        break 
+        break
     if i == 30:
         break # Hopefully we'll never get here...
 
@@ -32,12 +32,12 @@
 y_vals = []
 x_vals = []
 
-for k,col in enumerate(x_cols):
+for k, col in enumerate(x_cols):
     x_cols[k] = int(col)-1
     x_vals.append([])
 
 NA = 'NA'
-for ind,line in enumerate( file( infile )):
+for ind, line in enumerate( file( infile )):
     if line and not line.startswith( '#' ):
         try:
             fields = line.split("\t")
@@ -46,7 +46,7 @@
             except:
                 yval = r('NA')
             y_vals.append(yval)
-            for k,col in enumerate(x_cols):
+            for k, col in enumerate(x_cols):
                 try:
                     xval = float(fields[col])
                 except:
@@ -57,46 +57,45 @@
 
 x_vals1 = numpy.asarray(x_vals).transpose()
 
-check1=0
-check0=0
+check1 = 0
+check0 = 0
 for i in y_vals:
     if i == 1:
-        check1=1
+        check1 = 1
     if i == 0:
-        check0=1
-if check1==0 or check0==0:
+        check0 = 1
+if check1 == 0 or check0 == 0:
     sys.exit("Warning: logistic regression must have at least two classes")
 
 for i in y_vals:
-    if i not in [1,0,r('NA')]:
-        print >>fout, str(i)
+    if i not in [1, 0, r('NA')]:
+        print >> fout, str(i)
         sys.exit("Warning: the current version of this tool can run only with two classes and need to be labeled as 0 and 1.")
-    
-    
-dat= r.list(x=array(x_vals1), y=y_vals)
-novif=0
+
+dat = r.list(x=array(x_vals1), y=y_vals)
+novif = 0
 set_default_mode(NO_CONVERSION)
 try:
-    linear_model = r.glm(r("y ~ x"), data = r.na_exclude(dat),family="binomial")
+    linear_model = r.glm(r("y ~ x"), data=r.na_exclude(dat), family="binomial")
 except RException, rex:
     stop_err("Error performing logistic regression on the input data.\nEither the response column or one of the predictor columns contain only non-numeric or invalid values.")
 if len(x_cols)>1:
     try:
         r('suppressPackageStartupMessages(library(car))')
-        r.assign('dat',dat)
-        r.assign('ncols',len(x_cols))
-        vif=r.vif(r('glm(dat$y ~ ., data = na.exclude(data.frame(as.matrix(dat$x,ncol=ncols))->datx),family="binomial")'))
+        r.assign('dat', dat)
+        r.assign('ncols', len(x_cols))
+        vif = r.vif(r('glm(dat$y ~ ., data = na.exclude(data.frame(as.matrix(dat$x,ncol=ncols))->datx), family="binomial")'))
     except RException, rex:
         print rex
 else:
-    novif=1
-    
+    novif = 1
+
 set_default_mode(BASIC_CONVERSION)
 
-coeffs=linear_model.as_py()['coefficients']
-null_deviance=linear_model.as_py()['null.deviance']
-residual_deviance=linear_model.as_py()['deviance']
-yintercept= coeffs['(Intercept)']
+coeffs = linear_model.as_py()['coefficients']
+null_deviance = linear_model.as_py()['null.deviance']
+residual_deviance = linear_model.as_py()['deviance']
+yintercept = coeffs['(Intercept)']
 summary = r.summary(linear_model)
 co = summary.get('coefficients', 'NA')
 """
@@ -109,14 +108,14 @@
     pvaly = r.round(float(co[0][3]), digits=10)
 except:
     pass
-print >>fout, "response column\tc%d" %(y_col+1)
-tempP=[]
+print >> fout, "response column\tc%d" % (y_col+1)
+tempP = []
 for i in x_cols:
     tempP.append('c'+str(i+1))
-tempP=','.join(tempP)
-print >>fout, "predictor column(s)\t%s" %(tempP)
-print >>fout, "Y-intercept\t%s" %(yintercept)
-print >>fout, "p-value (Y-intercept)\t%s" %(pvaly)
+tempP = ','.join(tempP)
+print >> fout, "predictor column(s)\t%s" % (tempP)
+print >> fout, "Y-intercept\t%s" % (yintercept)
+print >> fout, "p-value (Y-intercept)\t%s" % (pvaly)
 
 if len(x_vals) == 1:    #Simple linear  regression case with 1 predictor variable
     try:
@@ -127,44 +126,43 @@
         pval = r.round(float(co[1][3]), digits=10)
     except:
         pval = 'NA'
-    print >>fout, "Slope (c%d)\t%s" %(x_cols[0]+1,slope)
-    print >>fout, "p-value (c%d)\t%s" %(x_cols[0]+1,pval)
+    print >> fout, "Slope (c%d)\t%s" % ( x_cols[0]+1, slope )
+    print >> fout, "p-value (c%d)\t%s" % ( x_cols[0]+1, pval )
 else:    #Multiple regression case with >1 predictors
-    ind=1
+    ind = 1
     while ind < len(coeffs.keys()):
         try:
             slope = r.round(float(coeffs['x'+str(ind)]), digits=10)
         except:
             slope = 'NA'
-        print >>fout, "Slope (c%d)\t%s" %(x_cols[ind-1]+1,slope)
+        print >> fout, "Slope (c%d)\t%s" % ( x_cols[ind-1]+1, slope )
         try:
             pval = r.round(float(co[ind][3]), digits=10)
         except:
             pval = 'NA'
-        print >>fout, "p-value (c%d)\t%s" %(x_cols[ind-1]+1,pval)
-        ind+=1
+        print >> fout, "p-value (c%d)\t%s" % ( x_cols[ind-1]+1, pval )
+        ind += 1
 
 rsq = summary.get('r.squared','NA')
 
-
 try:
-    rsq= r.round(float((null_deviance-residual_deviance)/null_deviance), digits=5)
-    null_deviance= r.round(float(null_deviance), digits=5)
-    residual_deviance= r.round(float(residual_deviance), digits=5)
+    rsq = r.round(float((null_deviance-residual_deviance)/null_deviance), digits=5)
+    null_deviance = r.round(float(null_deviance), digits=5)
+    residual_deviance = r.round(float(residual_deviance), digits=5)
 except:
     pass
 
-print >>fout, "Null deviance\t%s" %(null_deviance)
-print >>fout, "Residual deviance\t%s" %(residual_deviance)
-print >>fout, "pseudo R-squared\t%s" %(rsq)
-print >>fout, "\n"
-print >>fout, 'vif'
+print >> fout, "Null deviance\t%s" % (null_deviance)
+print >> fout, "Residual deviance\t%s" % (residual_deviance)
+print >> fout, "pseudo R-squared\t%s" % (rsq)
+print >> fout, "\n"
+print >> fout, 'vif'
 
-if novif==0:
-    py_vif=vif.as_py()
-    count=0
+if novif == 0:
+    py_vif = vif.as_py()
+    count = 0
     for i in sorted(py_vif.keys()):
-        print >>fout,'c'+str(x_cols[count]+1) ,str(py_vif[i])
-        count+=1
-elif novif==1:
-    print >>fout, "vif can calculate only when model have more than 1 predictor"
+        print >> fout, 'c'+str(x_cols[count]+1), str(py_vif[i])
+        count += 1
+elif novif == 1:
+    print >> fout, "vif can calculate only when model have more than 1 predictor"

diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/maf_cpg_filter.py
--- a/tools/regVariation/maf_cpg_filter.py
+++ b/tools/regVariation/maf_cpg_filter.py
@@ -10,7 +10,7 @@
 """
 
 from galaxy import eggs
-import pkg_resources 
+import pkg_resources
 pkg_resources.require( "bx-python" )
 try:
     pkg_resources.require( "numpy" )
@@ -54,7 +54,7 @@
         defn = "non-CpG"
     cpgfilter.run( reader, writer.write )
     
-    print "%2.2f percent bases masked; Mask character = %s, Definition = %s" %(float(cpgfilter.masked)/float(cpgfilter.total) * 100, mask, defn)
+    print "%2.2f percent bases masked; Mask character = %s, Definition = %s" % ( float(cpgfilter.masked)/float(cpgfilter.total) * 100, mask, defn )
 
 if __name__ == "__main__":
     main()

diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/microsats_alignment_level.py
--- a/tools/regVariation/microsats_alignment_level.py
+++ b/tools/regVariation/microsats_alignment_level.py
@@ -4,7 +4,11 @@
 Uses SPUTNIK to fetch microsatellites and extracts orthologous repeats from the sputnik output.
 """
 from galaxy import eggs
-import sys, os, tempfile, string, math, re
+import os
+import re
+import string
+import sys
+import tempfile
 
 def reverse_complement(text):
     DNA_COMP = string.maketrans( "ACGTacgt", "TGCAtgca" )
@@ -12,31 +16,26 @@
     comp.reverse()
     return "".join(comp)
 
+
 def main():
     if len(sys.argv) != 8:
-        print >>sys.stderr, "Insufficient number of arguments."
+        print >> sys.stderr, "Insufficient number of arguments."
         sys.exit()
     
     infile = open(sys.argv[1],'r')
     separation = int(sys.argv[2])
     outfile = sys.argv[3]
-    align_type = sys.argv[4]
-    if align_type == "2way":
-        align_type_len = 2
-    elif align_type == "3way":
-        align_type_len = 3
     mono_threshold = int(sys.argv[5])
     non_mono_threshold = int(sys.argv[6])
     allow_different_units = int(sys.argv[7])
     
-    print "Min distance = %d bp; Min threshold for mono repeats = %d; Min threshold for non-mono repeats = %d; Allow different motifs = %s" %(separation, mono_threshold, non_mono_threshold, allow_different_units==1)
+    print "Min distance = %d bp; Min threshold for mono repeats = %d; Min threshold for non-mono repeats = %d; Allow different motifs = %s" % ( separation, mono_threshold, non_mono_threshold, allow_different_units==1 )
     try:
         fout = open(outfile, "w")
-        print >>fout, "#Block\tSeq1_Name\tSeq1_Start\tSeq1_End\tSeq1_Type\tSeq1_Length\tSeq1_RepeatNumber\tSeq1_Unit\tSeq2_Name\tSeq2_Start\tSeq2_End\tSeq2_Type\tSeq2_Length\tSeq2_RepeatNumber\tSeq2_Unit"
+        print >> fout, "#Block\tSeq1_Name\tSeq1_Start\tSeq1_End\tSeq1_Type\tSeq1_Length\tSeq1_RepeatNumber\tSeq1_Unit\tSeq2_Name\tSeq2_Start\tSeq2_End\tSeq2_Type\tSeq2_Length\tSeq2_RepeatNumber\tSeq2_Unit"
         #sputnik_cmd = os.path.join(os.path.split(sys.argv[0])[0], "sputnik")
         sputnik_cmd = "sputnik"
         input = infile.read()
-        skipped = 0
         block_num = 0
         input = input.replace('\r','\n')
         for block in input.split('\n\n'):
@@ -44,26 +43,24 @@
             tmpin = tempfile.NamedTemporaryFile()
             tmpout = tempfile.NamedTemporaryFile()
             tmpin.write(block.strip())
-            blk = tmpin.read()
             cmdline = sputnik_cmd + " " + tmpin.name + "  > /dev/null 2>&1 >> " + tmpout.name
             try:
                 os.system(cmdline)
-            except Exception, es:
+            except Exception:
                 continue
             sputnik_out = tmpout.read()
             tmpin.close()
             tmpout.close()
             if sputnik_out != "":
                 if len(block.split('>')[1:]) != 2:        #len(sputnik_out.split('>')):
-                    skipped += 1
                     continue
                 align_block = block.strip().split('>')
                 
                 lendict = {'mononucleotide':1, 'dinucleotide':2, 'trinucleotide':3, 'tetranucleotide':4, 'pentanucleotide':5, 'hexanucleotide':6}
-                blockdict={}
-                r=0
-                namelist=[]
-                for k,sput_block in enumerate(sputnik_out.split('>')[1:]):
+                blockdict = {}
+                r = 0
+                namelist = []
+                for k, sput_block in enumerate(sputnik_out.split('>')[1:]):
                     whole_seq = ''.join(align_block[k+1].split('\n')[1:]).replace('\n','').strip()
                     p = re.compile('\n(\S*nucleotide)')
                     repeats = p.split(sput_block.strip())
@@ -71,13 +68,12 @@
                     j = 1
                     name = repeats[0].strip()
                     try:
-                        coords = re.search('\d+[-_:]\d+',name).group()
-                        coords = coords.replace('_','-').replace(':','-')
-                    except Exception, e:
+                        coords = re.search('\d+[-_:]\d+', name).group()
+                        coords = coords.replace('_', '-').replace(':', '-')
+                    except Exception:
                         coords = '0-0'
-                        pass
                     r += 1
-                    blockdict[r]={}
+                    blockdict[r] = {}
                     try:
                         sp_name = name[:name.index('.')]
                         chr_name = name[name.index('.'):name.index('(')]
@@ -91,11 +87,10 @@
                                 continue
                             
                             if blockdict[r].has_key('types'):
-                                blockdict[r]['types'].append(repeats[j].strip())        #type of microsat     
+                                blockdict[r]['types'].append(repeats[j].strip())  #type of microsat
                             else:
-                                blockdict[r]['types'] = [repeats[j].strip()]               #type of microsat  
+                                blockdict[r]['types'] = [repeats[j].strip()]  #type of microsat
                             
-                            sequence = ''.join(align_block[r].split('\n')[1:]).replace('\n','').strip()
                             start = int(repeats[j+1].split('--')[0].split(':')[0].strip())
                             #check to see if there are gaps before the start of the repeat, and change the start accordingly
                             sgaps = 0
@@ -107,7 +102,7 @@
                                     break    #break at the 1st non-gap character
                                 ch_pos -= 1
                             if blockdict[r].has_key('starts'):
-                                blockdict[r]['starts'].append(start+sgaps)        #start co-ords adjusted with alignment co-ords to include GAPS    
+                                blockdict[r]['starts'].append(start+sgaps)  #start co-ords adjusted with alignment co-ords to include GAPS
                             else:
                                 blockdict[r]['starts'] = [start+sgaps]
                             
@@ -120,7 +115,7 @@
                                 else:
                                     break    #break at the 1st non-gap character
                             if blockdict[r].has_key('ends'):
-                                blockdict[r]['ends'].append(end+egaps)        #end co-ords adjusted with alignment co-ords to include GAPS    
+                                blockdict[r]['ends'].append(end+egaps)  #end co-ords adjusted with alignment co-ords to include GAPS
                             else:
                                 blockdict[r]['ends'] = [end+egaps]
                                 
@@ -134,20 +129,20 @@
                             gaps_before_start = whole_seq[:rel_start].count('-')
                             
                             if blockdict[r].has_key('gaps_before_start'):
-                                blockdict[r]['gaps_before_start'].append(gaps_before_start)  #lengths  
+                                blockdict[r]['gaps_before_start'].append(gaps_before_start)  #lengths
                             else:
-                                blockdict[r]['gaps_before_start'] = [gaps_before_start]       #lengths 
+                                blockdict[r]['gaps_before_start'] = [gaps_before_start]  #lengths
                             
-                            whole_seq_start= int(coords.split('-')[0])
+                            whole_seq_start = int(coords.split('-')[0])
                             if blockdict[r].has_key('whole_seq_start'):
-                                blockdict[r]['whole_seq_start'].append(whole_seq_start)  #lengths  
+                                blockdict[r]['whole_seq_start'].append(whole_seq_start)  #lengths
                             else:
-                                blockdict[r]['whole_seq_start'] = [whole_seq_start]       #lengths 
+                                blockdict[r]['whole_seq_start'] = [whole_seq_start]  #lengths
                                 
                             if blockdict[r].has_key('lengths'):
-                                blockdict[r]['lengths'].append(repeat_len)  #lengths  
+                                blockdict[r]['lengths'].append(repeat_len)  #lengths
                             else:
-                                blockdict[r]['lengths'] = [repeat_len]       #lengths 
+                                blockdict[r]['lengths'] = [repeat_len]  #lengths
                             
                             if blockdict[r].has_key('counts'):
                                 blockdict[r]['counts'].append(str(int(repeat_len)/lendict[repeats[j].strip()]))  #Repeat Unit
@@ -159,10 +154,10 @@
                             else:
                                 blockdict[r]['units'] = [repeat_seq[:lendict[repeats[j].strip()]]]         #Repeat Unit
                             
-                        except Exception, eh:
+                        except Exception:
                             pass
-                        j+=2
-                    #check the co-ords of all repeats corresponding to a sequence and remove adjacent repeats separated by less than the user-specified 'separation'. 
+                        j += 2
+                    #check the co-ords of all repeats corresponding to a sequence and remove adjacent repeats separated by less than the user-specified 'separation'.
                     delete_index_list = []
                     for ind, item in enumerate(blockdict[r]['ends']):
                         try:
@@ -171,7 +166,7 @@
                                     delete_index_list.append(ind)
                                 if ind+1 not in delete_index_list:
                                     delete_index_list.append(ind+1)
-                        except Exception, ek:
+                        except Exception:
                             pass
                     for index in delete_index_list:    #mark them for deletion
                         try:
@@ -183,7 +178,7 @@
                             blockdict[r]['lengths'][index] = 'marked'
                             blockdict[r]['counts'][index] = 'marked'
                             blockdict[r]['units'][index] = 'marked'
-                        except Exception, ej:
+                        except Exception:
                             pass
                     #remove 'marked' elements from all the lists
                     """
@@ -192,19 +187,19 @@
                             if elem == 'marked':
                                 blockdict[r][key].remove(elem)
                     """
-                    #print blockdict    
+                    #print blockdict
                 
-                #make sure that the blockdict has keys for both the species   
+                #make sure that the blockdict has keys for both the species
                 if (1 not in blockdict) or (2 not in blockdict):
                     continue
                 
                 visited_2 = [0 for x in range(len(blockdict[2]['starts']))]
-                for ind1,coord_s1 in enumerate(blockdict[1]['starts']):
+                for ind1, coord_s1 in enumerate(blockdict[1]['starts']):
                     if coord_s1 == 'marked':
                         continue
                     coord_e1 = blockdict[1]['ends'][ind1]
                     out = []
-                    for ind2,coord_s2 in enumerate(blockdict[2]['starts']):
+                    for ind2, coord_s2 in enumerate(blockdict[2]['starts']):
                         if coord_s2 == 'marked':
                             visited_2[ind2] = 1
                             continue
@@ -216,7 +211,7 @@
                             else:
                                 if (blockdict[1]['units'][ind1] not in blockdict[2]['units'][ind2]*2) and (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2):
                                     continue
-                        #print >>sys.stderr, (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2)
+                        #print >> sys.stderr, (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2)
                         #skip if the repeat number thresholds are not met
                         if blockdict[1]['types'][ind1] == 'mononucleotide':
                             if (int(blockdict[1]['counts'][ind1]) < mono_threshold):
@@ -231,12 +226,12 @@
                         else:
                             if (int(blockdict[2]['counts'][ind2]) < non_mono_threshold):
                                 continue
-                        #print "s1,e1=%s,%s; s2,e2=%s,%s" %(coord_s1,coord_e1,coord_s2,coord_e2)
-                        if (coord_s1 in range(coord_s2,coord_e2)) or (coord_e1 in range(coord_s2,coord_e2)):
+                        #print "s1,e1=%s,%s; s2,e2=%s,%s" % ( coord_s1, coord_e1, coord_s2, coord_e2 )
+                        if (coord_s1 in range(coord_s2, coord_e2)) or (coord_e1 in range(coord_s2, coord_e2)):
                             out.append(str(block_num))
                             out.append(namelist[0])
                             rel_start = blockdict[1]['whole_seq_start'][ind1] + coord_s1 - blockdict[1]['gaps_before_start'][ind1]
-                            rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) 
+                            rel_end = rel_start + int(blockdict[1]['lengths'][ind1])
                             out.append(str(rel_start))
                             out.append(str(rel_end))
                             out.append(blockdict[1]['types'][ind1])
@@ -245,16 +240,16 @@
                             out.append(blockdict[1]['units'][ind1])
                             out.append(namelist[1])
                             rel_start = blockdict[2]['whole_seq_start'][ind2] + coord_s2 - blockdict[2]['gaps_before_start'][ind2]
-                            rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) 
+                            rel_end = rel_start + int(blockdict[2]['lengths'][ind2])
                             out.append(str(rel_start))
                             out.append(str(rel_end))
                             out.append(blockdict[2]['types'][ind2])
                             out.append(blockdict[2]['lengths'][ind2])
                             out.append(blockdict[2]['counts'][ind2])
                             out.append(blockdict[2]['units'][ind2])
-                            print >>fout, '\t'.join(out)
+                            print >> fout, '\t'.join(out)
                             visited_2[ind2] = 1
-                            out=[]
+                            out = []
                 
                 if 0 in visited_2:    #there are still some elements in 2nd set which haven't found orthologs yet.
                     for ind2, coord_s2 in enumerate(blockdict[2]['starts']):
@@ -264,7 +259,7 @@
                             continue
                         coord_e2 = blockdict[2]['ends'][ind2]
                         out = []
-                        for ind1,coord_s1 in enumerate(blockdict[1]['starts']):
+                        for ind1, coord_s1 in enumerate(blockdict[1]['starts']):
                             if coord_s1 == 'marked':
                                 continue
                             coord_e1 = blockdict[1]['ends'][ind1]
@@ -290,11 +285,11 @@
                                 if (int(blockdict[2]['counts'][ind2]) < non_mono_threshold):
                                     continue
                             
-                            if (coord_s2 in range(coord_s1,coord_e1)) or (coord_e2 in range(coord_s1,coord_e1)):
-                                out.append(str(block_num)) 
+                            if (coord_s2 in range(coord_s1, coord_e1)) or (coord_e2 in range(coord_s1, coord_e1)):
+                                out.append(str(block_num))
                                 out.append(namelist[0])
                                 rel_start = blockdict[1]['whole_seq_start'][ind1] + coord_s1 - blockdict[1]['gaps_before_start'][ind1]
-                                rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) 
+                                rel_end = rel_start + int(blockdict[1]['lengths'][ind1])
                                 out.append(str(rel_start))
                                 out.append(str(rel_end))
                                 out.append(blockdict[1]['types'][ind1])
@@ -303,21 +298,21 @@
                                 out.append(blockdict[1]['units'][ind1])
                                 out.append(namelist[1])
                                 rel_start = blockdict[2]['whole_seq_start'][ind2] + coord_s2 - blockdict[2]['gaps_before_start'][ind2]
-                                rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) 
+                                rel_end = rel_start + int(blockdict[2]['lengths'][ind2])
                                 out.append(str(rel_start))
                                 out.append(str(rel_end))
                                 out.append(blockdict[2]['types'][ind2])
                                 out.append(blockdict[2]['lengths'][ind2])
                                 out.append(blockdict[2]['counts'][ind2])
                                 out.append(blockdict[2]['units'][ind2])
-                                print >>fout, '\t'.join(out)
+                                print >> fout, '\t'.join(out)
                                 visited_2[ind2] = 1
-                                out=[]
+                                out = []
                             
-                    #print >>fout, blockdict
+                    #print >> fout, blockdict
     except Exception, exc:
-        print >>sys.stderr, "type(exc),args,exc: %s, %s, %s" %(type(exc), exc.args, exc)
+        print >> sys.stderr, "type(exc),args,exc: %s, %s, %s" % ( type(exc), exc.args, exc )
+
 
 if __name__ == "__main__":
     main()
-    

diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/microsats_mutability.py
--- a/tools/regVariation/microsats_mutability.py
+++ b/tools/regVariation/microsats_mutability.py
@@ -4,7 +4,10 @@
 This tool computes microsatellite mutability for the orthologous microsatellites fetched from  'Extract Orthologous Microsatellites from pair-wise alignments' tool.
 """
 from galaxy import eggs
-import sys, string, re, commands, tempfile, os, fileinput
+import fileinput
+import string
+import sys
+import tempfile
 from galaxy.tools.util.galaxyops import *
 from bx.intervals.io import *
 from bx.intervals.operations import quicksect
@@ -19,7 +22,7 @@
 p_group_cols = [p_group, p_group+7]
 s_group_cols = [s_group, s_group+7]
 num_generations = int(sys.argv[7])
-region = sys.argv[8] 
+region = sys.argv[8]
 int_file = sys.argv[9]
 if int_file != "None": #User has specified an interval file
     try:
@@ -28,31 +31,35 @@
         chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[11] )
     except:
         stop_err("Unable to open input Interval file")
-    
+
+
 def stop_err(msg):
     sys.stderr.write(msg)
     sys.exit()
 
+
 def reverse_complement(text):
     DNA_COMP = string.maketrans( "ACGTacgt", "TGCAtgca" )
     comp = [ch for ch in text.translate(DNA_COMP)]
     comp.reverse()
     return "".join(comp)
 
+
 def get_unique_elems(elems):
-    seen=set()
+    seen = set()
     return[x for x in elems if x not in seen and not seen.add(x)]
 
+
 def get_binned_lists(uniqlist, binsize):
-    binnedlist=[]
+    binnedlist = []
     uniqlist.sort()
     start = int(uniqlist[0])
-    bin_ind=0
-    l_ind=0
+    bin_ind = 0
+    l_ind = 0
     binnedlist.append([])
     while l_ind < len(uniqlist):
         elem = int(uniqlist[l_ind])
-        if elem in range(start,start+binsize):
+        if elem in range(start, start+binsize):
             binnedlist[bin_ind].append(elem)
         else:
             start += binsize
@@ -62,39 +69,38 @@
         l_ind += 1
     return binnedlist
 
-def fetch_weight(H,C,t):
+
+def fetch_weight(H, C, t):
     if (H-(C-H)) < t:
         return 2.0
     else:
         return 1.0
 
-def mutabilityEstimator(repeats1,repeats2,thresholds):
+
+def mutabilityEstimator(repeats1, repeats2, thresholds):
     mut_num = 0.0    #Mutability Numerator
     mut_den = 0.0    #Mutability denominator
-    for ind,H in enumerate(repeats1):
+    for ind, H in enumerate(repeats1):
         C = repeats2[ind]
         t = thresholds[ind]
-        w = fetch_weight(H,C,t)
+        w = fetch_weight(H, C, t)
         mut_num += ((H-C)*(H-C)*w)
         mut_den += w
     return [mut_num, mut_den]
 
+
 def output_writer(blk, blk_lines):
     global winspecies, speciesind
-    all_elems_1=[]
-    all_elems_2=[]
-    all_s_elems_1=[]
-    all_s_elems_2=[]
+    all_elems_1 = []
+    all_elems_2 = []
+    all_s_elems_1 = []
+    all_s_elems_2 = []
     for bline in blk_lines:
         if not(bline):
             continue
         items = bline.split('\t')
         seq1 = items[1]
-        start1 = items[2]
-        end1 = items[3]
         seq2 = items[8]
-        start2 = items[9]
-        end2 = items[10] 
         if p_group_cols[0] == 6:
             items[p_group_cols[0]] = int(items[p_group_cols[0]])
             items[p_group_cols[1]] = int(items[p_group_cols[1]])
@@ -111,8 +117,8 @@
     if s_group_cols[0] != -1:
         uniq_s_elems_1 = get_unique_elems(all_s_elems_1)
         uniq_s_elems_2 = get_unique_elems(all_s_elems_2)
-    mut1={}
-    mut2={}
+    mut1 = {}
+    mut2 = {}
     count1 = {}
     count2 = {}
     """
@@ -120,12 +126,12 @@
         uniq_elems_1 = get_unique_units(j.sort(lambda x, y: len(x)-len(y)))
     """
     if p_group_cols[0] == 6:    #i.e. the option chosen is group-by repeat number.
-        uniq_elems_1 = get_binned_lists(uniq_elems_1,p_bin_size)
-        uniq_elems_2 = get_binned_lists(uniq_elems_2,p_bin_size)
+        uniq_elems_1 = get_binned_lists( uniq_elems_1, p_bin_size )
+        uniq_elems_2 = get_binned_lists( uniq_elems_2, p_bin_size )
         
     if s_group_cols[0] == 6:    #i.e. the option chosen is subgroup-by repeat number.
-        uniq_s_elems_1 = get_binned_lists(uniq_s_elems_1,s_bin_size)
-        uniq_s_elems_2 = get_binned_lists(uniq_s_elems_2,s_bin_size)
+        uniq_s_elems_1 = get_binned_lists( uniq_s_elems_1, s_bin_size )
+        uniq_s_elems_2 = get_binned_lists( uniq_s_elems_2, s_bin_size )
 
     for pitem1 in uniq_elems_1:
         #repeats1 = []
@@ -143,61 +149,61 @@
                         if p_group_cols[0] == 6:
                             belems[p_group_cols[0]] = int(belems[p_group_cols[0]])
                         if belems[p_group_cols[0]] in pitem1:
-                            if belems[s_group_cols[0]]==sitem1:
+                            if belems[s_group_cols[0]] == sitem1:
                                 repeats1.append(int(belems[6]))
                                 repeats2.append(int(belems[13]))
                                 if belems[4] == 'mononucleotide':
                                     thresholds.append(mono_threshold)
                                 else:
                                     thresholds.append(non_mono_threshold)
-                                mut1[str(pitem1)+'\t'+str(sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds)
+                                mut1[str(pitem1)+'\t'+str(sitem1)] = mutabilityEstimator( repeats1, repeats2, thresholds )
                                 if region == 'align':
-                                    count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2))
-                                else:    
+                                    count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) )
+                                else:
                                     if winspecies == 1:
-                                        count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats1)
+                                        count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats1)
                                     elif winspecies == 2:
-                                        count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats2)
+                                        count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats2)
                     else:
                         if type(sitem1) == list:
                             if s_group_cols[0] == 6:
                                 belems[s_group_cols[0]] = int(belems[s_group_cols[0]])
-                            if belems[p_group_cols[0]]==pitem1 and belems[s_group_cols[0]] in sitem1:
+                            if belems[p_group_cols[0]] == pitem1 and belems[s_group_cols[0]] in sitem1:
                                 repeats1.append(int(belems[6]))
                                 repeats2.append(int(belems[13]))
                                 if belems[4] == 'mononucleotide':
                                     thresholds.append(mono_threshold)
                                 else:
                                     thresholds.append(non_mono_threshold)
-                                mut1["%s\t%s" %(pitem1,sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds)
+                                mut1["%s\t%s" % ( pitem1, sitem1 )] = mutabilityEstimator( repeats1, repeats2, thresholds )
                                 if region == 'align':
-                                    count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2))
-                                else:    
+                                    count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) )
+                                else:
                                     if winspecies == 1:
-                                        count1[str(pitem1)+'\t'+str(sitem1)]=sum(repeats1)
+                                        count1[str(pitem1)+'\t'+str(sitem1)] = sum(repeats1)
                                     elif winspecies == 2:
-                                        count1[str(pitem1)+'\t'+str(sitem1)]=sum(repeats2)
+                                        count1[str(pitem1)+'\t'+str(sitem1)] = sum(repeats2)
                         else:
-                            if belems[p_group_cols[0]]==pitem1 and belems[s_group_cols[0]]==sitem1:
+                            if belems[p_group_cols[0]] == pitem1 and belems[s_group_cols[0]] == sitem1:
                                 repeats1.append(int(belems[6]))
                                 repeats2.append(int(belems[13]))
                                 if belems[4] == 'mononucleotide':
                                     thresholds.append(mono_threshold)
                                 else:
                                     thresholds.append(non_mono_threshold)
-                                mut1["%s\t%s" %(pitem1,sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds)
+                                mut1["%s\t%s" % ( pitem1, sitem1 )] = mutabilityEstimator( repeats1, repeats2, thresholds )
                                 if region == 'align':
-                                    count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2))
-                                else:    
+                                    count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) )
+                                else:
                                     if winspecies == 1:
-                                        count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats1)
+                                        count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats1)
                                     elif winspecies == 2:
-                                        count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats2)
+                                        count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats2)
         else:   #Sub-group by feature is None
             for bline in blk_lines:
                 belems = bline.split('\t')
                 if type(pitem1) == list:
-                    #print >>sys.stderr, "item: " + str(item1)
+                    #print >> sys.stderr, "item: " + str(item1)
                     if p_group_cols[0] == 6:
                         belems[p_group_cols[0]] = int(belems[p_group_cols[0]])
                     if belems[p_group_cols[0]] in pitem1:
@@ -208,21 +214,21 @@
                         else:
                             thresholds.append(non_mono_threshold)
                 else:
-                    if belems[p_group_cols[0]]==pitem1:
+                    if belems[p_group_cols[0]] == pitem1:
                         repeats1.append(int(belems[6]))
                         repeats2.append(int(belems[13]))
                         if belems[4] == 'mononucleotide':
                             thresholds.append(mono_threshold)
                         else:
                             thresholds.append(non_mono_threshold)
-            mut1["%s" %(pitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds)
+            mut1["%s" % (pitem1)] = mutabilityEstimator( repeats1, repeats2, thresholds )
             if region == 'align':
-                count1["%s" %(pitem1)]=min(sum(repeats1),sum(repeats2))
-            else:            
+                count1["%s" % (pitem1)] = min( sum(repeats1), sum(repeats2) )
+            else:
                 if winspecies == 1:
-                    count1[str(pitem1)]=sum(repeats1)
+                    count1[str(pitem1)] = sum(repeats1)
                 elif winspecies == 2:
-                    count1[str(pitem1)]=sum(repeats2)
+                    count1[str(pitem1)] = sum(repeats2)
                 
     for pitem2 in uniq_elems_2:
         #repeats1 = []
@@ -239,57 +245,57 @@
                     if type(pitem2) == list:
                         if p_group_cols[0] == 6:
                             belems[p_group_cols[1]] = int(belems[p_group_cols[1]])
-                        if belems[p_group_cols[1]] in pitem2 and belems[s_group_cols[1]]==sitem2:
+                        if belems[p_group_cols[1]] in pitem2 and belems[s_group_cols[1]] == sitem2:
                             repeats2.append(int(belems[13]))
                             repeats1.append(int(belems[6]))
                             if belems[4] == 'mononucleotide':
                                 thresholds.append(mono_threshold)
                             else:
                                 thresholds.append(non_mono_threshold)
-                            mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds)
+                            mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds )
                             #count2[str(pitem2)+'\t'+str(sitem2)]=len(repeats2)
                             if region == 'align':
-                                count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2))
-                            else: 
+                                count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) )
+                            else:
                                 if winspecies == 1:
-                                    count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2)
+                                    count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2)
                                 elif winspecies == 2:
-                                    count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1)
+                                    count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1)
                     else:
                         if type(sitem2) == list:
                             if s_group_cols[0] == 6:
                                 belems[s_group_cols[1]] = int(belems[s_group_cols[1]])
-                            if belems[p_group_cols[1]]==pitem2 and belems[s_group_cols[1]] in sitem2:
+                            if belems[p_group_cols[1]] == pitem2 and belems[s_group_cols[1]] in sitem2:
                                 repeats2.append(int(belems[13]))
                                 repeats1.append(int(belems[6]))
                                 if belems[4] == 'mononucleotide':
                                     thresholds.append(mono_threshold)
                                 else:
                                     thresholds.append(non_mono_threshold)
-                                mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds)
+                                mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds )
                                 if region == 'align':
-                                    count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2))
-                                else: 
+                                    count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) )
+                                else:
                                     if winspecies == 1:
-                                        count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2)
+                                        count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2)
                                     elif winspecies == 2:
-                                        count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1)
+                                        count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1)
                         else:
-                            if belems[p_group_cols[1]]==pitem2 and belems[s_group_cols[1]]==sitem2:
+                            if belems[p_group_cols[1]] == pitem2 and belems[s_group_cols[1]] == sitem2:
                                 repeats1.append(int(belems[13]))
                                 repeats2.append(int(belems[6]))
                                 if belems[4] == 'mononucleotide':
                                     thresholds.append(mono_threshold)
                                 else:
                                     thresholds.append(non_mono_threshold)
-                                mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds)
+                                mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds )
                                 if region == 'align':
-                                    count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2))
-                                else: 
+                                    count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) )
+                                else:
                                     if winspecies == 1:
-                                        count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2)
+                                        count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2)
                                     elif winspecies == 2:
-                                        count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1)
+                                        count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1)
         else:   #Sub-group by feature is None
             for bline in blk_lines:
                 belems = bline.split('\t')
@@ -304,21 +310,21 @@
                         else:
                             thresholds.append(non_mono_threshold)
                 else:
-                    if belems[p_group_cols[1]]==pitem2:
+                    if belems[p_group_cols[1]] == pitem2:
                         repeats2.append(int(belems[13]))
                         repeats1.append(int(belems[6]))
                         if belems[4] == 'mononucleotide':
                             thresholds.append(mono_threshold)
                         else:
                             thresholds.append(non_mono_threshold)
-            mut2["%s" %(pitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds)
+            mut2["%s" % (pitem2)] = mutabilityEstimator( repeats2, repeats1, thresholds )
             if region == 'align':
-                count2["%s" %(pitem2)]=min(sum(repeats1),sum(repeats2))
+                count2["%s" % (pitem2)] = min( sum(repeats1), sum(repeats2) )
             else:
                 if winspecies == 1:
-                    count2["%s" %(pitem2)]=sum(repeats2)
+                    count2["%s" % (pitem2)] = sum(repeats2)
                 elif winspecies == 2:
-                    count2["%s" %(pitem2)]=sum(repeats1)
+                    count2["%s" % (pitem2)] = sum(repeats1)
     for key in mut1.keys():
         if key in mut2.keys():
             mut = (mut1[key][0]+mut2[key][0])/(mut1[key][1]+mut2[key][1])
@@ -328,9 +334,9 @@
             unit_found = False
             if p_group_cols[0] == 7 or s_group_cols[0] == 7: #if it is Repeat Unit (AG, GCT etc.) check for reverse-complements too
                 if p_group_cols[0] == 7:
-                    this,other = 0,1
+                    this, other = 0, 1
                 else:
-                    this,other = 1,0
+                    this, other = 1, 0
                 groups1 = key.split('\t')
                 mutn = mut1[key][0]
                 mutd = mut1[key][1]
@@ -351,28 +357,29 @@
             else:
                 mut = mut1[key][0]/mut1[key][1]
                 count = count1[key]
-        mut = "%.2e" %(mut/num_generations)
+        mut = "%.2e" % (mut/num_generations)
         if region == 'align':
-            print >>fout, str(blk) + '\t'+seq1 + '\t' + seq2 + '\t' +key.strip()+ '\t'+str(mut) + '\t'+ str(count)
+            print >> fout, str(blk) + '\t'+seq1 + '\t' + seq2 + '\t' +key.strip()+ '\t'+str(mut) + '\t'+ str(count)
         elif region == 'win':
-            fout.write("%s\t%s\t%s\t%s\n" %(blk,key.strip(),mut,count))
+            fout.write("%s\t%s\t%s\t%s\n" % ( blk, key.strip(), mut, count ))
             fout.flush()
             
     #catch any remaining repeats, for instance if the orthologous position contained different repeat units
     for remaining_key in mut2.keys():
         mut = mut2[remaining_key][0]/mut2[remaining_key][1]
-        mut = "%.2e" %(mut/num_generations)
+        mut = "%.2e" % (mut/num_generations)
         count = count2[remaining_key]
         if region == 'align':
-            print >>fout, str(blk) + '\t'+seq1 + '\t'+seq2 + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count)
+            print >> fout, str(blk) + '\t'+seq1 + '\t'+seq2 + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count)
         elif region == 'win':
-            fout.write("%s\t%s\t%s\t%s\n" %(blk,remaining_key.strip(),mut,count))
+            fout.write("%s\t%s\t%s\t%s\n" % ( blk, remaining_key.strip(), mut, count ))
             fout.flush()
-            #print >>fout, blk + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count)
+            #print >> fout, blk + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count)
+
 
 def counter(node, start, end, report_func):
     if start <= node.start < end and start < node.end <= end:
-        report_func(node) 
+        report_func(node)
         if node.right:
             counter(node.right, start, end, report_func)
         if node.left:
@@ -381,8 +388,8 @@
         counter(node.right, start, end, report_func)
     elif node.start >= end and node.left and node.left.maxend > start:
         counter(node.left, start, end, report_func)
-            
-        
+
+
 def main():
     infile = sys.argv[1]
     
@@ -400,21 +407,18 @@
     if region == 'win':
         if dbkey_i in elems[1]:
             winspecies = 1
-            speciesind = 1 
+            speciesind = 1
         elif dbkey_i in elems[8]:
             winspecies = 2
             speciesind = 8
         else:
-            stop_err("The species build corresponding to your interval file is not present in the Microsatellite file.") 
+            stop_err("The species build corresponding to your interval file is not present in the Microsatellite file.")
         
     fin = open(infile, 'r')
     skipped = 0
-    blk=0
-    win=0
-    linestr=""
+    linestr = ""
     
     if region == 'win':
-        
         msats = NiceReaderWrapper( fileinput.FileInput( infile ),
                                 chrom_col = speciesind,
                                 start_col = speciesind+1,
@@ -435,7 +439,7 @@
                 ichr = ielems[chr_col_i]
                 istart = int(ielems[start_col_i])
                 iend = int(ielems[end_col_i])
-                isrc = "%s.%s" %(dbkey_i,ichr)
+                isrc = "%s.%s" % ( dbkey_i, ichr )
                 if isrc not in msatTree.chroms:
                     continue
                 result = []
@@ -450,14 +454,14 @@
                 tmpfile1.seek(0)
                 output_writer(iline, tmpfile1.readlines())
             except:
-                skipped+=1
+                skipped += 1
         if skipped:
-            print "Skipped %d intervals as invalid." %(skipped)
+            print "Skipped %d intervals as invalid." % (skipped)
     elif region == 'align':
         if s_group_cols[0] != -1:
-            print >>fout, "#Window\tSpecies_1\tSpecies_2\tGroupby_Feature\tSubGroupby_Feature\tMutability\tCount"
+            print >> fout, "#Window\tSpecies_1\tSpecies_2\tGroupby_Feature\tSubGroupby_Feature\tMutability\tCount"
         else:
-            print >>fout, "#Window\tSpecies_1\tWindow_Start\tWindow_End\tSpecies_2\tGroupby_Feature\tMutability\tCount"
+            print >> fout, "#Window\tSpecies_1\tWindow_Start\tWindow_End\tSpecies_2\tGroupby_Feature\tMutability\tCount"
         prev_bnum = -1
         try:
             for line in fin:
@@ -481,9 +485,11 @@
                 prev_bnum = new_bnum
             output_writer(prev_bnum, linestr.strip().replace('\r','\n').split('\n'))
         except Exception, ea:
-            print >>sys.stderr, ea
+            print >> sys.stderr, ea
             skipped += 1
         if skipped:
-            print "Skipped %d lines as invalid." %(skipped)
+            print "Skipped %d lines as invalid." % (skipped)
+
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()

diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/partialR_square.py
--- a/tools/regVariation/partialR_square.py
+++ b/tools/regVariation/partialR_square.py
@@ -2,7 +2,7 @@
 
 from galaxy import eggs
 
-import sys, string
+import sys
 from rpy import *
 import numpy
 
@@ -13,6 +13,7 @@
     sys.stderr.write(msg)
     sys.exit()
 
+
 def sscombs(s):
     if len(s) == 1:
         return [s]
@@ -26,14 +27,14 @@
 x_cols = sys.argv[3].split(',')
 outfile = sys.argv[4]
 
-print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
+print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 )
 fout = open(outfile,'w')
 
 for i, line in enumerate( file ( infile )):
     line = line.rstrip('\r\n')
     if len( line )>0 and not line.startswith( '#' ):
         elems = line.split( '\t' )
-        break 
+        break
     if i == 30:
         break # Hopefully we'll never get here...
 
@@ -43,7 +44,7 @@
 y_vals = []
 x_vals = []
 
-for k,col in enumerate(x_cols):
+for k, col in enumerate(x_cols):
     x_cols[k] = int(col)-1
     x_vals.append([])
     """
@@ -51,13 +52,13 @@
         float( elems[x_cols[k]] )
     except:
         try:
-            msg = "This operation cannot be performed on non-numeric column %d containing value '%s'." %( col, elems[x_cols[k]] )
+            msg = "This operation cannot be performed on non-numeric column %d containing value '%s'." % ( col, elems[x_cols[k]] )
         except:
             msg = "This operation cannot be performed on non-numeric data."
         stop_err( msg )
     """
 NA = 'NA'
-for ind,line in enumerate( file( infile )):
+for ind, line in enumerate( file( infile )):
     if line and not line.startswith( '#' ):
         try:
             fields = line.split("\t")
@@ -65,20 +66,20 @@
                 yval = float(fields[y_col])
             except Exception, ey:
                 yval = r('NA')
-                #print >>sys.stderr, "ey = %s" %ey
+                #print >> sys.stderr, "ey = %s" %ey
             y_vals.append(yval)
-            for k,col in enumerate(x_cols):
+            for k, col in enumerate(x_cols):
                 try:
                     xval = float(fields[col])
                 except Exception, ex:
                     xval = r('NA')
-                    #print >>sys.stderr, "ex = %s" %ex
+                    #print >> sys.stderr, "ex = %s" %ex
                 x_vals[k].append(xval)
         except:
             pass
 
 x_vals1 = numpy.asarray(x_vals).transpose()
-dat= r.list(x=array(x_vals1), y=y_vals)
+dat = r.list(x=array(x_vals1), y=y_vals)
 
 set_default_mode(NO_CONVERSION)
 try:
@@ -91,7 +92,7 @@
 fullr2 = summary.get('r.squared','NA')
 
 if fullr2 == 'NA':
-    stop_error("Error in linear regression")
+    stop_err("Error in linear regression")
 
 if len(x_vals) < 10:
     s = ""
@@ -100,10 +101,10 @@
 else:
     stop_err("This tool only works with less than 10 predictors.")
 
-print >>fout, "#Model\tR-sq\tpartial_R_Terms\tpartial_R_Value"
+print >> fout, "#Model\tR-sq\tpartial_R_Terms\tpartial_R_Value"
 all_combos = sorted(sscombs(s), key=len)
 all_combos.reverse()
-for j,cols in enumerate(all_combos):
+for j, cols in enumerate(all_combos):
     #if len(cols) == len(s):    #Same as the full model above
     #    continue
     if len(cols) == 1:
@@ -113,7 +114,7 @@
         for col in cols:
             x_v.append(x_vals[int(col)])
         x_vals1 = numpy.asarray(x_v).transpose()
-    dat= r.list(x=array(x_vals1), y=y_vals)
+    dat = r.list(x=array(x_vals1), y=y_vals)
     set_default_mode(NO_CONVERSION)
     red = r.lm(r("y ~ x"), data= dat)    #Reduced model
     set_default_mode(BASIC_CONVERSION)
@@ -136,11 +137,11 @@
         partial_R_col_str = "-"
         partial_R = "-"
     try:
-        redr2 = "%.4f" %(float(redr2))
+        redr2 = "%.4f" % (float(redr2))
     except:
         pass
     try:
-        partial_R = "%.4f" %(float(partial_R))
+        partial_R = "%.4f" % (float(partial_R))
     except:
         pass
-    print >>fout, "%s\t%s\t%s\t%s" %(col_str,redr2,partial_R_col_str,partial_R)
+    print >> fout, "%s\t%s\t%s\t%s" % ( col_str, redr2, partial_R_col_str, partial_R )

This diff is so big that we needed to truncate the remainder.

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.

    

commits-noreply＠bitbucket.org

tags

participants (1)