commit/galaxy-central: 2 new changesets
2 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/935942afdf70/ Changeset: 935942afdf70 User: nsoranzo Date: 2014-01-27 19:51:46 Summary: Remove unused imports and unused variables. Fix spacing. Affected #: 17 files diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/WeightedAverage.py --- a/tools/regVariation/WeightedAverage.py +++ b/tools/regVariation/WeightedAverage.py @@ -1,20 +1,16 @@ #!/usr/bin/env python """ - usage: %prog bed_file_1 bed_file_2 out_file -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file -2, --cols2=N,N,N,N,N: Columns for chr, start, end, strand, name/value in second file """ -from galaxy import eggs import collections -import sys, string +import sys #import numpy from galaxy import eggs import pkg_resources pkg_resources.require( "bx-python" ) -import sys, traceback, fileinput -from warnings import warn from galaxy.tools.util.galaxyops import * from bx.cookbook import doc_optparse @@ -27,77 +23,72 @@ sys.exit() -def FindRate(chromosome,start_stop,dictType): - OverlapList=[] +def FindRate(chromosome, start_stop, dictType): + OverlapList = [] for tempO in dictType[chromosome]: - DatabaseInterval=[tempO[0],tempO[1]] - Overlap=GetOverlap(start_stop,DatabaseInterval) - if Overlap>0: - OverlapList.append([Overlap,tempO[2]]) - - if len(OverlapList)>0: - - SumRecomb=0 - SumOverlap=0 + DatabaseInterval = [tempO[0], tempO[1]] + Overlap = GetOverlap( start_stop, DatabaseInterval ) + if Overlap > 0: + OverlapList.append([Overlap, tempO[2]]) + + if len(OverlapList) > 0: + SumRecomb = 0 + SumOverlap = 0 for member in OverlapList: - SumRecomb+=member[0]*member[1] - SumOverlap+=member[0] - averageRate=SumRecomb/SumOverlap - + SumRecomb += member[0]*member[1] + SumOverlap += member[0] + averageRate = SumRecomb/SumOverlap return averageRate - else: return 'NA' - - - -def GetOverlap(a,b): - return min(a[1],b[1])-max(a[0],b[0]) + + +def GetOverlap(a, b): + return min(a[1], b[1])-max(a[0], b[0]) + options, args = doc_optparse.parse( __doc__ ) try: chr_col_1, start_col_1, end_col_1, strand_col1 = parse_cols_arg( options.cols1 ) - chr_col_2, start_col_2, end_col_2, strand_col2, name_col_2 = parse_cols_arg( options.cols2 ) + chr_col_2, start_col_2, end_col_2, strand_col2, name_col_2 = parse_cols_arg( options.cols2 ) input1, input2, input3 = args except Exception, eee: print eee stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) - +fd2 = open(input2) +lines2 = fd2.readlines() +RecombChrDict = collections.defaultdict(list) -fd2=open(input2) -lines2=fd2.readlines() -RecombChrDict=collections.defaultdict(list) - -skipped=0 +skipped = 0 for line in lines2: - temp=line.strip().split() + temp = line.strip().split() try: assert float(temp[int(name_col_2)]) except: - skipped+=1 + skipped += 1 continue - tempIndex=[int(temp[int(start_col_2)]),int(temp[int(end_col_2)]),float(temp[int(name_col_2)])] + tempIndex = [int(temp[int(start_col_2)]), int(temp[int(end_col_2)]), float(temp[int(name_col_2)])] RecombChrDict[temp[int(chr_col_2)]].append(tempIndex) -print "Skipped %d features with invalid values" %(skipped) +print "Skipped %d features with invalid values" % (skipped) -fd1=open(input1) -lines=fd1.readlines() -finalProduct='' +fd1 = open(input1) +lines = fd1.readlines() +finalProduct = '' for line in lines: - temp=line.strip().split('\t') - chromosome=temp[int(chr_col_1)] - start=int(temp[int(start_col_1)]) - stop=int(temp[int(end_col_1)]) - start_stop=[start,stop] - RecombRate=FindRate(chromosome,start_stop,RecombChrDict) + temp = line.strip().split('\t') + chromosome = temp[int(chr_col_1)] + start = int(temp[int(start_col_1)]) + stop = int(temp[int(end_col_1)]) + start_stop = [start, stop] + RecombRate = FindRate( chromosome, start_stop, RecombChrDict ) try: - RecombRate="%.4f" %(float(RecombRate)) + RecombRate = "%.4f" % (float(RecombRate)) except: - RecombRate=RecombRate - finalProduct+=line.strip()+'\t'+str(RecombRate)+'\n' -fdd=open(input3,'w') + RecombRate = RecombRate + finalProduct += line.strip()+'\t'+str(RecombRate)+'\n' +fdd = open(input3, 'w') fdd.writelines(finalProduct) fdd.close() diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/best_regression_subsets.py --- a/tools/regVariation/best_regression_subsets.py +++ b/tools/regVariation/best_regression_subsets.py @@ -2,7 +2,7 @@ from galaxy import eggs -import sys, string +import sys from rpy import * import numpy @@ -10,19 +10,20 @@ sys.stderr.write(msg) sys.exit() + infile = sys.argv[1] y_col = int(sys.argv[2])-1 x_cols = sys.argv[3].split(',') outfile = sys.argv[4] outfile2 = sys.argv[5] -print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1) +print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 ) fout = open(outfile,'w') for i, line in enumerate( file ( infile )): line = line.rstrip('\r\n') if len( line )>0 and not line.startswith( '#' ): elems = line.split( '\t' ) - break + break if i == 30: break # Hopefully we'll never get here... @@ -32,12 +33,12 @@ y_vals = [] x_vals = [] -for k,col in enumerate(x_cols): +for k, col in enumerate(x_cols): x_cols[k] = int(col)-1 x_vals.append([]) NA = 'NA' -for ind,line in enumerate( file( infile )): +for ind, line in enumerate( file( infile ) ): if line and not line.startswith( '#' ): try: fields = line.split("\t") @@ -46,7 +47,7 @@ except Exception, ey: yval = r('NA') y_vals.append(yval) - for k,col in enumerate(x_cols): + for k, col in enumerate(x_cols): try: xval = float(fields[col]) except Exception, ex: @@ -59,10 +60,10 @@ x_vals1 = numpy.asarray(x_vals).transpose() -dat= r.list(x=array(x_vals1), y=y_vals) +dat = r.list(x=array(x_vals1), y=y_vals) r.library("leaps") - + set_default_mode(NO_CONVERSION) try: leaps = r.regsubsets(r("y ~ x"), data= r.na_exclude(dat)) @@ -75,10 +76,10 @@ pattern = "[" for i in range(tot): pattern = pattern + 'c' + str(int(x_cols[int(i)]) + 1) + ' ' -pattern = pattern.strip() + ']' -print >>fout, "#Vars\t%s\tR-sq\tAdj. R-sq\tC-p\tbic" %(pattern) -for ind,item in enumerate(summary['outmat']): - print >>fout, "%s\t%s\t%s\t%s\t%s\t%s" %(str(item).count('*'), item, summary['rsq'][ind], summary['adjr2'][ind], summary['cp'][ind], summary['bic'][ind]) +pattern = pattern.strip() + ']' +print >> fout, "#Vars\t%s\tR-sq\tAdj. R-sq\tC-p\tbic" % (pattern) +for ind, item in enumerate(summary['outmat']): + print >> fout, "%s\t%s\t%s\t%s\t%s\t%s" % (str(item).count('*'), item, summary['rsq'][ind], summary['adjr2'][ind], summary['cp'][ind], summary['bic'][ind]) r.pdf( outfile2, 8, 8 ) diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/featureCounter.py --- a/tools/regVariation/featureCounter.py +++ b/tools/regVariation/featureCounter.py @@ -11,8 +11,7 @@ from galaxy import eggs import pkg_resources pkg_resources.require( "bx-python" ) -import sys, traceback, fileinput -from warnings import warn +import sys, fileinput from bx.intervals.io import * from bx.cookbook import doc_optparse from bx.intervals.operations import quicksect @@ -33,7 +32,7 @@ partial += 1 if node.left and node.left.maxend > start: counter(node.left, start, end) - if node.right: + if node.right: counter(node.right, start, end) elif start < node.start < end: if node.end <= end: @@ -42,10 +41,10 @@ partial += 1 if node.left and node.left.maxend > start: counter(node.left, start, end) - if node.right: + if node.right: counter(node.right, start, end) else: - if node.left: + if node.left: counter(node.left, start, end) def count_coverage( readers, comments=True ): @@ -58,8 +57,8 @@ if type( item ) is GenomicInterval: rightTree.insert( item, secondary.linenum, item.fields ) - bitsets = secondary_copy.binned_bitsets() - + bitsets = secondary_copy.binned_bitsets() + global full, partial for interval in primary: @@ -82,7 +81,7 @@ bases_covered = bitsets[ chrom ].count_range( start, end-start ) if (end - start) == 0: percent = 0 - else: + else: percent = float(bases_covered) / float(end - start) if bases_covered: root = rightTree.chroms[chrom] #root node for the chrom tree @@ -92,13 +91,14 @@ interval.fields.append(str(full)) interval.fields.append(str(partial)) yield interval - + + def main(): options, args = doc_optparse.parse( __doc__ ) try: chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 ) - chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 ) + chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 ) in1_fname, in2_fname, out_fname = args except: stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) @@ -126,7 +126,7 @@ out_file = open( out_fname, "w" ) try: - for line in count_coverage([g1,g2,g2_copy]): + for line in count_coverage([g1, g2, g2_copy]): if type( line ) is GenomicInterval: out_file.write( "%s\n" % "\t".join( line.fields ) ) else: @@ -143,6 +143,7 @@ print skipped( g2, filedesc=" of 2nd dataset" ) elif g2_copy.skipped > 0: print skipped( g2_copy, filedesc=" of 2nd dataset" ) - + + if __name__ == "__main__": main() diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/getIndelRates_3way.py --- a/tools/regVariation/getIndelRates_3way.py +++ b/tools/regVariation/getIndelRates_3way.py @@ -6,7 +6,6 @@ pkg_resources.require( "bx-python" ) import sys, os, tempfile -import traceback import fileinput from warnings import warn @@ -18,7 +17,8 @@ def stop_err(msg): sys.stderr.write(msg) sys.exit() - + + def counter(node, start, end, sort_col): global full, blk_len, blk_list if node.start < start: @@ -31,14 +31,14 @@ blk_len += int(node.other[sort_col+2]) if node.left and node.left.maxend > start: counter(node.left, start, end, sort_col) - if node.right: + if node.right: counter(node.right, start, end, sort_col) elif node.start > end: - if node.left: + if node.left: counter(node.left, start, end, sort_col) - -infile = sys.argv[1] + +infile = sys.argv[1] fout = open(sys.argv[2],'w') int_file = sys.argv[3] if int_file != "None": #User has specified an interval file @@ -48,9 +48,9 @@ chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[5] ) except: stop_err("Unable to open input Interval file") - + + def main(): - for i, line in enumerate( file ( infile )): line = line.rstrip('\r\n') if len( line )>0 and not line.startswith( '#' ): @@ -86,8 +86,7 @@ break except: continue - - + fin = open(infile, 'r') skipped = 0 @@ -98,7 +97,7 @@ os.system(cmdline) except: stop_err("Encountered error while sorting the input file.") - print >>fout, "#Block\t%s_InsRate\t%s_InsRate\t%s_InsRate\t%s_DelRate\t%s_DelRate\t%s_DelRate" %(species[0],species[1],species[2],species[0],species[1],species[2]) + print >> fout, "#Block\t%s_InsRate\t%s_InsRate\t%s_InsRate\t%s_DelRate\t%s_DelRate\t%s_DelRate" % ( species[0], species[1], species[2], species[0], species[1], species[2] ) prev_bnum = -1 sorted_infile.seek(0) for line in sorted_infile.readlines(): @@ -112,16 +111,16 @@ if prev_bnum != -1: irate = [] drate = [] - for i,elem in enumerate(inserts): + for i, elem in enumerate(inserts): try: - irate.append(str("%.2e" %(inserts[i]/blen[i]))) + irate.append(str("%.2e" % (inserts[i]/blen[i]))) except: irate.append('0') try: - drate.append(str("%.2e" %(deletes[i]/blen[i]))) + drate.append(str("%.2e" % (deletes[i]/blen[i]))) except: drate.append('0') - print >>fout, "%s\t%s\t%s" %(prev_bnum, '\t'.join(irate) , '\t'.join(drate)) + print >> fout, "%s\t%s\t%s" % ( prev_bnum, '\t'.join(irate) , '\t'.join(drate) ) inserts = [0.0, 0.0, 0.0] deletes = [0.0, 0.0, 0.0] blen = [] @@ -134,25 +133,24 @@ inserts[sp_ind] += 1 elif elems[1].endswith('delete'): deletes[sp_ind] += 1 - prev_bnum = new_bnum + prev_bnum = new_bnum except Exception, ei: #print >>sys.stderr, ei continue irate = [] drate = [] - for i,elem in enumerate(inserts): + for i, elem in enumerate(inserts): try: - irate.append(str("%.2e" %(inserts[i]/blen[i]))) + irate.append(str("%.2e" % (inserts[i]/blen[i]))) except: irate.append('0') try: - drate.append(str("%.2e" %(deletes[i]/blen[i]))) + drate.append(str("%.2e" % (deletes[i]/blen[i]))) except: drate.append('0') - print >>fout, "%s\t%s\t%s" %(prev_bnum, '\t'.join(irate) , '\t'.join(drate)) + print >> fout, "%s\t%s\t%s" % ( prev_bnum, '\t'.join(irate) , '\t'.join(drate) ) sys.exit() - inf = open(infile, 'r') start_met = False end_met = False @@ -163,14 +161,14 @@ try: assert int(elems[0]) assert len(elems) == 18 - if dbkey_i not in elems[1]: - if not(start_met): + if dbkey_i not in elems[1]: + if not(start_met): continue else: sp_end = n break else: - print >>sp_file, line + print >> sp_file, line if not(start_met): start_met = True sp_start = n @@ -201,7 +199,7 @@ for item in indel: if type( item ) is GenomicInterval: indelTree.insert( item, indel.linenum, item.fields ) - result=[] + result = [] global full, blk_len, blk_list for interval in win: @@ -213,14 +211,14 @@ chrom = interval.chrom start = int(interval.start) end = int(interval.end) - if start > end: + if start > end: warn( "Interval start after end!" ) - ins_chr = "%s.%s_insert" %(dbkey_i,chrom) - del_chr = "%s.%s_delete" %(dbkey_i,chrom) + ins_chr = "%s.%s_insert" % ( dbkey_i, chrom ) + del_chr = "%s.%s_delete" % ( dbkey_i, chrom ) irate = 0 drate = 0 if ins_chr not in indelTree.chroms and del_chr not in indelTree.chroms: - pass + pass else: if ins_chr in indelTree.chroms: full = 0.0 @@ -242,8 +240,9 @@ interval.fields.append(str("%.2e" %irate)) interval.fields.append(str("%.2e" %drate)) - print >>fout, "\t".join(interval.fields) + print >> fout, "\t".join(interval.fields) fout.flush() + if __name__ == "__main__": - main() \ No newline at end of file + main() diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/getIndels.py --- a/tools/regVariation/getIndels.py +++ b/tools/regVariation/getIndels.py @@ -8,13 +8,12 @@ from __future__ import division from galaxy import eggs -import pkg_resources +import pkg_resources pkg_resources.require( "bx-python" ) try: pkg_resources.require("numpy") except: pass -import psyco_full import sys from bx.cookbook import doc_optparse from galaxy.tools.exception_handling import * @@ -22,24 +21,24 @@ assert sys.version_info[:2] >= ( 2, 4 ) -def main(): +def main(): # Parsing Command Line here options, args = doc_optparse.parse( __doc__ ) try: - inp_file, out_file1 = args + inp_file, out_file1 = args except: print >> sys.stderr, "Tool initialization error." sys.exit() try: - fin = open(inp_file,'r') + open(inp_file, 'r') except: print >> sys.stderr, "Unable to open input file" sys.exit() try: - fout1 = open(out_file1,'w') - #fout2 = open(out_file2,'w') + fout1 = open(out_file1, 'w') + #fout2 = open(out_file2, 'w') except: print >> sys.stderr, "Unable to open output file" sys.exit() @@ -47,11 +46,10 @@ try: maf_reader = bx.align.maf.Reader( open(inp_file, 'r') ) except: - print >>sys.stderr, "Your MAF file appears to be malformed." + print >> sys.stderr, "Your MAF file appears to be malformed." sys.exit() - maf_count = 0 - print >>fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length" + print >> fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length" for block_ind, block in enumerate(maf_reader): if len(block.components) < 2: continue @@ -84,19 +82,19 @@ #write 2 if prev_pos_gap2 == 1: prev_pos_gap2 = 0 - print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1,nt_pos1+1,nt_pos2-1,nt_pos2-1+gaplen2,gaplen2) + print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1, nt_pos1+1, nt_pos2-1, nt_pos2-1+gaplen2, gaplen2 ) if pos == len(seq1)-1: - print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1,nt_pos1+1,nt_pos2+1-gaplen1,nt_pos2+1,gaplen1) + print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1, nt_pos1+1, nt_pos2+1-gaplen1, nt_pos2+1, gaplen1 ) else: prev_pos_gap1 = 0 prev_pos_gap2 = 0 """ if prev_pos_gap1 == 1: prev_pos_gap1 = 0 - print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,gaplen1) + print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, gaplen1 ) elif prev_pos_gap2 == 1: prev_pos_gap2 = 0 - print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos2-1,nt_pos2,gaplen2) + print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos2-1, nt_pos2, gaplen2 ) """ else: nt_pos1 += 1 @@ -105,19 +103,21 @@ #write both if prev_pos_gap1 == 1: prev_pos_gap1 = 0 - print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2-gaplen1,nt_pos2,gaplen1) + print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2-gaplen1, nt_pos2, gaplen1 ) elif prev_pos_gap2 == 1: prev_pos_gap2 = 0 - print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1-gaplen2,nt_pos1,nt_pos2-1,nt_pos2,gaplen2) + print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1-gaplen2, nt_pos1, nt_pos2-1, nt_pos2, gaplen2 ) else: gaplen2 += 1 prev_pos_gap2 = 1 #write 1 if prev_pos_gap1 == 1: prev_pos_gap1 = 0 - print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2,nt_pos2+gaplen1,gaplen1) + print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2, nt_pos2+gaplen1, gaplen1 ) if pos == len(seq1)-1: - print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1+1-gaplen2,nt_pos1+1,nt_pos2,nt_pos2+1,gaplen2) + print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1+1-gaplen2, nt_pos1+1, nt_pos2, nt_pos2+1, gaplen2 ) pos += 1 + + if __name__ == "__main__": main() diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/linear_regression.py --- a/tools/regVariation/linear_regression.py +++ b/tools/regVariation/linear_regression.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from galaxy import eggs -import sys, string +import sys from rpy import * import numpy @@ -15,14 +15,14 @@ outfile = sys.argv[4] outfile2 = sys.argv[5] -print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1) +print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 ) fout = open(outfile,'w') elems = [] for i, line in enumerate( file ( infile )): line = line.rstrip('\r\n') if len( line )>0 and not line.startswith( '#' ): elems = line.split( '\t' ) - break + break if i == 30: break # Hopefully we'll never get here... @@ -32,12 +32,12 @@ y_vals = [] x_vals = [] -for k,col in enumerate(x_cols): +for k, col in enumerate(x_cols): x_cols[k] = int(col)-1 x_vals.append([]) NA = 'NA' -for ind,line in enumerate( file( infile )): +for ind, line in enumerate( file( infile )): if line and not line.startswith( '#' ): try: fields = line.split("\t") @@ -46,7 +46,7 @@ except: yval = r('NA') y_vals.append(yval) - for k,col in enumerate(x_cols): + for k, col in enumerate(x_cols): try: xval = float(fields[col]) except: @@ -57,7 +57,7 @@ x_vals1 = numpy.asarray(x_vals).transpose() -dat= r.list(x=array(x_vals1), y=y_vals) +dat = r.list(x=array(x_vals1), y=y_vals) set_default_mode(NO_CONVERSION) try: @@ -66,8 +66,8 @@ stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain only non-numeric or invalid values.") set_default_mode(BASIC_CONVERSION) -coeffs=linear_model.as_py()['coefficients'] -yintercept= coeffs['(Intercept)'] +coeffs = linear_model.as_py()['coefficients'] +yintercept = coeffs['(Intercept)'] summary = r.summary(linear_model) co = summary.get('coefficients', 'NA') @@ -82,8 +82,8 @@ except: pass -print >>fout, "Y-intercept\t%s" %(yintercept) -print >>fout, "p-value (Y-intercept)\t%s" %(pvaly) +print >> fout, "Y-intercept\t%s" % (yintercept) +print >> fout, "p-value (Y-intercept)\t%s" % (pvaly) if len(x_vals) == 1: #Simple linear regression case with 1 predictor variable try: @@ -94,22 +94,22 @@ pval = r.round(float(co[1][3]), digits=10) except: pval = 'NA' - print >>fout, "Slope (c%d)\t%s" %(x_cols[0]+1,slope) - print >>fout, "p-value (c%d)\t%s" %(x_cols[0]+1,pval) + print >> fout, "Slope (c%d)\t%s" % ( x_cols[0]+1, slope ) + print >> fout, "p-value (c%d)\t%s" % ( x_cols[0]+1, pval ) else: #Multiple regression case with >1 predictors - ind=1 + ind = 1 while ind < len(coeffs.keys()): try: slope = r.round(float(coeffs['x'+str(ind)]), digits=10) except: slope = 'NA' - print >>fout, "Slope (c%d)\t%s" %(x_cols[ind-1]+1,slope) + print >> fout, "Slope (c%d)\t%s" % ( x_cols[ind-1]+1, slope ) try: pval = r.round(float(co[ind][3]), digits=10) except: pval = 'NA' - print >>fout, "p-value (c%d)\t%s" %(x_cols[ind-1]+1,pval) - ind+=1 + print >> fout, "p-value (c%d)\t%s" % ( x_cols[ind-1]+1, pval ) + ind += 1 rsq = summary.get('r.squared','NA') adjrsq = summary.get('adj.r.squared','NA') @@ -125,14 +125,14 @@ except: pass -print >>fout, "R-squared\t%s" %(rsq) -print >>fout, "Adjusted R-squared\t%s" %(adjrsq) -print >>fout, "F-statistic\t%s" %(fstat) -print >>fout, "Sigma\t%s" %(sigma) +print >> fout, "R-squared\t%s" % (rsq) +print >> fout, "Adjusted R-squared\t%s" % (adjrsq) +print >> fout, "F-statistic\t%s" % (fstat) +print >> fout, "Sigma\t%s" % (sigma) r.pdf( outfile2, 8, 8 ) if len(x_vals) == 1: #Simple linear regression case with 1 predictor variable - sub_title = "Slope = %s; Y-int = %s" %(slope,yintercept) + sub_title = "Slope = %s; Y-int = %s" % ( slope, yintercept ) try: r.plot(x=x_vals[0], y=y_vals, xlab="X", ylab="Y", sub=sub_title, main="Scatterplot with regression") r.abline(a=yintercept, b=slope, col="red") diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/logistic_regression_vif.py --- a/tools/regVariation/logistic_regression_vif.py +++ b/tools/regVariation/logistic_regression_vif.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from galaxy import eggs -import sys, string +import sys from rpy import * import numpy @@ -15,14 +15,14 @@ outfile = sys.argv[4] -print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1) +print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 ) fout = open(outfile,'w') elems = [] -for i, line in enumerate( file ( infile )): +for i, line in enumerate( file( infile ) ): line = line.rstrip('\r\n') if len( line )>0 and not line.startswith( '#' ): elems = line.split( '\t' ) - break + break if i == 30: break # Hopefully we'll never get here... @@ -32,12 +32,12 @@ y_vals = [] x_vals = [] -for k,col in enumerate(x_cols): +for k, col in enumerate(x_cols): x_cols[k] = int(col)-1 x_vals.append([]) NA = 'NA' -for ind,line in enumerate( file( infile )): +for ind, line in enumerate( file( infile )): if line and not line.startswith( '#' ): try: fields = line.split("\t") @@ -46,7 +46,7 @@ except: yval = r('NA') y_vals.append(yval) - for k,col in enumerate(x_cols): + for k, col in enumerate(x_cols): try: xval = float(fields[col]) except: @@ -57,46 +57,45 @@ x_vals1 = numpy.asarray(x_vals).transpose() -check1=0 -check0=0 +check1 = 0 +check0 = 0 for i in y_vals: if i == 1: - check1=1 + check1 = 1 if i == 0: - check0=1 -if check1==0 or check0==0: + check0 = 1 +if check1 == 0 or check0 == 0: sys.exit("Warning: logistic regression must have at least two classes") for i in y_vals: - if i not in [1,0,r('NA')]: - print >>fout, str(i) + if i not in [1, 0, r('NA')]: + print >> fout, str(i) sys.exit("Warning: the current version of this tool can run only with two classes and need to be labeled as 0 and 1.") - - -dat= r.list(x=array(x_vals1), y=y_vals) -novif=0 + +dat = r.list(x=array(x_vals1), y=y_vals) +novif = 0 set_default_mode(NO_CONVERSION) try: - linear_model = r.glm(r("y ~ x"), data = r.na_exclude(dat),family="binomial") + linear_model = r.glm(r("y ~ x"), data=r.na_exclude(dat), family="binomial") except RException, rex: stop_err("Error performing logistic regression on the input data.\nEither the response column or one of the predictor columns contain only non-numeric or invalid values.") if len(x_cols)>1: try: r('suppressPackageStartupMessages(library(car))') - r.assign('dat',dat) - r.assign('ncols',len(x_cols)) - vif=r.vif(r('glm(dat$y ~ ., data = na.exclude(data.frame(as.matrix(dat$x,ncol=ncols))->datx),family="binomial")')) + r.assign('dat', dat) + r.assign('ncols', len(x_cols)) + vif = r.vif(r('glm(dat$y ~ ., data = na.exclude(data.frame(as.matrix(dat$x,ncol=ncols))->datx), family="binomial")')) except RException, rex: print rex else: - novif=1 - + novif = 1 + set_default_mode(BASIC_CONVERSION) -coeffs=linear_model.as_py()['coefficients'] -null_deviance=linear_model.as_py()['null.deviance'] -residual_deviance=linear_model.as_py()['deviance'] -yintercept= coeffs['(Intercept)'] +coeffs = linear_model.as_py()['coefficients'] +null_deviance = linear_model.as_py()['null.deviance'] +residual_deviance = linear_model.as_py()['deviance'] +yintercept = coeffs['(Intercept)'] summary = r.summary(linear_model) co = summary.get('coefficients', 'NA') """ @@ -109,14 +108,14 @@ pvaly = r.round(float(co[0][3]), digits=10) except: pass -print >>fout, "response column\tc%d" %(y_col+1) -tempP=[] +print >> fout, "response column\tc%d" % (y_col+1) +tempP = [] for i in x_cols: tempP.append('c'+str(i+1)) -tempP=','.join(tempP) -print >>fout, "predictor column(s)\t%s" %(tempP) -print >>fout, "Y-intercept\t%s" %(yintercept) -print >>fout, "p-value (Y-intercept)\t%s" %(pvaly) +tempP = ','.join(tempP) +print >> fout, "predictor column(s)\t%s" % (tempP) +print >> fout, "Y-intercept\t%s" % (yintercept) +print >> fout, "p-value (Y-intercept)\t%s" % (pvaly) if len(x_vals) == 1: #Simple linear regression case with 1 predictor variable try: @@ -127,44 +126,43 @@ pval = r.round(float(co[1][3]), digits=10) except: pval = 'NA' - print >>fout, "Slope (c%d)\t%s" %(x_cols[0]+1,slope) - print >>fout, "p-value (c%d)\t%s" %(x_cols[0]+1,pval) + print >> fout, "Slope (c%d)\t%s" % ( x_cols[0]+1, slope ) + print >> fout, "p-value (c%d)\t%s" % ( x_cols[0]+1, pval ) else: #Multiple regression case with >1 predictors - ind=1 + ind = 1 while ind < len(coeffs.keys()): try: slope = r.round(float(coeffs['x'+str(ind)]), digits=10) except: slope = 'NA' - print >>fout, "Slope (c%d)\t%s" %(x_cols[ind-1]+1,slope) + print >> fout, "Slope (c%d)\t%s" % ( x_cols[ind-1]+1, slope ) try: pval = r.round(float(co[ind][3]), digits=10) except: pval = 'NA' - print >>fout, "p-value (c%d)\t%s" %(x_cols[ind-1]+1,pval) - ind+=1 + print >> fout, "p-value (c%d)\t%s" % ( x_cols[ind-1]+1, pval ) + ind += 1 rsq = summary.get('r.squared','NA') - try: - rsq= r.round(float((null_deviance-residual_deviance)/null_deviance), digits=5) - null_deviance= r.round(float(null_deviance), digits=5) - residual_deviance= r.round(float(residual_deviance), digits=5) + rsq = r.round(float((null_deviance-residual_deviance)/null_deviance), digits=5) + null_deviance = r.round(float(null_deviance), digits=5) + residual_deviance = r.round(float(residual_deviance), digits=5) except: pass -print >>fout, "Null deviance\t%s" %(null_deviance) -print >>fout, "Residual deviance\t%s" %(residual_deviance) -print >>fout, "pseudo R-squared\t%s" %(rsq) -print >>fout, "\n" -print >>fout, 'vif' +print >> fout, "Null deviance\t%s" % (null_deviance) +print >> fout, "Residual deviance\t%s" % (residual_deviance) +print >> fout, "pseudo R-squared\t%s" % (rsq) +print >> fout, "\n" +print >> fout, 'vif' -if novif==0: - py_vif=vif.as_py() - count=0 +if novif == 0: + py_vif = vif.as_py() + count = 0 for i in sorted(py_vif.keys()): - print >>fout,'c'+str(x_cols[count]+1) ,str(py_vif[i]) - count+=1 -elif novif==1: - print >>fout, "vif can calculate only when model have more than 1 predictor" + print >> fout, 'c'+str(x_cols[count]+1), str(py_vif[i]) + count += 1 +elif novif == 1: + print >> fout, "vif can calculate only when model have more than 1 predictor" diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/maf_cpg_filter.py --- a/tools/regVariation/maf_cpg_filter.py +++ b/tools/regVariation/maf_cpg_filter.py @@ -10,7 +10,7 @@ """ from galaxy import eggs -import pkg_resources +import pkg_resources pkg_resources.require( "bx-python" ) try: pkg_resources.require( "numpy" ) @@ -54,7 +54,7 @@ defn = "non-CpG" cpgfilter.run( reader, writer.write ) - print "%2.2f percent bases masked; Mask character = %s, Definition = %s" %(float(cpgfilter.masked)/float(cpgfilter.total) * 100, mask, defn) + print "%2.2f percent bases masked; Mask character = %s, Definition = %s" % ( float(cpgfilter.masked)/float(cpgfilter.total) * 100, mask, defn ) if __name__ == "__main__": main() diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/microsats_alignment_level.py --- a/tools/regVariation/microsats_alignment_level.py +++ b/tools/regVariation/microsats_alignment_level.py @@ -4,7 +4,11 @@ Uses SPUTNIK to fetch microsatellites and extracts orthologous repeats from the sputnik output. """ from galaxy import eggs -import sys, os, tempfile, string, math, re +import os +import re +import string +import sys +import tempfile def reverse_complement(text): DNA_COMP = string.maketrans( "ACGTacgt", "TGCAtgca" ) @@ -12,31 +16,26 @@ comp.reverse() return "".join(comp) + def main(): if len(sys.argv) != 8: - print >>sys.stderr, "Insufficient number of arguments." + print >> sys.stderr, "Insufficient number of arguments." sys.exit() infile = open(sys.argv[1],'r') separation = int(sys.argv[2]) outfile = sys.argv[3] - align_type = sys.argv[4] - if align_type == "2way": - align_type_len = 2 - elif align_type == "3way": - align_type_len = 3 mono_threshold = int(sys.argv[5]) non_mono_threshold = int(sys.argv[6]) allow_different_units = int(sys.argv[7]) - print "Min distance = %d bp; Min threshold for mono repeats = %d; Min threshold for non-mono repeats = %d; Allow different motifs = %s" %(separation, mono_threshold, non_mono_threshold, allow_different_units==1) + print "Min distance = %d bp; Min threshold for mono repeats = %d; Min threshold for non-mono repeats = %d; Allow different motifs = %s" % ( separation, mono_threshold, non_mono_threshold, allow_different_units==1 ) try: fout = open(outfile, "w") - print >>fout, "#Block\tSeq1_Name\tSeq1_Start\tSeq1_End\tSeq1_Type\tSeq1_Length\tSeq1_RepeatNumber\tSeq1_Unit\tSeq2_Name\tSeq2_Start\tSeq2_End\tSeq2_Type\tSeq2_Length\tSeq2_RepeatNumber\tSeq2_Unit" + print >> fout, "#Block\tSeq1_Name\tSeq1_Start\tSeq1_End\tSeq1_Type\tSeq1_Length\tSeq1_RepeatNumber\tSeq1_Unit\tSeq2_Name\tSeq2_Start\tSeq2_End\tSeq2_Type\tSeq2_Length\tSeq2_RepeatNumber\tSeq2_Unit" #sputnik_cmd = os.path.join(os.path.split(sys.argv[0])[0], "sputnik") sputnik_cmd = "sputnik" input = infile.read() - skipped = 0 block_num = 0 input = input.replace('\r','\n') for block in input.split('\n\n'): @@ -44,26 +43,24 @@ tmpin = tempfile.NamedTemporaryFile() tmpout = tempfile.NamedTemporaryFile() tmpin.write(block.strip()) - blk = tmpin.read() cmdline = sputnik_cmd + " " + tmpin.name + " > /dev/null 2>&1 >> " + tmpout.name try: os.system(cmdline) - except Exception, es: + except Exception: continue sputnik_out = tmpout.read() tmpin.close() tmpout.close() if sputnik_out != "": if len(block.split('>')[1:]) != 2: #len(sputnik_out.split('>')): - skipped += 1 continue align_block = block.strip().split('>') lendict = {'mononucleotide':1, 'dinucleotide':2, 'trinucleotide':3, 'tetranucleotide':4, 'pentanucleotide':5, 'hexanucleotide':6} - blockdict={} - r=0 - namelist=[] - for k,sput_block in enumerate(sputnik_out.split('>')[1:]): + blockdict = {} + r = 0 + namelist = [] + for k, sput_block in enumerate(sputnik_out.split('>')[1:]): whole_seq = ''.join(align_block[k+1].split('\n')[1:]).replace('\n','').strip() p = re.compile('\n(\S*nucleotide)') repeats = p.split(sput_block.strip()) @@ -71,13 +68,12 @@ j = 1 name = repeats[0].strip() try: - coords = re.search('\d+[-_:]\d+',name).group() - coords = coords.replace('_','-').replace(':','-') - except Exception, e: + coords = re.search('\d+[-_:]\d+', name).group() + coords = coords.replace('_', '-').replace(':', '-') + except Exception: coords = '0-0' - pass r += 1 - blockdict[r]={} + blockdict[r] = {} try: sp_name = name[:name.index('.')] chr_name = name[name.index('.'):name.index('(')] @@ -91,11 +87,10 @@ continue if blockdict[r].has_key('types'): - blockdict[r]['types'].append(repeats[j].strip()) #type of microsat + blockdict[r]['types'].append(repeats[j].strip()) #type of microsat else: - blockdict[r]['types'] = [repeats[j].strip()] #type of microsat + blockdict[r]['types'] = [repeats[j].strip()] #type of microsat - sequence = ''.join(align_block[r].split('\n')[1:]).replace('\n','').strip() start = int(repeats[j+1].split('--')[0].split(':')[0].strip()) #check to see if there are gaps before the start of the repeat, and change the start accordingly sgaps = 0 @@ -107,7 +102,7 @@ break #break at the 1st non-gap character ch_pos -= 1 if blockdict[r].has_key('starts'): - blockdict[r]['starts'].append(start+sgaps) #start co-ords adjusted with alignment co-ords to include GAPS + blockdict[r]['starts'].append(start+sgaps) #start co-ords adjusted with alignment co-ords to include GAPS else: blockdict[r]['starts'] = [start+sgaps] @@ -120,7 +115,7 @@ else: break #break at the 1st non-gap character if blockdict[r].has_key('ends'): - blockdict[r]['ends'].append(end+egaps) #end co-ords adjusted with alignment co-ords to include GAPS + blockdict[r]['ends'].append(end+egaps) #end co-ords adjusted with alignment co-ords to include GAPS else: blockdict[r]['ends'] = [end+egaps] @@ -134,20 +129,20 @@ gaps_before_start = whole_seq[:rel_start].count('-') if blockdict[r].has_key('gaps_before_start'): - blockdict[r]['gaps_before_start'].append(gaps_before_start) #lengths + blockdict[r]['gaps_before_start'].append(gaps_before_start) #lengths else: - blockdict[r]['gaps_before_start'] = [gaps_before_start] #lengths + blockdict[r]['gaps_before_start'] = [gaps_before_start] #lengths - whole_seq_start= int(coords.split('-')[0]) + whole_seq_start = int(coords.split('-')[0]) if blockdict[r].has_key('whole_seq_start'): - blockdict[r]['whole_seq_start'].append(whole_seq_start) #lengths + blockdict[r]['whole_seq_start'].append(whole_seq_start) #lengths else: - blockdict[r]['whole_seq_start'] = [whole_seq_start] #lengths + blockdict[r]['whole_seq_start'] = [whole_seq_start] #lengths if blockdict[r].has_key('lengths'): - blockdict[r]['lengths'].append(repeat_len) #lengths + blockdict[r]['lengths'].append(repeat_len) #lengths else: - blockdict[r]['lengths'] = [repeat_len] #lengths + blockdict[r]['lengths'] = [repeat_len] #lengths if blockdict[r].has_key('counts'): blockdict[r]['counts'].append(str(int(repeat_len)/lendict[repeats[j].strip()])) #Repeat Unit @@ -159,10 +154,10 @@ else: blockdict[r]['units'] = [repeat_seq[:lendict[repeats[j].strip()]]] #Repeat Unit - except Exception, eh: + except Exception: pass - j+=2 - #check the co-ords of all repeats corresponding to a sequence and remove adjacent repeats separated by less than the user-specified 'separation'. + j += 2 + #check the co-ords of all repeats corresponding to a sequence and remove adjacent repeats separated by less than the user-specified 'separation'. delete_index_list = [] for ind, item in enumerate(blockdict[r]['ends']): try: @@ -171,7 +166,7 @@ delete_index_list.append(ind) if ind+1 not in delete_index_list: delete_index_list.append(ind+1) - except Exception, ek: + except Exception: pass for index in delete_index_list: #mark them for deletion try: @@ -183,7 +178,7 @@ blockdict[r]['lengths'][index] = 'marked' blockdict[r]['counts'][index] = 'marked' blockdict[r]['units'][index] = 'marked' - except Exception, ej: + except Exception: pass #remove 'marked' elements from all the lists """ @@ -192,19 +187,19 @@ if elem == 'marked': blockdict[r][key].remove(elem) """ - #print blockdict + #print blockdict - #make sure that the blockdict has keys for both the species + #make sure that the blockdict has keys for both the species if (1 not in blockdict) or (2 not in blockdict): continue visited_2 = [0 for x in range(len(blockdict[2]['starts']))] - for ind1,coord_s1 in enumerate(blockdict[1]['starts']): + for ind1, coord_s1 in enumerate(blockdict[1]['starts']): if coord_s1 == 'marked': continue coord_e1 = blockdict[1]['ends'][ind1] out = [] - for ind2,coord_s2 in enumerate(blockdict[2]['starts']): + for ind2, coord_s2 in enumerate(blockdict[2]['starts']): if coord_s2 == 'marked': visited_2[ind2] = 1 continue @@ -216,7 +211,7 @@ else: if (blockdict[1]['units'][ind1] not in blockdict[2]['units'][ind2]*2) and (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2): continue - #print >>sys.stderr, (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2) + #print >> sys.stderr, (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2) #skip if the repeat number thresholds are not met if blockdict[1]['types'][ind1] == 'mononucleotide': if (int(blockdict[1]['counts'][ind1]) < mono_threshold): @@ -231,12 +226,12 @@ else: if (int(blockdict[2]['counts'][ind2]) < non_mono_threshold): continue - #print "s1,e1=%s,%s; s2,e2=%s,%s" %(coord_s1,coord_e1,coord_s2,coord_e2) - if (coord_s1 in range(coord_s2,coord_e2)) or (coord_e1 in range(coord_s2,coord_e2)): + #print "s1,e1=%s,%s; s2,e2=%s,%s" % ( coord_s1, coord_e1, coord_s2, coord_e2 ) + if (coord_s1 in range(coord_s2, coord_e2)) or (coord_e1 in range(coord_s2, coord_e2)): out.append(str(block_num)) out.append(namelist[0]) rel_start = blockdict[1]['whole_seq_start'][ind1] + coord_s1 - blockdict[1]['gaps_before_start'][ind1] - rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) + rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) out.append(str(rel_start)) out.append(str(rel_end)) out.append(blockdict[1]['types'][ind1]) @@ -245,16 +240,16 @@ out.append(blockdict[1]['units'][ind1]) out.append(namelist[1]) rel_start = blockdict[2]['whole_seq_start'][ind2] + coord_s2 - blockdict[2]['gaps_before_start'][ind2] - rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) + rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) out.append(str(rel_start)) out.append(str(rel_end)) out.append(blockdict[2]['types'][ind2]) out.append(blockdict[2]['lengths'][ind2]) out.append(blockdict[2]['counts'][ind2]) out.append(blockdict[2]['units'][ind2]) - print >>fout, '\t'.join(out) + print >> fout, '\t'.join(out) visited_2[ind2] = 1 - out=[] + out = [] if 0 in visited_2: #there are still some elements in 2nd set which haven't found orthologs yet. for ind2, coord_s2 in enumerate(blockdict[2]['starts']): @@ -264,7 +259,7 @@ continue coord_e2 = blockdict[2]['ends'][ind2] out = [] - for ind1,coord_s1 in enumerate(blockdict[1]['starts']): + for ind1, coord_s1 in enumerate(blockdict[1]['starts']): if coord_s1 == 'marked': continue coord_e1 = blockdict[1]['ends'][ind1] @@ -290,11 +285,11 @@ if (int(blockdict[2]['counts'][ind2]) < non_mono_threshold): continue - if (coord_s2 in range(coord_s1,coord_e1)) or (coord_e2 in range(coord_s1,coord_e1)): - out.append(str(block_num)) + if (coord_s2 in range(coord_s1, coord_e1)) or (coord_e2 in range(coord_s1, coord_e1)): + out.append(str(block_num)) out.append(namelist[0]) rel_start = blockdict[1]['whole_seq_start'][ind1] + coord_s1 - blockdict[1]['gaps_before_start'][ind1] - rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) + rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) out.append(str(rel_start)) out.append(str(rel_end)) out.append(blockdict[1]['types'][ind1]) @@ -303,21 +298,21 @@ out.append(blockdict[1]['units'][ind1]) out.append(namelist[1]) rel_start = blockdict[2]['whole_seq_start'][ind2] + coord_s2 - blockdict[2]['gaps_before_start'][ind2] - rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) + rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) out.append(str(rel_start)) out.append(str(rel_end)) out.append(blockdict[2]['types'][ind2]) out.append(blockdict[2]['lengths'][ind2]) out.append(blockdict[2]['counts'][ind2]) out.append(blockdict[2]['units'][ind2]) - print >>fout, '\t'.join(out) + print >> fout, '\t'.join(out) visited_2[ind2] = 1 - out=[] + out = [] - #print >>fout, blockdict + #print >> fout, blockdict except Exception, exc: - print >>sys.stderr, "type(exc),args,exc: %s, %s, %s" %(type(exc), exc.args, exc) + print >> sys.stderr, "type(exc),args,exc: %s, %s, %s" % ( type(exc), exc.args, exc ) + if __name__ == "__main__": main() - diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/microsats_mutability.py --- a/tools/regVariation/microsats_mutability.py +++ b/tools/regVariation/microsats_mutability.py @@ -4,7 +4,10 @@ This tool computes microsatellite mutability for the orthologous microsatellites fetched from 'Extract Orthologous Microsatellites from pair-wise alignments' tool. """ from galaxy import eggs -import sys, string, re, commands, tempfile, os, fileinput +import fileinput +import string +import sys +import tempfile from galaxy.tools.util.galaxyops import * from bx.intervals.io import * from bx.intervals.operations import quicksect @@ -19,7 +22,7 @@ p_group_cols = [p_group, p_group+7] s_group_cols = [s_group, s_group+7] num_generations = int(sys.argv[7]) -region = sys.argv[8] +region = sys.argv[8] int_file = sys.argv[9] if int_file != "None": #User has specified an interval file try: @@ -28,31 +31,35 @@ chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[11] ) except: stop_err("Unable to open input Interval file") - + + def stop_err(msg): sys.stderr.write(msg) sys.exit() + def reverse_complement(text): DNA_COMP = string.maketrans( "ACGTacgt", "TGCAtgca" ) comp = [ch for ch in text.translate(DNA_COMP)] comp.reverse() return "".join(comp) + def get_unique_elems(elems): - seen=set() + seen = set() return[x for x in elems if x not in seen and not seen.add(x)] + def get_binned_lists(uniqlist, binsize): - binnedlist=[] + binnedlist = [] uniqlist.sort() start = int(uniqlist[0]) - bin_ind=0 - l_ind=0 + bin_ind = 0 + l_ind = 0 binnedlist.append([]) while l_ind < len(uniqlist): elem = int(uniqlist[l_ind]) - if elem in range(start,start+binsize): + if elem in range(start, start+binsize): binnedlist[bin_ind].append(elem) else: start += binsize @@ -62,39 +69,38 @@ l_ind += 1 return binnedlist -def fetch_weight(H,C,t): + +def fetch_weight(H, C, t): if (H-(C-H)) < t: return 2.0 else: return 1.0 -def mutabilityEstimator(repeats1,repeats2,thresholds): + +def mutabilityEstimator(repeats1, repeats2, thresholds): mut_num = 0.0 #Mutability Numerator mut_den = 0.0 #Mutability denominator - for ind,H in enumerate(repeats1): + for ind, H in enumerate(repeats1): C = repeats2[ind] t = thresholds[ind] - w = fetch_weight(H,C,t) + w = fetch_weight(H, C, t) mut_num += ((H-C)*(H-C)*w) mut_den += w return [mut_num, mut_den] + def output_writer(blk, blk_lines): global winspecies, speciesind - all_elems_1=[] - all_elems_2=[] - all_s_elems_1=[] - all_s_elems_2=[] + all_elems_1 = [] + all_elems_2 = [] + all_s_elems_1 = [] + all_s_elems_2 = [] for bline in blk_lines: if not(bline): continue items = bline.split('\t') seq1 = items[1] - start1 = items[2] - end1 = items[3] seq2 = items[8] - start2 = items[9] - end2 = items[10] if p_group_cols[0] == 6: items[p_group_cols[0]] = int(items[p_group_cols[0]]) items[p_group_cols[1]] = int(items[p_group_cols[1]]) @@ -111,8 +117,8 @@ if s_group_cols[0] != -1: uniq_s_elems_1 = get_unique_elems(all_s_elems_1) uniq_s_elems_2 = get_unique_elems(all_s_elems_2) - mut1={} - mut2={} + mut1 = {} + mut2 = {} count1 = {} count2 = {} """ @@ -120,12 +126,12 @@ uniq_elems_1 = get_unique_units(j.sort(lambda x, y: len(x)-len(y))) """ if p_group_cols[0] == 6: #i.e. the option chosen is group-by repeat number. - uniq_elems_1 = get_binned_lists(uniq_elems_1,p_bin_size) - uniq_elems_2 = get_binned_lists(uniq_elems_2,p_bin_size) + uniq_elems_1 = get_binned_lists( uniq_elems_1, p_bin_size ) + uniq_elems_2 = get_binned_lists( uniq_elems_2, p_bin_size ) if s_group_cols[0] == 6: #i.e. the option chosen is subgroup-by repeat number. - uniq_s_elems_1 = get_binned_lists(uniq_s_elems_1,s_bin_size) - uniq_s_elems_2 = get_binned_lists(uniq_s_elems_2,s_bin_size) + uniq_s_elems_1 = get_binned_lists( uniq_s_elems_1, s_bin_size ) + uniq_s_elems_2 = get_binned_lists( uniq_s_elems_2, s_bin_size ) for pitem1 in uniq_elems_1: #repeats1 = [] @@ -143,61 +149,61 @@ if p_group_cols[0] == 6: belems[p_group_cols[0]] = int(belems[p_group_cols[0]]) if belems[p_group_cols[0]] in pitem1: - if belems[s_group_cols[0]]==sitem1: + if belems[s_group_cols[0]] == sitem1: repeats1.append(int(belems[6])) repeats2.append(int(belems[13])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut1[str(pitem1)+'\t'+str(sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds) + mut1[str(pitem1)+'\t'+str(sitem1)] = mutabilityEstimator( repeats1, repeats2, thresholds ) if region == 'align': - count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2)) - else: + count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats1) + count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats1) elif winspecies == 2: - count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats2) + count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats2) else: if type(sitem1) == list: if s_group_cols[0] == 6: belems[s_group_cols[0]] = int(belems[s_group_cols[0]]) - if belems[p_group_cols[0]]==pitem1 and belems[s_group_cols[0]] in sitem1: + if belems[p_group_cols[0]] == pitem1 and belems[s_group_cols[0]] in sitem1: repeats1.append(int(belems[6])) repeats2.append(int(belems[13])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut1["%s\t%s" %(pitem1,sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds) + mut1["%s\t%s" % ( pitem1, sitem1 )] = mutabilityEstimator( repeats1, repeats2, thresholds ) if region == 'align': - count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2)) - else: + count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count1[str(pitem1)+'\t'+str(sitem1)]=sum(repeats1) + count1[str(pitem1)+'\t'+str(sitem1)] = sum(repeats1) elif winspecies == 2: - count1[str(pitem1)+'\t'+str(sitem1)]=sum(repeats2) + count1[str(pitem1)+'\t'+str(sitem1)] = sum(repeats2) else: - if belems[p_group_cols[0]]==pitem1 and belems[s_group_cols[0]]==sitem1: + if belems[p_group_cols[0]] == pitem1 and belems[s_group_cols[0]] == sitem1: repeats1.append(int(belems[6])) repeats2.append(int(belems[13])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut1["%s\t%s" %(pitem1,sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds) + mut1["%s\t%s" % ( pitem1, sitem1 )] = mutabilityEstimator( repeats1, repeats2, thresholds ) if region == 'align': - count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2)) - else: + count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats1) + count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats1) elif winspecies == 2: - count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats2) + count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats2) else: #Sub-group by feature is None for bline in blk_lines: belems = bline.split('\t') if type(pitem1) == list: - #print >>sys.stderr, "item: " + str(item1) + #print >> sys.stderr, "item: " + str(item1) if p_group_cols[0] == 6: belems[p_group_cols[0]] = int(belems[p_group_cols[0]]) if belems[p_group_cols[0]] in pitem1: @@ -208,21 +214,21 @@ else: thresholds.append(non_mono_threshold) else: - if belems[p_group_cols[0]]==pitem1: + if belems[p_group_cols[0]] == pitem1: repeats1.append(int(belems[6])) repeats2.append(int(belems[13])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut1["%s" %(pitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds) + mut1["%s" % (pitem1)] = mutabilityEstimator( repeats1, repeats2, thresholds ) if region == 'align': - count1["%s" %(pitem1)]=min(sum(repeats1),sum(repeats2)) - else: + count1["%s" % (pitem1)] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count1[str(pitem1)]=sum(repeats1) + count1[str(pitem1)] = sum(repeats1) elif winspecies == 2: - count1[str(pitem1)]=sum(repeats2) + count1[str(pitem1)] = sum(repeats2) for pitem2 in uniq_elems_2: #repeats1 = [] @@ -239,57 +245,57 @@ if type(pitem2) == list: if p_group_cols[0] == 6: belems[p_group_cols[1]] = int(belems[p_group_cols[1]]) - if belems[p_group_cols[1]] in pitem2 and belems[s_group_cols[1]]==sitem2: + if belems[p_group_cols[1]] in pitem2 and belems[s_group_cols[1]] == sitem2: repeats2.append(int(belems[13])) repeats1.append(int(belems[6])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds) + mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds ) #count2[str(pitem2)+'\t'+str(sitem2)]=len(repeats2) if region == 'align': - count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2)) - else: + count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2) + count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2) elif winspecies == 2: - count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1) + count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1) else: if type(sitem2) == list: if s_group_cols[0] == 6: belems[s_group_cols[1]] = int(belems[s_group_cols[1]]) - if belems[p_group_cols[1]]==pitem2 and belems[s_group_cols[1]] in sitem2: + if belems[p_group_cols[1]] == pitem2 and belems[s_group_cols[1]] in sitem2: repeats2.append(int(belems[13])) repeats1.append(int(belems[6])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds) + mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds ) if region == 'align': - count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2)) - else: + count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2) + count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2) elif winspecies == 2: - count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1) + count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1) else: - if belems[p_group_cols[1]]==pitem2 and belems[s_group_cols[1]]==sitem2: + if belems[p_group_cols[1]] == pitem2 and belems[s_group_cols[1]] == sitem2: repeats1.append(int(belems[13])) repeats2.append(int(belems[6])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds) + mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds ) if region == 'align': - count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2)) - else: + count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2) + count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2) elif winspecies == 2: - count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1) + count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1) else: #Sub-group by feature is None for bline in blk_lines: belems = bline.split('\t') @@ -304,21 +310,21 @@ else: thresholds.append(non_mono_threshold) else: - if belems[p_group_cols[1]]==pitem2: + if belems[p_group_cols[1]] == pitem2: repeats2.append(int(belems[13])) repeats1.append(int(belems[6])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut2["%s" %(pitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds) + mut2["%s" % (pitem2)] = mutabilityEstimator( repeats2, repeats1, thresholds ) if region == 'align': - count2["%s" %(pitem2)]=min(sum(repeats1),sum(repeats2)) + count2["%s" % (pitem2)] = min( sum(repeats1), sum(repeats2) ) else: if winspecies == 1: - count2["%s" %(pitem2)]=sum(repeats2) + count2["%s" % (pitem2)] = sum(repeats2) elif winspecies == 2: - count2["%s" %(pitem2)]=sum(repeats1) + count2["%s" % (pitem2)] = sum(repeats1) for key in mut1.keys(): if key in mut2.keys(): mut = (mut1[key][0]+mut2[key][0])/(mut1[key][1]+mut2[key][1]) @@ -328,9 +334,9 @@ unit_found = False if p_group_cols[0] == 7 or s_group_cols[0] == 7: #if it is Repeat Unit (AG, GCT etc.) check for reverse-complements too if p_group_cols[0] == 7: - this,other = 0,1 + this, other = 0, 1 else: - this,other = 1,0 + this, other = 1, 0 groups1 = key.split('\t') mutn = mut1[key][0] mutd = mut1[key][1] @@ -351,28 +357,29 @@ else: mut = mut1[key][0]/mut1[key][1] count = count1[key] - mut = "%.2e" %(mut/num_generations) + mut = "%.2e" % (mut/num_generations) if region == 'align': - print >>fout, str(blk) + '\t'+seq1 + '\t' + seq2 + '\t' +key.strip()+ '\t'+str(mut) + '\t'+ str(count) + print >> fout, str(blk) + '\t'+seq1 + '\t' + seq2 + '\t' +key.strip()+ '\t'+str(mut) + '\t'+ str(count) elif region == 'win': - fout.write("%s\t%s\t%s\t%s\n" %(blk,key.strip(),mut,count)) + fout.write("%s\t%s\t%s\t%s\n" % ( blk, key.strip(), mut, count )) fout.flush() #catch any remaining repeats, for instance if the orthologous position contained different repeat units for remaining_key in mut2.keys(): mut = mut2[remaining_key][0]/mut2[remaining_key][1] - mut = "%.2e" %(mut/num_generations) + mut = "%.2e" % (mut/num_generations) count = count2[remaining_key] if region == 'align': - print >>fout, str(blk) + '\t'+seq1 + '\t'+seq2 + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count) + print >> fout, str(blk) + '\t'+seq1 + '\t'+seq2 + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count) elif region == 'win': - fout.write("%s\t%s\t%s\t%s\n" %(blk,remaining_key.strip(),mut,count)) + fout.write("%s\t%s\t%s\t%s\n" % ( blk, remaining_key.strip(), mut, count )) fout.flush() - #print >>fout, blk + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count) + #print >> fout, blk + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count) + def counter(node, start, end, report_func): if start <= node.start < end and start < node.end <= end: - report_func(node) + report_func(node) if node.right: counter(node.right, start, end, report_func) if node.left: @@ -381,8 +388,8 @@ counter(node.right, start, end, report_func) elif node.start >= end and node.left and node.left.maxend > start: counter(node.left, start, end, report_func) - - + + def main(): infile = sys.argv[1] @@ -400,21 +407,18 @@ if region == 'win': if dbkey_i in elems[1]: winspecies = 1 - speciesind = 1 + speciesind = 1 elif dbkey_i in elems[8]: winspecies = 2 speciesind = 8 else: - stop_err("The species build corresponding to your interval file is not present in the Microsatellite file.") + stop_err("The species build corresponding to your interval file is not present in the Microsatellite file.") fin = open(infile, 'r') skipped = 0 - blk=0 - win=0 - linestr="" + linestr = "" if region == 'win': - msats = NiceReaderWrapper( fileinput.FileInput( infile ), chrom_col = speciesind, start_col = speciesind+1, @@ -435,7 +439,7 @@ ichr = ielems[chr_col_i] istart = int(ielems[start_col_i]) iend = int(ielems[end_col_i]) - isrc = "%s.%s" %(dbkey_i,ichr) + isrc = "%s.%s" % ( dbkey_i, ichr ) if isrc not in msatTree.chroms: continue result = [] @@ -450,14 +454,14 @@ tmpfile1.seek(0) output_writer(iline, tmpfile1.readlines()) except: - skipped+=1 + skipped += 1 if skipped: - print "Skipped %d intervals as invalid." %(skipped) + print "Skipped %d intervals as invalid." % (skipped) elif region == 'align': if s_group_cols[0] != -1: - print >>fout, "#Window\tSpecies_1\tSpecies_2\tGroupby_Feature\tSubGroupby_Feature\tMutability\tCount" + print >> fout, "#Window\tSpecies_1\tSpecies_2\tGroupby_Feature\tSubGroupby_Feature\tMutability\tCount" else: - print >>fout, "#Window\tSpecies_1\tWindow_Start\tWindow_End\tSpecies_2\tGroupby_Feature\tMutability\tCount" + print >> fout, "#Window\tSpecies_1\tWindow_Start\tWindow_End\tSpecies_2\tGroupby_Feature\tMutability\tCount" prev_bnum = -1 try: for line in fin: @@ -481,9 +485,11 @@ prev_bnum = new_bnum output_writer(prev_bnum, linestr.strip().replace('\r','\n').split('\n')) except Exception, ea: - print >>sys.stderr, ea + print >> sys.stderr, ea skipped += 1 if skipped: - print "Skipped %d lines as invalid." %(skipped) + print "Skipped %d lines as invalid." % (skipped) + + if __name__ == "__main__": - main() \ No newline at end of file + main() diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 935942afdf706951fb45faf5468e3e12c25fb28f tools/regVariation/partialR_square.py --- a/tools/regVariation/partialR_square.py +++ b/tools/regVariation/partialR_square.py @@ -2,7 +2,7 @@ from galaxy import eggs -import sys, string +import sys from rpy import * import numpy @@ -13,6 +13,7 @@ sys.stderr.write(msg) sys.exit() + def sscombs(s): if len(s) == 1: return [s] @@ -26,14 +27,14 @@ x_cols = sys.argv[3].split(',') outfile = sys.argv[4] -print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1) +print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 ) fout = open(outfile,'w') for i, line in enumerate( file ( infile )): line = line.rstrip('\r\n') if len( line )>0 and not line.startswith( '#' ): elems = line.split( '\t' ) - break + break if i == 30: break # Hopefully we'll never get here... @@ -43,7 +44,7 @@ y_vals = [] x_vals = [] -for k,col in enumerate(x_cols): +for k, col in enumerate(x_cols): x_cols[k] = int(col)-1 x_vals.append([]) """ @@ -51,13 +52,13 @@ float( elems[x_cols[k]] ) except: try: - msg = "This operation cannot be performed on non-numeric column %d containing value '%s'." %( col, elems[x_cols[k]] ) + msg = "This operation cannot be performed on non-numeric column %d containing value '%s'." % ( col, elems[x_cols[k]] ) except: msg = "This operation cannot be performed on non-numeric data." stop_err( msg ) """ NA = 'NA' -for ind,line in enumerate( file( infile )): +for ind, line in enumerate( file( infile )): if line and not line.startswith( '#' ): try: fields = line.split("\t") @@ -65,20 +66,20 @@ yval = float(fields[y_col]) except Exception, ey: yval = r('NA') - #print >>sys.stderr, "ey = %s" %ey + #print >> sys.stderr, "ey = %s" %ey y_vals.append(yval) - for k,col in enumerate(x_cols): + for k, col in enumerate(x_cols): try: xval = float(fields[col]) except Exception, ex: xval = r('NA') - #print >>sys.stderr, "ex = %s" %ex + #print >> sys.stderr, "ex = %s" %ex x_vals[k].append(xval) except: pass x_vals1 = numpy.asarray(x_vals).transpose() -dat= r.list(x=array(x_vals1), y=y_vals) +dat = r.list(x=array(x_vals1), y=y_vals) set_default_mode(NO_CONVERSION) try: @@ -91,7 +92,7 @@ fullr2 = summary.get('r.squared','NA') if fullr2 == 'NA': - stop_error("Error in linear regression") + stop_err("Error in linear regression") if len(x_vals) < 10: s = "" @@ -100,10 +101,10 @@ else: stop_err("This tool only works with less than 10 predictors.") -print >>fout, "#Model\tR-sq\tpartial_R_Terms\tpartial_R_Value" +print >> fout, "#Model\tR-sq\tpartial_R_Terms\tpartial_R_Value" all_combos = sorted(sscombs(s), key=len) all_combos.reverse() -for j,cols in enumerate(all_combos): +for j, cols in enumerate(all_combos): #if len(cols) == len(s): #Same as the full model above # continue if len(cols) == 1: @@ -113,7 +114,7 @@ for col in cols: x_v.append(x_vals[int(col)]) x_vals1 = numpy.asarray(x_v).transpose() - dat= r.list(x=array(x_vals1), y=y_vals) + dat = r.list(x=array(x_vals1), y=y_vals) set_default_mode(NO_CONVERSION) red = r.lm(r("y ~ x"), data= dat) #Reduced model set_default_mode(BASIC_CONVERSION) @@ -136,11 +137,11 @@ partial_R_col_str = "-" partial_R = "-" try: - redr2 = "%.4f" %(float(redr2)) + redr2 = "%.4f" % (float(redr2)) except: pass try: - partial_R = "%.4f" %(float(partial_R)) + partial_R = "%.4f" % (float(partial_R)) except: pass - print >>fout, "%s\t%s\t%s\t%s" %(col_str,redr2,partial_R_col_str,partial_R) + print >> fout, "%s\t%s\t%s\t%s" % ( col_str, redr2, partial_R_col_str, partial_R ) This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/82d2c2109792/ Changeset: 82d2c2109792 User: dannon Date: 2014-02-04 16:33:40 Summary: Merged in nsoranzo/galaxy-central (pull request #312) Remove unused imports and unused variables. Fix spacing. Affected #: 17 files diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/WeightedAverage.py --- a/tools/regVariation/WeightedAverage.py +++ b/tools/regVariation/WeightedAverage.py @@ -1,20 +1,16 @@ #!/usr/bin/env python """ - usage: %prog bed_file_1 bed_file_2 out_file -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file -2, --cols2=N,N,N,N,N: Columns for chr, start, end, strand, name/value in second file """ -from galaxy import eggs import collections -import sys, string +import sys #import numpy from galaxy import eggs import pkg_resources pkg_resources.require( "bx-python" ) -import sys, traceback, fileinput -from warnings import warn from galaxy.tools.util.galaxyops import * from bx.cookbook import doc_optparse @@ -27,77 +23,72 @@ sys.exit() -def FindRate(chromosome,start_stop,dictType): - OverlapList=[] +def FindRate(chromosome, start_stop, dictType): + OverlapList = [] for tempO in dictType[chromosome]: - DatabaseInterval=[tempO[0],tempO[1]] - Overlap=GetOverlap(start_stop,DatabaseInterval) - if Overlap>0: - OverlapList.append([Overlap,tempO[2]]) - - if len(OverlapList)>0: - - SumRecomb=0 - SumOverlap=0 + DatabaseInterval = [tempO[0], tempO[1]] + Overlap = GetOverlap( start_stop, DatabaseInterval ) + if Overlap > 0: + OverlapList.append([Overlap, tempO[2]]) + + if len(OverlapList) > 0: + SumRecomb = 0 + SumOverlap = 0 for member in OverlapList: - SumRecomb+=member[0]*member[1] - SumOverlap+=member[0] - averageRate=SumRecomb/SumOverlap - + SumRecomb += member[0]*member[1] + SumOverlap += member[0] + averageRate = SumRecomb/SumOverlap return averageRate - else: return 'NA' - - - -def GetOverlap(a,b): - return min(a[1],b[1])-max(a[0],b[0]) + + +def GetOverlap(a, b): + return min(a[1], b[1])-max(a[0], b[0]) + options, args = doc_optparse.parse( __doc__ ) try: chr_col_1, start_col_1, end_col_1, strand_col1 = parse_cols_arg( options.cols1 ) - chr_col_2, start_col_2, end_col_2, strand_col2, name_col_2 = parse_cols_arg( options.cols2 ) + chr_col_2, start_col_2, end_col_2, strand_col2, name_col_2 = parse_cols_arg( options.cols2 ) input1, input2, input3 = args except Exception, eee: print eee stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) - +fd2 = open(input2) +lines2 = fd2.readlines() +RecombChrDict = collections.defaultdict(list) -fd2=open(input2) -lines2=fd2.readlines() -RecombChrDict=collections.defaultdict(list) - -skipped=0 +skipped = 0 for line in lines2: - temp=line.strip().split() + temp = line.strip().split() try: assert float(temp[int(name_col_2)]) except: - skipped+=1 + skipped += 1 continue - tempIndex=[int(temp[int(start_col_2)]),int(temp[int(end_col_2)]),float(temp[int(name_col_2)])] + tempIndex = [int(temp[int(start_col_2)]), int(temp[int(end_col_2)]), float(temp[int(name_col_2)])] RecombChrDict[temp[int(chr_col_2)]].append(tempIndex) -print "Skipped %d features with invalid values" %(skipped) +print "Skipped %d features with invalid values" % (skipped) -fd1=open(input1) -lines=fd1.readlines() -finalProduct='' +fd1 = open(input1) +lines = fd1.readlines() +finalProduct = '' for line in lines: - temp=line.strip().split('\t') - chromosome=temp[int(chr_col_1)] - start=int(temp[int(start_col_1)]) - stop=int(temp[int(end_col_1)]) - start_stop=[start,stop] - RecombRate=FindRate(chromosome,start_stop,RecombChrDict) + temp = line.strip().split('\t') + chromosome = temp[int(chr_col_1)] + start = int(temp[int(start_col_1)]) + stop = int(temp[int(end_col_1)]) + start_stop = [start, stop] + RecombRate = FindRate( chromosome, start_stop, RecombChrDict ) try: - RecombRate="%.4f" %(float(RecombRate)) + RecombRate = "%.4f" % (float(RecombRate)) except: - RecombRate=RecombRate - finalProduct+=line.strip()+'\t'+str(RecombRate)+'\n' -fdd=open(input3,'w') + RecombRate = RecombRate + finalProduct += line.strip()+'\t'+str(RecombRate)+'\n' +fdd = open(input3, 'w') fdd.writelines(finalProduct) fdd.close() diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/best_regression_subsets.py --- a/tools/regVariation/best_regression_subsets.py +++ b/tools/regVariation/best_regression_subsets.py @@ -2,7 +2,7 @@ from galaxy import eggs -import sys, string +import sys from rpy import * import numpy @@ -10,19 +10,20 @@ sys.stderr.write(msg) sys.exit() + infile = sys.argv[1] y_col = int(sys.argv[2])-1 x_cols = sys.argv[3].split(',') outfile = sys.argv[4] outfile2 = sys.argv[5] -print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1) +print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 ) fout = open(outfile,'w') for i, line in enumerate( file ( infile )): line = line.rstrip('\r\n') if len( line )>0 and not line.startswith( '#' ): elems = line.split( '\t' ) - break + break if i == 30: break # Hopefully we'll never get here... @@ -32,12 +33,12 @@ y_vals = [] x_vals = [] -for k,col in enumerate(x_cols): +for k, col in enumerate(x_cols): x_cols[k] = int(col)-1 x_vals.append([]) NA = 'NA' -for ind,line in enumerate( file( infile )): +for ind, line in enumerate( file( infile ) ): if line and not line.startswith( '#' ): try: fields = line.split("\t") @@ -46,7 +47,7 @@ except Exception, ey: yval = r('NA') y_vals.append(yval) - for k,col in enumerate(x_cols): + for k, col in enumerate(x_cols): try: xval = float(fields[col]) except Exception, ex: @@ -59,10 +60,10 @@ x_vals1 = numpy.asarray(x_vals).transpose() -dat= r.list(x=array(x_vals1), y=y_vals) +dat = r.list(x=array(x_vals1), y=y_vals) r.library("leaps") - + set_default_mode(NO_CONVERSION) try: leaps = r.regsubsets(r("y ~ x"), data= r.na_exclude(dat)) @@ -75,10 +76,10 @@ pattern = "[" for i in range(tot): pattern = pattern + 'c' + str(int(x_cols[int(i)]) + 1) + ' ' -pattern = pattern.strip() + ']' -print >>fout, "#Vars\t%s\tR-sq\tAdj. R-sq\tC-p\tbic" %(pattern) -for ind,item in enumerate(summary['outmat']): - print >>fout, "%s\t%s\t%s\t%s\t%s\t%s" %(str(item).count('*'), item, summary['rsq'][ind], summary['adjr2'][ind], summary['cp'][ind], summary['bic'][ind]) +pattern = pattern.strip() + ']' +print >> fout, "#Vars\t%s\tR-sq\tAdj. R-sq\tC-p\tbic" % (pattern) +for ind, item in enumerate(summary['outmat']): + print >> fout, "%s\t%s\t%s\t%s\t%s\t%s" % (str(item).count('*'), item, summary['rsq'][ind], summary['adjr2'][ind], summary['cp'][ind], summary['bic'][ind]) r.pdf( outfile2, 8, 8 ) diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/featureCounter.py --- a/tools/regVariation/featureCounter.py +++ b/tools/regVariation/featureCounter.py @@ -11,8 +11,7 @@ from galaxy import eggs import pkg_resources pkg_resources.require( "bx-python" ) -import sys, traceback, fileinput -from warnings import warn +import sys, fileinput from bx.intervals.io import * from bx.cookbook import doc_optparse from bx.intervals.operations import quicksect @@ -33,7 +32,7 @@ partial += 1 if node.left and node.left.maxend > start: counter(node.left, start, end) - if node.right: + if node.right: counter(node.right, start, end) elif start < node.start < end: if node.end <= end: @@ -42,10 +41,10 @@ partial += 1 if node.left and node.left.maxend > start: counter(node.left, start, end) - if node.right: + if node.right: counter(node.right, start, end) else: - if node.left: + if node.left: counter(node.left, start, end) def count_coverage( readers, comments=True ): @@ -58,8 +57,8 @@ if type( item ) is GenomicInterval: rightTree.insert( item, secondary.linenum, item.fields ) - bitsets = secondary_copy.binned_bitsets() - + bitsets = secondary_copy.binned_bitsets() + global full, partial for interval in primary: @@ -82,7 +81,7 @@ bases_covered = bitsets[ chrom ].count_range( start, end-start ) if (end - start) == 0: percent = 0 - else: + else: percent = float(bases_covered) / float(end - start) if bases_covered: root = rightTree.chroms[chrom] #root node for the chrom tree @@ -92,13 +91,14 @@ interval.fields.append(str(full)) interval.fields.append(str(partial)) yield interval - + + def main(): options, args = doc_optparse.parse( __doc__ ) try: chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 ) - chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 ) + chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 ) in1_fname, in2_fname, out_fname = args except: stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) @@ -126,7 +126,7 @@ out_file = open( out_fname, "w" ) try: - for line in count_coverage([g1,g2,g2_copy]): + for line in count_coverage([g1, g2, g2_copy]): if type( line ) is GenomicInterval: out_file.write( "%s\n" % "\t".join( line.fields ) ) else: @@ -143,6 +143,7 @@ print skipped( g2, filedesc=" of 2nd dataset" ) elif g2_copy.skipped > 0: print skipped( g2_copy, filedesc=" of 2nd dataset" ) - + + if __name__ == "__main__": main() diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/getIndelRates_3way.py --- a/tools/regVariation/getIndelRates_3way.py +++ b/tools/regVariation/getIndelRates_3way.py @@ -6,7 +6,6 @@ pkg_resources.require( "bx-python" ) import sys, os, tempfile -import traceback import fileinput from warnings import warn @@ -18,7 +17,8 @@ def stop_err(msg): sys.stderr.write(msg) sys.exit() - + + def counter(node, start, end, sort_col): global full, blk_len, blk_list if node.start < start: @@ -31,14 +31,14 @@ blk_len += int(node.other[sort_col+2]) if node.left and node.left.maxend > start: counter(node.left, start, end, sort_col) - if node.right: + if node.right: counter(node.right, start, end, sort_col) elif node.start > end: - if node.left: + if node.left: counter(node.left, start, end, sort_col) - -infile = sys.argv[1] + +infile = sys.argv[1] fout = open(sys.argv[2],'w') int_file = sys.argv[3] if int_file != "None": #User has specified an interval file @@ -48,9 +48,9 @@ chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[5] ) except: stop_err("Unable to open input Interval file") - + + def main(): - for i, line in enumerate( file ( infile )): line = line.rstrip('\r\n') if len( line )>0 and not line.startswith( '#' ): @@ -86,8 +86,7 @@ break except: continue - - + fin = open(infile, 'r') skipped = 0 @@ -98,7 +97,7 @@ os.system(cmdline) except: stop_err("Encountered error while sorting the input file.") - print >>fout, "#Block\t%s_InsRate\t%s_InsRate\t%s_InsRate\t%s_DelRate\t%s_DelRate\t%s_DelRate" %(species[0],species[1],species[2],species[0],species[1],species[2]) + print >> fout, "#Block\t%s_InsRate\t%s_InsRate\t%s_InsRate\t%s_DelRate\t%s_DelRate\t%s_DelRate" % ( species[0], species[1], species[2], species[0], species[1], species[2] ) prev_bnum = -1 sorted_infile.seek(0) for line in sorted_infile.readlines(): @@ -112,16 +111,16 @@ if prev_bnum != -1: irate = [] drate = [] - for i,elem in enumerate(inserts): + for i, elem in enumerate(inserts): try: - irate.append(str("%.2e" %(inserts[i]/blen[i]))) + irate.append(str("%.2e" % (inserts[i]/blen[i]))) except: irate.append('0') try: - drate.append(str("%.2e" %(deletes[i]/blen[i]))) + drate.append(str("%.2e" % (deletes[i]/blen[i]))) except: drate.append('0') - print >>fout, "%s\t%s\t%s" %(prev_bnum, '\t'.join(irate) , '\t'.join(drate)) + print >> fout, "%s\t%s\t%s" % ( prev_bnum, '\t'.join(irate) , '\t'.join(drate) ) inserts = [0.0, 0.0, 0.0] deletes = [0.0, 0.0, 0.0] blen = [] @@ -134,25 +133,24 @@ inserts[sp_ind] += 1 elif elems[1].endswith('delete'): deletes[sp_ind] += 1 - prev_bnum = new_bnum + prev_bnum = new_bnum except Exception, ei: #print >>sys.stderr, ei continue irate = [] drate = [] - for i,elem in enumerate(inserts): + for i, elem in enumerate(inserts): try: - irate.append(str("%.2e" %(inserts[i]/blen[i]))) + irate.append(str("%.2e" % (inserts[i]/blen[i]))) except: irate.append('0') try: - drate.append(str("%.2e" %(deletes[i]/blen[i]))) + drate.append(str("%.2e" % (deletes[i]/blen[i]))) except: drate.append('0') - print >>fout, "%s\t%s\t%s" %(prev_bnum, '\t'.join(irate) , '\t'.join(drate)) + print >> fout, "%s\t%s\t%s" % ( prev_bnum, '\t'.join(irate) , '\t'.join(drate) ) sys.exit() - inf = open(infile, 'r') start_met = False end_met = False @@ -163,14 +161,14 @@ try: assert int(elems[0]) assert len(elems) == 18 - if dbkey_i not in elems[1]: - if not(start_met): + if dbkey_i not in elems[1]: + if not(start_met): continue else: sp_end = n break else: - print >>sp_file, line + print >> sp_file, line if not(start_met): start_met = True sp_start = n @@ -201,7 +199,7 @@ for item in indel: if type( item ) is GenomicInterval: indelTree.insert( item, indel.linenum, item.fields ) - result=[] + result = [] global full, blk_len, blk_list for interval in win: @@ -213,14 +211,14 @@ chrom = interval.chrom start = int(interval.start) end = int(interval.end) - if start > end: + if start > end: warn( "Interval start after end!" ) - ins_chr = "%s.%s_insert" %(dbkey_i,chrom) - del_chr = "%s.%s_delete" %(dbkey_i,chrom) + ins_chr = "%s.%s_insert" % ( dbkey_i, chrom ) + del_chr = "%s.%s_delete" % ( dbkey_i, chrom ) irate = 0 drate = 0 if ins_chr not in indelTree.chroms and del_chr not in indelTree.chroms: - pass + pass else: if ins_chr in indelTree.chroms: full = 0.0 @@ -242,8 +240,9 @@ interval.fields.append(str("%.2e" %irate)) interval.fields.append(str("%.2e" %drate)) - print >>fout, "\t".join(interval.fields) + print >> fout, "\t".join(interval.fields) fout.flush() + if __name__ == "__main__": - main() \ No newline at end of file + main() diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/getIndels.py --- a/tools/regVariation/getIndels.py +++ b/tools/regVariation/getIndels.py @@ -8,13 +8,12 @@ from __future__ import division from galaxy import eggs -import pkg_resources +import pkg_resources pkg_resources.require( "bx-python" ) try: pkg_resources.require("numpy") except: pass -import psyco_full import sys from bx.cookbook import doc_optparse from galaxy.tools.exception_handling import * @@ -22,24 +21,24 @@ assert sys.version_info[:2] >= ( 2, 4 ) -def main(): +def main(): # Parsing Command Line here options, args = doc_optparse.parse( __doc__ ) try: - inp_file, out_file1 = args + inp_file, out_file1 = args except: print >> sys.stderr, "Tool initialization error." sys.exit() try: - fin = open(inp_file,'r') + open(inp_file, 'r') except: print >> sys.stderr, "Unable to open input file" sys.exit() try: - fout1 = open(out_file1,'w') - #fout2 = open(out_file2,'w') + fout1 = open(out_file1, 'w') + #fout2 = open(out_file2, 'w') except: print >> sys.stderr, "Unable to open output file" sys.exit() @@ -47,11 +46,10 @@ try: maf_reader = bx.align.maf.Reader( open(inp_file, 'r') ) except: - print >>sys.stderr, "Your MAF file appears to be malformed." + print >> sys.stderr, "Your MAF file appears to be malformed." sys.exit() - maf_count = 0 - print >>fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length" + print >> fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length" for block_ind, block in enumerate(maf_reader): if len(block.components) < 2: continue @@ -84,19 +82,19 @@ #write 2 if prev_pos_gap2 == 1: prev_pos_gap2 = 0 - print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1,nt_pos1+1,nt_pos2-1,nt_pos2-1+gaplen2,gaplen2) + print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1, nt_pos1+1, nt_pos2-1, nt_pos2-1+gaplen2, gaplen2 ) if pos == len(seq1)-1: - print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1,nt_pos1+1,nt_pos2+1-gaplen1,nt_pos2+1,gaplen1) + print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1, nt_pos1+1, nt_pos2+1-gaplen1, nt_pos2+1, gaplen1 ) else: prev_pos_gap1 = 0 prev_pos_gap2 = 0 """ if prev_pos_gap1 == 1: prev_pos_gap1 = 0 - print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,gaplen1) + print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, gaplen1 ) elif prev_pos_gap2 == 1: prev_pos_gap2 = 0 - print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos2-1,nt_pos2,gaplen2) + print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos2-1, nt_pos2, gaplen2 ) """ else: nt_pos1 += 1 @@ -105,19 +103,21 @@ #write both if prev_pos_gap1 == 1: prev_pos_gap1 = 0 - print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2-gaplen1,nt_pos2,gaplen1) + print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2-gaplen1, nt_pos2, gaplen1 ) elif prev_pos_gap2 == 1: prev_pos_gap2 = 0 - print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1-gaplen2,nt_pos1,nt_pos2-1,nt_pos2,gaplen2) + print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1-gaplen2, nt_pos1, nt_pos2-1, nt_pos2, gaplen2 ) else: gaplen2 += 1 prev_pos_gap2 = 1 #write 1 if prev_pos_gap1 == 1: prev_pos_gap1 = 0 - print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2,nt_pos2+gaplen1,gaplen1) + print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2, nt_pos2+gaplen1, gaplen1 ) if pos == len(seq1)-1: - print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1+1-gaplen2,nt_pos1+1,nt_pos2,nt_pos2+1,gaplen2) + print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1+1-gaplen2, nt_pos1+1, nt_pos2, nt_pos2+1, gaplen2 ) pos += 1 + + if __name__ == "__main__": main() diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/linear_regression.py --- a/tools/regVariation/linear_regression.py +++ b/tools/regVariation/linear_regression.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from galaxy import eggs -import sys, string +import sys from rpy import * import numpy @@ -15,14 +15,14 @@ outfile = sys.argv[4] outfile2 = sys.argv[5] -print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1) +print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 ) fout = open(outfile,'w') elems = [] for i, line in enumerate( file ( infile )): line = line.rstrip('\r\n') if len( line )>0 and not line.startswith( '#' ): elems = line.split( '\t' ) - break + break if i == 30: break # Hopefully we'll never get here... @@ -32,12 +32,12 @@ y_vals = [] x_vals = [] -for k,col in enumerate(x_cols): +for k, col in enumerate(x_cols): x_cols[k] = int(col)-1 x_vals.append([]) NA = 'NA' -for ind,line in enumerate( file( infile )): +for ind, line in enumerate( file( infile )): if line and not line.startswith( '#' ): try: fields = line.split("\t") @@ -46,7 +46,7 @@ except: yval = r('NA') y_vals.append(yval) - for k,col in enumerate(x_cols): + for k, col in enumerate(x_cols): try: xval = float(fields[col]) except: @@ -57,7 +57,7 @@ x_vals1 = numpy.asarray(x_vals).transpose() -dat= r.list(x=array(x_vals1), y=y_vals) +dat = r.list(x=array(x_vals1), y=y_vals) set_default_mode(NO_CONVERSION) try: @@ -66,8 +66,8 @@ stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain only non-numeric or invalid values.") set_default_mode(BASIC_CONVERSION) -coeffs=linear_model.as_py()['coefficients'] -yintercept= coeffs['(Intercept)'] +coeffs = linear_model.as_py()['coefficients'] +yintercept = coeffs['(Intercept)'] summary = r.summary(linear_model) co = summary.get('coefficients', 'NA') @@ -82,8 +82,8 @@ except: pass -print >>fout, "Y-intercept\t%s" %(yintercept) -print >>fout, "p-value (Y-intercept)\t%s" %(pvaly) +print >> fout, "Y-intercept\t%s" % (yintercept) +print >> fout, "p-value (Y-intercept)\t%s" % (pvaly) if len(x_vals) == 1: #Simple linear regression case with 1 predictor variable try: @@ -94,22 +94,22 @@ pval = r.round(float(co[1][3]), digits=10) except: pval = 'NA' - print >>fout, "Slope (c%d)\t%s" %(x_cols[0]+1,slope) - print >>fout, "p-value (c%d)\t%s" %(x_cols[0]+1,pval) + print >> fout, "Slope (c%d)\t%s" % ( x_cols[0]+1, slope ) + print >> fout, "p-value (c%d)\t%s" % ( x_cols[0]+1, pval ) else: #Multiple regression case with >1 predictors - ind=1 + ind = 1 while ind < len(coeffs.keys()): try: slope = r.round(float(coeffs['x'+str(ind)]), digits=10) except: slope = 'NA' - print >>fout, "Slope (c%d)\t%s" %(x_cols[ind-1]+1,slope) + print >> fout, "Slope (c%d)\t%s" % ( x_cols[ind-1]+1, slope ) try: pval = r.round(float(co[ind][3]), digits=10) except: pval = 'NA' - print >>fout, "p-value (c%d)\t%s" %(x_cols[ind-1]+1,pval) - ind+=1 + print >> fout, "p-value (c%d)\t%s" % ( x_cols[ind-1]+1, pval ) + ind += 1 rsq = summary.get('r.squared','NA') adjrsq = summary.get('adj.r.squared','NA') @@ -125,14 +125,14 @@ except: pass -print >>fout, "R-squared\t%s" %(rsq) -print >>fout, "Adjusted R-squared\t%s" %(adjrsq) -print >>fout, "F-statistic\t%s" %(fstat) -print >>fout, "Sigma\t%s" %(sigma) +print >> fout, "R-squared\t%s" % (rsq) +print >> fout, "Adjusted R-squared\t%s" % (adjrsq) +print >> fout, "F-statistic\t%s" % (fstat) +print >> fout, "Sigma\t%s" % (sigma) r.pdf( outfile2, 8, 8 ) if len(x_vals) == 1: #Simple linear regression case with 1 predictor variable - sub_title = "Slope = %s; Y-int = %s" %(slope,yintercept) + sub_title = "Slope = %s; Y-int = %s" % ( slope, yintercept ) try: r.plot(x=x_vals[0], y=y_vals, xlab="X", ylab="Y", sub=sub_title, main="Scatterplot with regression") r.abline(a=yintercept, b=slope, col="red") diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/logistic_regression_vif.py --- a/tools/regVariation/logistic_regression_vif.py +++ b/tools/regVariation/logistic_regression_vif.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from galaxy import eggs -import sys, string +import sys from rpy import * import numpy @@ -15,14 +15,14 @@ outfile = sys.argv[4] -print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1) +print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 ) fout = open(outfile,'w') elems = [] -for i, line in enumerate( file ( infile )): +for i, line in enumerate( file( infile ) ): line = line.rstrip('\r\n') if len( line )>0 and not line.startswith( '#' ): elems = line.split( '\t' ) - break + break if i == 30: break # Hopefully we'll never get here... @@ -32,12 +32,12 @@ y_vals = [] x_vals = [] -for k,col in enumerate(x_cols): +for k, col in enumerate(x_cols): x_cols[k] = int(col)-1 x_vals.append([]) NA = 'NA' -for ind,line in enumerate( file( infile )): +for ind, line in enumerate( file( infile )): if line and not line.startswith( '#' ): try: fields = line.split("\t") @@ -46,7 +46,7 @@ except: yval = r('NA') y_vals.append(yval) - for k,col in enumerate(x_cols): + for k, col in enumerate(x_cols): try: xval = float(fields[col]) except: @@ -57,46 +57,45 @@ x_vals1 = numpy.asarray(x_vals).transpose() -check1=0 -check0=0 +check1 = 0 +check0 = 0 for i in y_vals: if i == 1: - check1=1 + check1 = 1 if i == 0: - check0=1 -if check1==0 or check0==0: + check0 = 1 +if check1 == 0 or check0 == 0: sys.exit("Warning: logistic regression must have at least two classes") for i in y_vals: - if i not in [1,0,r('NA')]: - print >>fout, str(i) + if i not in [1, 0, r('NA')]: + print >> fout, str(i) sys.exit("Warning: the current version of this tool can run only with two classes and need to be labeled as 0 and 1.") - - -dat= r.list(x=array(x_vals1), y=y_vals) -novif=0 + +dat = r.list(x=array(x_vals1), y=y_vals) +novif = 0 set_default_mode(NO_CONVERSION) try: - linear_model = r.glm(r("y ~ x"), data = r.na_exclude(dat),family="binomial") + linear_model = r.glm(r("y ~ x"), data=r.na_exclude(dat), family="binomial") except RException, rex: stop_err("Error performing logistic regression on the input data.\nEither the response column or one of the predictor columns contain only non-numeric or invalid values.") if len(x_cols)>1: try: r('suppressPackageStartupMessages(library(car))') - r.assign('dat',dat) - r.assign('ncols',len(x_cols)) - vif=r.vif(r('glm(dat$y ~ ., data = na.exclude(data.frame(as.matrix(dat$x,ncol=ncols))->datx),family="binomial")')) + r.assign('dat', dat) + r.assign('ncols', len(x_cols)) + vif = r.vif(r('glm(dat$y ~ ., data = na.exclude(data.frame(as.matrix(dat$x,ncol=ncols))->datx), family="binomial")')) except RException, rex: print rex else: - novif=1 - + novif = 1 + set_default_mode(BASIC_CONVERSION) -coeffs=linear_model.as_py()['coefficients'] -null_deviance=linear_model.as_py()['null.deviance'] -residual_deviance=linear_model.as_py()['deviance'] -yintercept= coeffs['(Intercept)'] +coeffs = linear_model.as_py()['coefficients'] +null_deviance = linear_model.as_py()['null.deviance'] +residual_deviance = linear_model.as_py()['deviance'] +yintercept = coeffs['(Intercept)'] summary = r.summary(linear_model) co = summary.get('coefficients', 'NA') """ @@ -109,14 +108,14 @@ pvaly = r.round(float(co[0][3]), digits=10) except: pass -print >>fout, "response column\tc%d" %(y_col+1) -tempP=[] +print >> fout, "response column\tc%d" % (y_col+1) +tempP = [] for i in x_cols: tempP.append('c'+str(i+1)) -tempP=','.join(tempP) -print >>fout, "predictor column(s)\t%s" %(tempP) -print >>fout, "Y-intercept\t%s" %(yintercept) -print >>fout, "p-value (Y-intercept)\t%s" %(pvaly) +tempP = ','.join(tempP) +print >> fout, "predictor column(s)\t%s" % (tempP) +print >> fout, "Y-intercept\t%s" % (yintercept) +print >> fout, "p-value (Y-intercept)\t%s" % (pvaly) if len(x_vals) == 1: #Simple linear regression case with 1 predictor variable try: @@ -127,44 +126,43 @@ pval = r.round(float(co[1][3]), digits=10) except: pval = 'NA' - print >>fout, "Slope (c%d)\t%s" %(x_cols[0]+1,slope) - print >>fout, "p-value (c%d)\t%s" %(x_cols[0]+1,pval) + print >> fout, "Slope (c%d)\t%s" % ( x_cols[0]+1, slope ) + print >> fout, "p-value (c%d)\t%s" % ( x_cols[0]+1, pval ) else: #Multiple regression case with >1 predictors - ind=1 + ind = 1 while ind < len(coeffs.keys()): try: slope = r.round(float(coeffs['x'+str(ind)]), digits=10) except: slope = 'NA' - print >>fout, "Slope (c%d)\t%s" %(x_cols[ind-1]+1,slope) + print >> fout, "Slope (c%d)\t%s" % ( x_cols[ind-1]+1, slope ) try: pval = r.round(float(co[ind][3]), digits=10) except: pval = 'NA' - print >>fout, "p-value (c%d)\t%s" %(x_cols[ind-1]+1,pval) - ind+=1 + print >> fout, "p-value (c%d)\t%s" % ( x_cols[ind-1]+1, pval ) + ind += 1 rsq = summary.get('r.squared','NA') - try: - rsq= r.round(float((null_deviance-residual_deviance)/null_deviance), digits=5) - null_deviance= r.round(float(null_deviance), digits=5) - residual_deviance= r.round(float(residual_deviance), digits=5) + rsq = r.round(float((null_deviance-residual_deviance)/null_deviance), digits=5) + null_deviance = r.round(float(null_deviance), digits=5) + residual_deviance = r.round(float(residual_deviance), digits=5) except: pass -print >>fout, "Null deviance\t%s" %(null_deviance) -print >>fout, "Residual deviance\t%s" %(residual_deviance) -print >>fout, "pseudo R-squared\t%s" %(rsq) -print >>fout, "\n" -print >>fout, 'vif' +print >> fout, "Null deviance\t%s" % (null_deviance) +print >> fout, "Residual deviance\t%s" % (residual_deviance) +print >> fout, "pseudo R-squared\t%s" % (rsq) +print >> fout, "\n" +print >> fout, 'vif' -if novif==0: - py_vif=vif.as_py() - count=0 +if novif == 0: + py_vif = vif.as_py() + count = 0 for i in sorted(py_vif.keys()): - print >>fout,'c'+str(x_cols[count]+1) ,str(py_vif[i]) - count+=1 -elif novif==1: - print >>fout, "vif can calculate only when model have more than 1 predictor" + print >> fout, 'c'+str(x_cols[count]+1), str(py_vif[i]) + count += 1 +elif novif == 1: + print >> fout, "vif can calculate only when model have more than 1 predictor" diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/maf_cpg_filter.py --- a/tools/regVariation/maf_cpg_filter.py +++ b/tools/regVariation/maf_cpg_filter.py @@ -10,7 +10,7 @@ """ from galaxy import eggs -import pkg_resources +import pkg_resources pkg_resources.require( "bx-python" ) try: pkg_resources.require( "numpy" ) @@ -54,7 +54,7 @@ defn = "non-CpG" cpgfilter.run( reader, writer.write ) - print "%2.2f percent bases masked; Mask character = %s, Definition = %s" %(float(cpgfilter.masked)/float(cpgfilter.total) * 100, mask, defn) + print "%2.2f percent bases masked; Mask character = %s, Definition = %s" % ( float(cpgfilter.masked)/float(cpgfilter.total) * 100, mask, defn ) if __name__ == "__main__": main() diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/microsats_alignment_level.py --- a/tools/regVariation/microsats_alignment_level.py +++ b/tools/regVariation/microsats_alignment_level.py @@ -4,7 +4,11 @@ Uses SPUTNIK to fetch microsatellites and extracts orthologous repeats from the sputnik output. """ from galaxy import eggs -import sys, os, tempfile, string, math, re +import os +import re +import string +import sys +import tempfile def reverse_complement(text): DNA_COMP = string.maketrans( "ACGTacgt", "TGCAtgca" ) @@ -12,31 +16,26 @@ comp.reverse() return "".join(comp) + def main(): if len(sys.argv) != 8: - print >>sys.stderr, "Insufficient number of arguments." + print >> sys.stderr, "Insufficient number of arguments." sys.exit() infile = open(sys.argv[1],'r') separation = int(sys.argv[2]) outfile = sys.argv[3] - align_type = sys.argv[4] - if align_type == "2way": - align_type_len = 2 - elif align_type == "3way": - align_type_len = 3 mono_threshold = int(sys.argv[5]) non_mono_threshold = int(sys.argv[6]) allow_different_units = int(sys.argv[7]) - print "Min distance = %d bp; Min threshold for mono repeats = %d; Min threshold for non-mono repeats = %d; Allow different motifs = %s" %(separation, mono_threshold, non_mono_threshold, allow_different_units==1) + print "Min distance = %d bp; Min threshold for mono repeats = %d; Min threshold for non-mono repeats = %d; Allow different motifs = %s" % ( separation, mono_threshold, non_mono_threshold, allow_different_units==1 ) try: fout = open(outfile, "w") - print >>fout, "#Block\tSeq1_Name\tSeq1_Start\tSeq1_End\tSeq1_Type\tSeq1_Length\tSeq1_RepeatNumber\tSeq1_Unit\tSeq2_Name\tSeq2_Start\tSeq2_End\tSeq2_Type\tSeq2_Length\tSeq2_RepeatNumber\tSeq2_Unit" + print >> fout, "#Block\tSeq1_Name\tSeq1_Start\tSeq1_End\tSeq1_Type\tSeq1_Length\tSeq1_RepeatNumber\tSeq1_Unit\tSeq2_Name\tSeq2_Start\tSeq2_End\tSeq2_Type\tSeq2_Length\tSeq2_RepeatNumber\tSeq2_Unit" #sputnik_cmd = os.path.join(os.path.split(sys.argv[0])[0], "sputnik") sputnik_cmd = "sputnik" input = infile.read() - skipped = 0 block_num = 0 input = input.replace('\r','\n') for block in input.split('\n\n'): @@ -44,26 +43,24 @@ tmpin = tempfile.NamedTemporaryFile() tmpout = tempfile.NamedTemporaryFile() tmpin.write(block.strip()) - blk = tmpin.read() cmdline = sputnik_cmd + " " + tmpin.name + " > /dev/null 2>&1 >> " + tmpout.name try: os.system(cmdline) - except Exception, es: + except Exception: continue sputnik_out = tmpout.read() tmpin.close() tmpout.close() if sputnik_out != "": if len(block.split('>')[1:]) != 2: #len(sputnik_out.split('>')): - skipped += 1 continue align_block = block.strip().split('>') lendict = {'mononucleotide':1, 'dinucleotide':2, 'trinucleotide':3, 'tetranucleotide':4, 'pentanucleotide':5, 'hexanucleotide':6} - blockdict={} - r=0 - namelist=[] - for k,sput_block in enumerate(sputnik_out.split('>')[1:]): + blockdict = {} + r = 0 + namelist = [] + for k, sput_block in enumerate(sputnik_out.split('>')[1:]): whole_seq = ''.join(align_block[k+1].split('\n')[1:]).replace('\n','').strip() p = re.compile('\n(\S*nucleotide)') repeats = p.split(sput_block.strip()) @@ -71,13 +68,12 @@ j = 1 name = repeats[0].strip() try: - coords = re.search('\d+[-_:]\d+',name).group() - coords = coords.replace('_','-').replace(':','-') - except Exception, e: + coords = re.search('\d+[-_:]\d+', name).group() + coords = coords.replace('_', '-').replace(':', '-') + except Exception: coords = '0-0' - pass r += 1 - blockdict[r]={} + blockdict[r] = {} try: sp_name = name[:name.index('.')] chr_name = name[name.index('.'):name.index('(')] @@ -91,11 +87,10 @@ continue if blockdict[r].has_key('types'): - blockdict[r]['types'].append(repeats[j].strip()) #type of microsat + blockdict[r]['types'].append(repeats[j].strip()) #type of microsat else: - blockdict[r]['types'] = [repeats[j].strip()] #type of microsat + blockdict[r]['types'] = [repeats[j].strip()] #type of microsat - sequence = ''.join(align_block[r].split('\n')[1:]).replace('\n','').strip() start = int(repeats[j+1].split('--')[0].split(':')[0].strip()) #check to see if there are gaps before the start of the repeat, and change the start accordingly sgaps = 0 @@ -107,7 +102,7 @@ break #break at the 1st non-gap character ch_pos -= 1 if blockdict[r].has_key('starts'): - blockdict[r]['starts'].append(start+sgaps) #start co-ords adjusted with alignment co-ords to include GAPS + blockdict[r]['starts'].append(start+sgaps) #start co-ords adjusted with alignment co-ords to include GAPS else: blockdict[r]['starts'] = [start+sgaps] @@ -120,7 +115,7 @@ else: break #break at the 1st non-gap character if blockdict[r].has_key('ends'): - blockdict[r]['ends'].append(end+egaps) #end co-ords adjusted with alignment co-ords to include GAPS + blockdict[r]['ends'].append(end+egaps) #end co-ords adjusted with alignment co-ords to include GAPS else: blockdict[r]['ends'] = [end+egaps] @@ -134,20 +129,20 @@ gaps_before_start = whole_seq[:rel_start].count('-') if blockdict[r].has_key('gaps_before_start'): - blockdict[r]['gaps_before_start'].append(gaps_before_start) #lengths + blockdict[r]['gaps_before_start'].append(gaps_before_start) #lengths else: - blockdict[r]['gaps_before_start'] = [gaps_before_start] #lengths + blockdict[r]['gaps_before_start'] = [gaps_before_start] #lengths - whole_seq_start= int(coords.split('-')[0]) + whole_seq_start = int(coords.split('-')[0]) if blockdict[r].has_key('whole_seq_start'): - blockdict[r]['whole_seq_start'].append(whole_seq_start) #lengths + blockdict[r]['whole_seq_start'].append(whole_seq_start) #lengths else: - blockdict[r]['whole_seq_start'] = [whole_seq_start] #lengths + blockdict[r]['whole_seq_start'] = [whole_seq_start] #lengths if blockdict[r].has_key('lengths'): - blockdict[r]['lengths'].append(repeat_len) #lengths + blockdict[r]['lengths'].append(repeat_len) #lengths else: - blockdict[r]['lengths'] = [repeat_len] #lengths + blockdict[r]['lengths'] = [repeat_len] #lengths if blockdict[r].has_key('counts'): blockdict[r]['counts'].append(str(int(repeat_len)/lendict[repeats[j].strip()])) #Repeat Unit @@ -159,10 +154,10 @@ else: blockdict[r]['units'] = [repeat_seq[:lendict[repeats[j].strip()]]] #Repeat Unit - except Exception, eh: + except Exception: pass - j+=2 - #check the co-ords of all repeats corresponding to a sequence and remove adjacent repeats separated by less than the user-specified 'separation'. + j += 2 + #check the co-ords of all repeats corresponding to a sequence and remove adjacent repeats separated by less than the user-specified 'separation'. delete_index_list = [] for ind, item in enumerate(blockdict[r]['ends']): try: @@ -171,7 +166,7 @@ delete_index_list.append(ind) if ind+1 not in delete_index_list: delete_index_list.append(ind+1) - except Exception, ek: + except Exception: pass for index in delete_index_list: #mark them for deletion try: @@ -183,7 +178,7 @@ blockdict[r]['lengths'][index] = 'marked' blockdict[r]['counts'][index] = 'marked' blockdict[r]['units'][index] = 'marked' - except Exception, ej: + except Exception: pass #remove 'marked' elements from all the lists """ @@ -192,19 +187,19 @@ if elem == 'marked': blockdict[r][key].remove(elem) """ - #print blockdict + #print blockdict - #make sure that the blockdict has keys for both the species + #make sure that the blockdict has keys for both the species if (1 not in blockdict) or (2 not in blockdict): continue visited_2 = [0 for x in range(len(blockdict[2]['starts']))] - for ind1,coord_s1 in enumerate(blockdict[1]['starts']): + for ind1, coord_s1 in enumerate(blockdict[1]['starts']): if coord_s1 == 'marked': continue coord_e1 = blockdict[1]['ends'][ind1] out = [] - for ind2,coord_s2 in enumerate(blockdict[2]['starts']): + for ind2, coord_s2 in enumerate(blockdict[2]['starts']): if coord_s2 == 'marked': visited_2[ind2] = 1 continue @@ -216,7 +211,7 @@ else: if (blockdict[1]['units'][ind1] not in blockdict[2]['units'][ind2]*2) and (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2): continue - #print >>sys.stderr, (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2) + #print >> sys.stderr, (reverse_complement(blockdict[1]['units'][ind1]) not in blockdict[2]['units'][ind2]*2) #skip if the repeat number thresholds are not met if blockdict[1]['types'][ind1] == 'mononucleotide': if (int(blockdict[1]['counts'][ind1]) < mono_threshold): @@ -231,12 +226,12 @@ else: if (int(blockdict[2]['counts'][ind2]) < non_mono_threshold): continue - #print "s1,e1=%s,%s; s2,e2=%s,%s" %(coord_s1,coord_e1,coord_s2,coord_e2) - if (coord_s1 in range(coord_s2,coord_e2)) or (coord_e1 in range(coord_s2,coord_e2)): + #print "s1,e1=%s,%s; s2,e2=%s,%s" % ( coord_s1, coord_e1, coord_s2, coord_e2 ) + if (coord_s1 in range(coord_s2, coord_e2)) or (coord_e1 in range(coord_s2, coord_e2)): out.append(str(block_num)) out.append(namelist[0]) rel_start = blockdict[1]['whole_seq_start'][ind1] + coord_s1 - blockdict[1]['gaps_before_start'][ind1] - rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) + rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) out.append(str(rel_start)) out.append(str(rel_end)) out.append(blockdict[1]['types'][ind1]) @@ -245,16 +240,16 @@ out.append(blockdict[1]['units'][ind1]) out.append(namelist[1]) rel_start = blockdict[2]['whole_seq_start'][ind2] + coord_s2 - blockdict[2]['gaps_before_start'][ind2] - rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) + rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) out.append(str(rel_start)) out.append(str(rel_end)) out.append(blockdict[2]['types'][ind2]) out.append(blockdict[2]['lengths'][ind2]) out.append(blockdict[2]['counts'][ind2]) out.append(blockdict[2]['units'][ind2]) - print >>fout, '\t'.join(out) + print >> fout, '\t'.join(out) visited_2[ind2] = 1 - out=[] + out = [] if 0 in visited_2: #there are still some elements in 2nd set which haven't found orthologs yet. for ind2, coord_s2 in enumerate(blockdict[2]['starts']): @@ -264,7 +259,7 @@ continue coord_e2 = blockdict[2]['ends'][ind2] out = [] - for ind1,coord_s1 in enumerate(blockdict[1]['starts']): + for ind1, coord_s1 in enumerate(blockdict[1]['starts']): if coord_s1 == 'marked': continue coord_e1 = blockdict[1]['ends'][ind1] @@ -290,11 +285,11 @@ if (int(blockdict[2]['counts'][ind2]) < non_mono_threshold): continue - if (coord_s2 in range(coord_s1,coord_e1)) or (coord_e2 in range(coord_s1,coord_e1)): - out.append(str(block_num)) + if (coord_s2 in range(coord_s1, coord_e1)) or (coord_e2 in range(coord_s1, coord_e1)): + out.append(str(block_num)) out.append(namelist[0]) rel_start = blockdict[1]['whole_seq_start'][ind1] + coord_s1 - blockdict[1]['gaps_before_start'][ind1] - rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) + rel_end = rel_start + int(blockdict[1]['lengths'][ind1]) out.append(str(rel_start)) out.append(str(rel_end)) out.append(blockdict[1]['types'][ind1]) @@ -303,21 +298,21 @@ out.append(blockdict[1]['units'][ind1]) out.append(namelist[1]) rel_start = blockdict[2]['whole_seq_start'][ind2] + coord_s2 - blockdict[2]['gaps_before_start'][ind2] - rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) + rel_end = rel_start + int(blockdict[2]['lengths'][ind2]) out.append(str(rel_start)) out.append(str(rel_end)) out.append(blockdict[2]['types'][ind2]) out.append(blockdict[2]['lengths'][ind2]) out.append(blockdict[2]['counts'][ind2]) out.append(blockdict[2]['units'][ind2]) - print >>fout, '\t'.join(out) + print >> fout, '\t'.join(out) visited_2[ind2] = 1 - out=[] + out = [] - #print >>fout, blockdict + #print >> fout, blockdict except Exception, exc: - print >>sys.stderr, "type(exc),args,exc: %s, %s, %s" %(type(exc), exc.args, exc) + print >> sys.stderr, "type(exc),args,exc: %s, %s, %s" % ( type(exc), exc.args, exc ) + if __name__ == "__main__": main() - diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/microsats_mutability.py --- a/tools/regVariation/microsats_mutability.py +++ b/tools/regVariation/microsats_mutability.py @@ -4,7 +4,10 @@ This tool computes microsatellite mutability for the orthologous microsatellites fetched from 'Extract Orthologous Microsatellites from pair-wise alignments' tool. """ from galaxy import eggs -import sys, string, re, commands, tempfile, os, fileinput +import fileinput +import string +import sys +import tempfile from galaxy.tools.util.galaxyops import * from bx.intervals.io import * from bx.intervals.operations import quicksect @@ -19,7 +22,7 @@ p_group_cols = [p_group, p_group+7] s_group_cols = [s_group, s_group+7] num_generations = int(sys.argv[7]) -region = sys.argv[8] +region = sys.argv[8] int_file = sys.argv[9] if int_file != "None": #User has specified an interval file try: @@ -28,31 +31,35 @@ chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[11] ) except: stop_err("Unable to open input Interval file") - + + def stop_err(msg): sys.stderr.write(msg) sys.exit() + def reverse_complement(text): DNA_COMP = string.maketrans( "ACGTacgt", "TGCAtgca" ) comp = [ch for ch in text.translate(DNA_COMP)] comp.reverse() return "".join(comp) + def get_unique_elems(elems): - seen=set() + seen = set() return[x for x in elems if x not in seen and not seen.add(x)] + def get_binned_lists(uniqlist, binsize): - binnedlist=[] + binnedlist = [] uniqlist.sort() start = int(uniqlist[0]) - bin_ind=0 - l_ind=0 + bin_ind = 0 + l_ind = 0 binnedlist.append([]) while l_ind < len(uniqlist): elem = int(uniqlist[l_ind]) - if elem in range(start,start+binsize): + if elem in range(start, start+binsize): binnedlist[bin_ind].append(elem) else: start += binsize @@ -62,39 +69,38 @@ l_ind += 1 return binnedlist -def fetch_weight(H,C,t): + +def fetch_weight(H, C, t): if (H-(C-H)) < t: return 2.0 else: return 1.0 -def mutabilityEstimator(repeats1,repeats2,thresholds): + +def mutabilityEstimator(repeats1, repeats2, thresholds): mut_num = 0.0 #Mutability Numerator mut_den = 0.0 #Mutability denominator - for ind,H in enumerate(repeats1): + for ind, H in enumerate(repeats1): C = repeats2[ind] t = thresholds[ind] - w = fetch_weight(H,C,t) + w = fetch_weight(H, C, t) mut_num += ((H-C)*(H-C)*w) mut_den += w return [mut_num, mut_den] + def output_writer(blk, blk_lines): global winspecies, speciesind - all_elems_1=[] - all_elems_2=[] - all_s_elems_1=[] - all_s_elems_2=[] + all_elems_1 = [] + all_elems_2 = [] + all_s_elems_1 = [] + all_s_elems_2 = [] for bline in blk_lines: if not(bline): continue items = bline.split('\t') seq1 = items[1] - start1 = items[2] - end1 = items[3] seq2 = items[8] - start2 = items[9] - end2 = items[10] if p_group_cols[0] == 6: items[p_group_cols[0]] = int(items[p_group_cols[0]]) items[p_group_cols[1]] = int(items[p_group_cols[1]]) @@ -111,8 +117,8 @@ if s_group_cols[0] != -1: uniq_s_elems_1 = get_unique_elems(all_s_elems_1) uniq_s_elems_2 = get_unique_elems(all_s_elems_2) - mut1={} - mut2={} + mut1 = {} + mut2 = {} count1 = {} count2 = {} """ @@ -120,12 +126,12 @@ uniq_elems_1 = get_unique_units(j.sort(lambda x, y: len(x)-len(y))) """ if p_group_cols[0] == 6: #i.e. the option chosen is group-by repeat number. - uniq_elems_1 = get_binned_lists(uniq_elems_1,p_bin_size) - uniq_elems_2 = get_binned_lists(uniq_elems_2,p_bin_size) + uniq_elems_1 = get_binned_lists( uniq_elems_1, p_bin_size ) + uniq_elems_2 = get_binned_lists( uniq_elems_2, p_bin_size ) if s_group_cols[0] == 6: #i.e. the option chosen is subgroup-by repeat number. - uniq_s_elems_1 = get_binned_lists(uniq_s_elems_1,s_bin_size) - uniq_s_elems_2 = get_binned_lists(uniq_s_elems_2,s_bin_size) + uniq_s_elems_1 = get_binned_lists( uniq_s_elems_1, s_bin_size ) + uniq_s_elems_2 = get_binned_lists( uniq_s_elems_2, s_bin_size ) for pitem1 in uniq_elems_1: #repeats1 = [] @@ -143,61 +149,61 @@ if p_group_cols[0] == 6: belems[p_group_cols[0]] = int(belems[p_group_cols[0]]) if belems[p_group_cols[0]] in pitem1: - if belems[s_group_cols[0]]==sitem1: + if belems[s_group_cols[0]] == sitem1: repeats1.append(int(belems[6])) repeats2.append(int(belems[13])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut1[str(pitem1)+'\t'+str(sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds) + mut1[str(pitem1)+'\t'+str(sitem1)] = mutabilityEstimator( repeats1, repeats2, thresholds ) if region == 'align': - count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2)) - else: + count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats1) + count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats1) elif winspecies == 2: - count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats2) + count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats2) else: if type(sitem1) == list: if s_group_cols[0] == 6: belems[s_group_cols[0]] = int(belems[s_group_cols[0]]) - if belems[p_group_cols[0]]==pitem1 and belems[s_group_cols[0]] in sitem1: + if belems[p_group_cols[0]] == pitem1 and belems[s_group_cols[0]] in sitem1: repeats1.append(int(belems[6])) repeats2.append(int(belems[13])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut1["%s\t%s" %(pitem1,sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds) + mut1["%s\t%s" % ( pitem1, sitem1 )] = mutabilityEstimator( repeats1, repeats2, thresholds ) if region == 'align': - count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2)) - else: + count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count1[str(pitem1)+'\t'+str(sitem1)]=sum(repeats1) + count1[str(pitem1)+'\t'+str(sitem1)] = sum(repeats1) elif winspecies == 2: - count1[str(pitem1)+'\t'+str(sitem1)]=sum(repeats2) + count1[str(pitem1)+'\t'+str(sitem1)] = sum(repeats2) else: - if belems[p_group_cols[0]]==pitem1 and belems[s_group_cols[0]]==sitem1: + if belems[p_group_cols[0]] == pitem1 and belems[s_group_cols[0]] == sitem1: repeats1.append(int(belems[6])) repeats2.append(int(belems[13])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut1["%s\t%s" %(pitem1,sitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds) + mut1["%s\t%s" % ( pitem1, sitem1 )] = mutabilityEstimator( repeats1, repeats2, thresholds ) if region == 'align': - count1[str(pitem1)+'\t'+str(sitem1)]=min(sum(repeats1),sum(repeats2)) - else: + count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats1) + count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats1) elif winspecies == 2: - count1["%s\t%s" %(pitem1,sitem1)]=sum(repeats2) + count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats2) else: #Sub-group by feature is None for bline in blk_lines: belems = bline.split('\t') if type(pitem1) == list: - #print >>sys.stderr, "item: " + str(item1) + #print >> sys.stderr, "item: " + str(item1) if p_group_cols[0] == 6: belems[p_group_cols[0]] = int(belems[p_group_cols[0]]) if belems[p_group_cols[0]] in pitem1: @@ -208,21 +214,21 @@ else: thresholds.append(non_mono_threshold) else: - if belems[p_group_cols[0]]==pitem1: + if belems[p_group_cols[0]] == pitem1: repeats1.append(int(belems[6])) repeats2.append(int(belems[13])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut1["%s" %(pitem1)]=mutabilityEstimator(repeats1,repeats2,thresholds) + mut1["%s" % (pitem1)] = mutabilityEstimator( repeats1, repeats2, thresholds ) if region == 'align': - count1["%s" %(pitem1)]=min(sum(repeats1),sum(repeats2)) - else: + count1["%s" % (pitem1)] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count1[str(pitem1)]=sum(repeats1) + count1[str(pitem1)] = sum(repeats1) elif winspecies == 2: - count1[str(pitem1)]=sum(repeats2) + count1[str(pitem1)] = sum(repeats2) for pitem2 in uniq_elems_2: #repeats1 = [] @@ -239,57 +245,57 @@ if type(pitem2) == list: if p_group_cols[0] == 6: belems[p_group_cols[1]] = int(belems[p_group_cols[1]]) - if belems[p_group_cols[1]] in pitem2 and belems[s_group_cols[1]]==sitem2: + if belems[p_group_cols[1]] in pitem2 and belems[s_group_cols[1]] == sitem2: repeats2.append(int(belems[13])) repeats1.append(int(belems[6])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds) + mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds ) #count2[str(pitem2)+'\t'+str(sitem2)]=len(repeats2) if region == 'align': - count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2)) - else: + count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2) + count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2) elif winspecies == 2: - count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1) + count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1) else: if type(sitem2) == list: if s_group_cols[0] == 6: belems[s_group_cols[1]] = int(belems[s_group_cols[1]]) - if belems[p_group_cols[1]]==pitem2 and belems[s_group_cols[1]] in sitem2: + if belems[p_group_cols[1]] == pitem2 and belems[s_group_cols[1]] in sitem2: repeats2.append(int(belems[13])) repeats1.append(int(belems[6])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds) + mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds ) if region == 'align': - count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2)) - else: + count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2) + count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2) elif winspecies == 2: - count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1) + count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1) else: - if belems[p_group_cols[1]]==pitem2 and belems[s_group_cols[1]]==sitem2: + if belems[p_group_cols[1]] == pitem2 and belems[s_group_cols[1]] == sitem2: repeats1.append(int(belems[13])) repeats2.append(int(belems[6])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut2["%s\t%s" %(pitem2,sitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds) + mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds ) if region == 'align': - count2["%s\t%s" %(pitem2,sitem2)]=min(sum(repeats1),sum(repeats2)) - else: + count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) ) + else: if winspecies == 1: - count2["%s\t%s" %(pitem2,sitem2)]=len(repeats2) + count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2) elif winspecies == 2: - count2["%s\t%s" %(pitem2,sitem2)]=len(repeats1) + count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1) else: #Sub-group by feature is None for bline in blk_lines: belems = bline.split('\t') @@ -304,21 +310,21 @@ else: thresholds.append(non_mono_threshold) else: - if belems[p_group_cols[1]]==pitem2: + if belems[p_group_cols[1]] == pitem2: repeats2.append(int(belems[13])) repeats1.append(int(belems[6])) if belems[4] == 'mononucleotide': thresholds.append(mono_threshold) else: thresholds.append(non_mono_threshold) - mut2["%s" %(pitem2)]=mutabilityEstimator(repeats2,repeats1,thresholds) + mut2["%s" % (pitem2)] = mutabilityEstimator( repeats2, repeats1, thresholds ) if region == 'align': - count2["%s" %(pitem2)]=min(sum(repeats1),sum(repeats2)) + count2["%s" % (pitem2)] = min( sum(repeats1), sum(repeats2) ) else: if winspecies == 1: - count2["%s" %(pitem2)]=sum(repeats2) + count2["%s" % (pitem2)] = sum(repeats2) elif winspecies == 2: - count2["%s" %(pitem2)]=sum(repeats1) + count2["%s" % (pitem2)] = sum(repeats1) for key in mut1.keys(): if key in mut2.keys(): mut = (mut1[key][0]+mut2[key][0])/(mut1[key][1]+mut2[key][1]) @@ -328,9 +334,9 @@ unit_found = False if p_group_cols[0] == 7 or s_group_cols[0] == 7: #if it is Repeat Unit (AG, GCT etc.) check for reverse-complements too if p_group_cols[0] == 7: - this,other = 0,1 + this, other = 0, 1 else: - this,other = 1,0 + this, other = 1, 0 groups1 = key.split('\t') mutn = mut1[key][0] mutd = mut1[key][1] @@ -351,28 +357,29 @@ else: mut = mut1[key][0]/mut1[key][1] count = count1[key] - mut = "%.2e" %(mut/num_generations) + mut = "%.2e" % (mut/num_generations) if region == 'align': - print >>fout, str(blk) + '\t'+seq1 + '\t' + seq2 + '\t' +key.strip()+ '\t'+str(mut) + '\t'+ str(count) + print >> fout, str(blk) + '\t'+seq1 + '\t' + seq2 + '\t' +key.strip()+ '\t'+str(mut) + '\t'+ str(count) elif region == 'win': - fout.write("%s\t%s\t%s\t%s\n" %(blk,key.strip(),mut,count)) + fout.write("%s\t%s\t%s\t%s\n" % ( blk, key.strip(), mut, count )) fout.flush() #catch any remaining repeats, for instance if the orthologous position contained different repeat units for remaining_key in mut2.keys(): mut = mut2[remaining_key][0]/mut2[remaining_key][1] - mut = "%.2e" %(mut/num_generations) + mut = "%.2e" % (mut/num_generations) count = count2[remaining_key] if region == 'align': - print >>fout, str(blk) + '\t'+seq1 + '\t'+seq2 + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count) + print >> fout, str(blk) + '\t'+seq1 + '\t'+seq2 + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count) elif region == 'win': - fout.write("%s\t%s\t%s\t%s\n" %(blk,remaining_key.strip(),mut,count)) + fout.write("%s\t%s\t%s\t%s\n" % ( blk, remaining_key.strip(), mut, count )) fout.flush() - #print >>fout, blk + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count) + #print >> fout, blk + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count) + def counter(node, start, end, report_func): if start <= node.start < end and start < node.end <= end: - report_func(node) + report_func(node) if node.right: counter(node.right, start, end, report_func) if node.left: @@ -381,8 +388,8 @@ counter(node.right, start, end, report_func) elif node.start >= end and node.left and node.left.maxend > start: counter(node.left, start, end, report_func) - - + + def main(): infile = sys.argv[1] @@ -400,21 +407,18 @@ if region == 'win': if dbkey_i in elems[1]: winspecies = 1 - speciesind = 1 + speciesind = 1 elif dbkey_i in elems[8]: winspecies = 2 speciesind = 8 else: - stop_err("The species build corresponding to your interval file is not present in the Microsatellite file.") + stop_err("The species build corresponding to your interval file is not present in the Microsatellite file.") fin = open(infile, 'r') skipped = 0 - blk=0 - win=0 - linestr="" + linestr = "" if region == 'win': - msats = NiceReaderWrapper( fileinput.FileInput( infile ), chrom_col = speciesind, start_col = speciesind+1, @@ -435,7 +439,7 @@ ichr = ielems[chr_col_i] istart = int(ielems[start_col_i]) iend = int(ielems[end_col_i]) - isrc = "%s.%s" %(dbkey_i,ichr) + isrc = "%s.%s" % ( dbkey_i, ichr ) if isrc not in msatTree.chroms: continue result = [] @@ -450,14 +454,14 @@ tmpfile1.seek(0) output_writer(iline, tmpfile1.readlines()) except: - skipped+=1 + skipped += 1 if skipped: - print "Skipped %d intervals as invalid." %(skipped) + print "Skipped %d intervals as invalid." % (skipped) elif region == 'align': if s_group_cols[0] != -1: - print >>fout, "#Window\tSpecies_1\tSpecies_2\tGroupby_Feature\tSubGroupby_Feature\tMutability\tCount" + print >> fout, "#Window\tSpecies_1\tSpecies_2\tGroupby_Feature\tSubGroupby_Feature\tMutability\tCount" else: - print >>fout, "#Window\tSpecies_1\tWindow_Start\tWindow_End\tSpecies_2\tGroupby_Feature\tMutability\tCount" + print >> fout, "#Window\tSpecies_1\tWindow_Start\tWindow_End\tSpecies_2\tGroupby_Feature\tMutability\tCount" prev_bnum = -1 try: for line in fin: @@ -481,9 +485,11 @@ prev_bnum = new_bnum output_writer(prev_bnum, linestr.strip().replace('\r','\n').split('\n')) except Exception, ea: - print >>sys.stderr, ea + print >> sys.stderr, ea skipped += 1 if skipped: - print "Skipped %d lines as invalid." %(skipped) + print "Skipped %d lines as invalid." % (skipped) + + if __name__ == "__main__": - main() \ No newline at end of file + main() diff -r c043a2ca8051de612d4e895ba55e6dce0697a8d3 -r 82d2c2109792d098ddc5ede7992ae132e596584a tools/regVariation/partialR_square.py --- a/tools/regVariation/partialR_square.py +++ b/tools/regVariation/partialR_square.py @@ -2,7 +2,7 @@ from galaxy import eggs -import sys, string +import sys from rpy import * import numpy @@ -13,6 +13,7 @@ sys.stderr.write(msg) sys.exit() + def sscombs(s): if len(s) == 1: return [s] @@ -26,14 +27,14 @@ x_cols = sys.argv[3].split(',') outfile = sys.argv[4] -print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1) +print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 ) fout = open(outfile,'w') for i, line in enumerate( file ( infile )): line = line.rstrip('\r\n') if len( line )>0 and not line.startswith( '#' ): elems = line.split( '\t' ) - break + break if i == 30: break # Hopefully we'll never get here... @@ -43,7 +44,7 @@ y_vals = [] x_vals = [] -for k,col in enumerate(x_cols): +for k, col in enumerate(x_cols): x_cols[k] = int(col)-1 x_vals.append([]) """ @@ -51,13 +52,13 @@ float( elems[x_cols[k]] ) except: try: - msg = "This operation cannot be performed on non-numeric column %d containing value '%s'." %( col, elems[x_cols[k]] ) + msg = "This operation cannot be performed on non-numeric column %d containing value '%s'." % ( col, elems[x_cols[k]] ) except: msg = "This operation cannot be performed on non-numeric data." stop_err( msg ) """ NA = 'NA' -for ind,line in enumerate( file( infile )): +for ind, line in enumerate( file( infile )): if line and not line.startswith( '#' ): try: fields = line.split("\t") @@ -65,20 +66,20 @@ yval = float(fields[y_col]) except Exception, ey: yval = r('NA') - #print >>sys.stderr, "ey = %s" %ey + #print >> sys.stderr, "ey = %s" %ey y_vals.append(yval) - for k,col in enumerate(x_cols): + for k, col in enumerate(x_cols): try: xval = float(fields[col]) except Exception, ex: xval = r('NA') - #print >>sys.stderr, "ex = %s" %ex + #print >> sys.stderr, "ex = %s" %ex x_vals[k].append(xval) except: pass x_vals1 = numpy.asarray(x_vals).transpose() -dat= r.list(x=array(x_vals1), y=y_vals) +dat = r.list(x=array(x_vals1), y=y_vals) set_default_mode(NO_CONVERSION) try: @@ -91,7 +92,7 @@ fullr2 = summary.get('r.squared','NA') if fullr2 == 'NA': - stop_error("Error in linear regression") + stop_err("Error in linear regression") if len(x_vals) < 10: s = "" @@ -100,10 +101,10 @@ else: stop_err("This tool only works with less than 10 predictors.") -print >>fout, "#Model\tR-sq\tpartial_R_Terms\tpartial_R_Value" +print >> fout, "#Model\tR-sq\tpartial_R_Terms\tpartial_R_Value" all_combos = sorted(sscombs(s), key=len) all_combos.reverse() -for j,cols in enumerate(all_combos): +for j, cols in enumerate(all_combos): #if len(cols) == len(s): #Same as the full model above # continue if len(cols) == 1: @@ -113,7 +114,7 @@ for col in cols: x_v.append(x_vals[int(col)]) x_vals1 = numpy.asarray(x_v).transpose() - dat= r.list(x=array(x_vals1), y=y_vals) + dat = r.list(x=array(x_vals1), y=y_vals) set_default_mode(NO_CONVERSION) red = r.lm(r("y ~ x"), data= dat) #Reduced model set_default_mode(BASIC_CONVERSION) @@ -136,11 +137,11 @@ partial_R_col_str = "-" partial_R = "-" try: - redr2 = "%.4f" %(float(redr2)) + redr2 = "%.4f" % (float(redr2)) except: pass try: - partial_R = "%.4f" %(float(partial_R)) + partial_R = "%.4f" % (float(partial_R)) except: pass - print >>fout, "%s\t%s\t%s\t%s" %(col_str,redr2,partial_R_col_str,partial_R) + print >> fout, "%s\t%s\t%s\t%s" % ( col_str, redr2, partial_R_col_str, partial_R ) This diff is so big that we needed to truncate the remainder. Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
commits-noreply@bitbucket.org