April 2010 - galaxy-dev - lists.galaxyproject.org

[hg] galaxy 3642: Merge
by Greg Von Kuster 15 Apr '10

15 Apr '10

details: http://www.bx.psu.edu/hg/galaxy/rev/122a4568c046 changeset: 3642:122a4568c046 user: jeremy goecks <jeremy.goecks(a)emory.edu> date: Tue Apr 13 17:29:18 2010 -0400 description: Merge diffstat: templates/root/history.mako | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diffs (11 lines): diff -r 37c075416918 -r 122a4568c046 templates/root/history.mako --- a/templates/root/history.mako Tue Apr 13 17:12:00 2010 -0400 +++ b/templates/root/history.mako Tue Apr 13 17:29:18 2010 -0400 @@ -47,6 +47,7 @@ } }); %endif + $(".tipsy").remove(); } }); return false;

1 0

[hg] galaxy 3641: Rudimentary Cufflinks wrapper. Also created di...
by Greg Von Kuster 15 Apr '10

15 Apr '10

details: http://www.bx.psu.edu/hg/galaxy/rev/37c075416918 changeset: 3641:37c075416918 user: jeremy goecks <jeremy.goecks(a)emory.edu> date: Tue Apr 13 17:12:00 2010 -0400 description: Rudimentary Cufflinks wrapper. Also created directory for all NGS RNA-seq tools, and Tophat now uses Bowtie indices rather than own indices file. diffstat: test-data/cufflinks_in.sam | 183 ++++++++++++++++++++++++++++++++++++ test-data/cufflinks_out1.gtf | 4 + test-data/cufflinks_out2.expr | 2 + test-data/cufflinks_out3.expr | 2 + tool-data/tophat_indices.loc.sample | 29 ----- tool_conf.xml.sample | 5 +- tools/ngs_rna/cufflinks_wrapper.py | 83 ++++++++++++++++ tools/ngs_rna/cufflinks_wrapper.xml | 91 +++++++++++++++++ tools/ngs_rna/tophat_wrapper.py | 80 +++++++++++++++ tools/ngs_rna/tophat_wrapper.xml | 129 +++++++++++++++++++++++++ tools/tophat/tophat_wrapper.py | 80 --------------- tools/tophat/tophat_wrapper.xml | 124 ------------------------ 12 files changed, 577 insertions(+), 235 deletions(-) diffs (872 lines): diff -r 869e494a8074 -r 37c075416918 test-data/cufflinks_in.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cufflinks_in.sam Tue Apr 13 17:12:00 2010 -0400 @@ -0,0 +1,183 @@ +test_mRNA_3_187_51 99 test_chromosome 53 255 75M = 163 0 TACTATTTGACTAGACTGGAGGCGCTTGCGACTGAGCTAGGACGTGCCACTACGGGGATGACGACTCGGACTACG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_4_191_5d 163 test_chromosome 54 255 75M = 167 0 ACTATCTGACGAGACTGGAGGCGCTTGCGACTGAGCTAGGACGTACCATTACGCGGATGACGACTAGGACTACGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 +test_mRNA_5_197_46 97 test_chromosome 55 255 75M = 173 0 CTATCTGACTAGACTCGAGGCGCTTGCGTCTGAGCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_6_182_59 99 test_chromosome 56 255 75M = 158 0 TATCTGACTAGACTGGAGGCGCTTGCGACTGAGCTAGGACGTGCCAGTACGGGGATGACGACTAGGACTACGGAC 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_8_155_9 163 test_chromosome 58 255 75M = 131 0 TGTGACTAGACTGGAGGCGCTTGCGACTGAGCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACGGACGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_8_197_1 99 test_chromosome 58 255 75M = 173 0 TCTGACTAGACTGGAGGCGCTTGCGACTGAGCTAGGACGTGACACTACGGGGATGGCGACTAGGACTACGGACGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_9_179_52 163 test_chromosome 59 255 75M = 155 0 CTGACTAGACTGGAGGCGCTCGCGACTGAGCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACGGACGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_11_190_1a 99 test_chromosome 61 255 75M = 166 0 GACTAGACTGGAGGCGCTTGCGACTGAGCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACGGACGGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_16_194_10 163 test_chromosome 66 255 75M = 170 0 GACTGGATGCGCTTGCGACTGAGCTAGGACGTGCCACTACGGGGATGACGACTCGGACTACGGACGGACTTAAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_21_208_24 163 test_chromosome 71 255 75M = 184 0 GAGGCGCTTGCGACTGAGCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_22_173_62 99 test_chromosome 72 255 75M = 149 0 AGGCGCTTGCGACTGAGCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_23_186_42 163 test_chromosome 73 255 75M = 162 0 GGCGCTTGTGACTGAGCTAGGACGTGCCACTACGGGGATGAAGACTAGGACTACGGACGGACTTAGAGCGTCAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_26_189_30 163 test_chromosome 76 255 75M = 165 0 GCTTGCGACTGAGCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGC 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_28_188_11 99 test_chromosome 78 255 75M = 164 0 TTGCGACTGAGCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACGAACGGACTTAGAGCGTCAGATGCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_28_206_1f 73 test_chromosome 78 255 75M * 0 0 TTGCGACTGAGCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGACGCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_30_231_3c 161 test_chromosome 80 255 75M = 207 0 GCGACTGAGCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_33_223_4e 73 test_chromosome 83 255 75M * 0 0 ACTGAGCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_36_146_27 163 test_chromosome 86 255 75M = 122 0 GCGCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACAGACGGACTTAGAGCGTCAGATGCAGCGACTGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_36_218_12 99 test_chromosome 86 255 75M = 194 0 GAGCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACGGACGGCCTTAGAGCGTCAGATGCAGCGACTGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_38_199_29 99 test_chromosome 88 255 75M = 175 0 GCTAGGACGTGCCACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_39_219_5c 99 test_chromosome 89 255 75M = 195 0 CTAGGACGTCCCACTATGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGGCTGGACTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_41_236_55 97 test_chromosome 91 255 75M = 212 0 AGGACGTGCCACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGAATATT 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_42_209_25 99 test_chromosome 92 255 75M = 185 0 GGACGTGCCACTACGTGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_44_193_3f 99 test_chromosome 94 255 75M = 169 0 ACGTGCCACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGTCTATTTAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_44_197_35 99 test_chromosome 94 255 75M = 173 0 ACGTGCAACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_44_225_1e 163 test_chromosome 94 255 75M = 201 0 ACGTGCCACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCGGGTGCAGCGACTGGACTATTTAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_46_195_17 137 test_chromosome 96 255 75M * 0 0 GTGCCACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_46_232_2f 99 test_chromosome 96 255 75M = 208 0 GTGCCACTACGGGGATGACGACTAGGACTACGGCCGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_48_207_39 73 test_chromosome 98 255 75M * 0 0 GCCCCTACGGGGATGACGACTAGGACTACGGACGGATTTAGACCGTCAGATGCAGCGACTGGACTATTTAGGACG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_48_249_20 161 test_chromosome 98 255 75M = 225 0 GCCACTACGGGGATGACGACTAGGACGACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGGACG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_50_224_2d 163 test_chromosome 100 255 75M = 200 0 CACTACGAGGATGACGTCTAGGACTACGGACGGACTTAGAGCGTCAGACGCAGCGACTGGACTATTTAGGACGAT 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_51_194_47 163 test_chromosome 101 255 75M = 170 0 ACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_51_194_49 99 test_chromosome 101 255 75M = 170 0 ACTACGGGGATGACGACTAGGCCTACGGATGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_51_237_a 99 test_chromosome 101 255 75M = 213 0 ACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_51_248_14 97 test_chromosome 101 255 75M = 224 0 ACTACGGGGATGACGACGAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGAACTTTTTAGGACGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_52_261_1b 97 test_chromosome 102 255 75M = 237 0 CTACGGGAATGACGACTAGGGCTACGGAGGGACTTACAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 +test_mRNA_53_212_19 99 test_chromosome 103 255 75M = 188 0 TACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGAATATTTAGGACGATCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_53_272_5a 161 test_chromosome 103 255 75M = 248 0 TACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_56_183_56 99 test_chromosome 106 255 75M = 159 0 GGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTGGGACGATCGGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_57_231_8 99 test_chromosome 107 255 75M = 207 0 GGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCACCGACTGGACTATTTAGGACGATCGGACTG 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_58_218_16 163 test_chromosome 108 255 75M = 194 0 GGATGACGACTAGGACTACGGACGGACTTAGAACGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_58_220_3d 163 test_chromosome 108 255 75M = 196 0 GGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_58_234_7 163 test_chromosome 108 255 75M = 210 0 GGATGACGCCTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_63_229_4c 163 test_chromosome 113 255 75M = 205 0 ACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGCAGGGACTGGACTATTTAGGACGATCGGACTGAGGAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_65_238_2e 99 test_chromosome 115 255 75M = 214 0 GACTAGGACTACGGACGGACTTAGAGCGTCAGAAGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_69_229_23 163 test_chromosome 119 255 75M = 205 0 AGGACTACGGACGGACTTATAGGGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_36_146_27 83 test_chromosome 122 255 75M = 86 0 ACTACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGTGCAGTAGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_72_258_4 163 test_chromosome 122 255 75M = 234 0 ACTACGGACGGACTTAGAGCGTCAGATGCAGCAACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_73_240_34 99 test_chromosome 123 255 75M = 216 0 CTACGGACGGACTTAGAGCGTCAGATGCAGCGAATGGACTATTTAGGACGCTCGGACTGAGGAGGGCAGTAGGAC 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_73_259_5e 99 test_chromosome 123 255 75M = 235 0 CTACGGACGGACTTAGAGCGTCAGATGCTGCGACTGGACTATTTGGGACGATCGGACTGAGGAGGGCAGTAGGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_75_204_54 73 test_chromosome 125 255 75M * 0 0 ACGGACGGACTTCGAGCCTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_75_235_21 73 test_chromosome 125 255 75M * 0 0 ACGGACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGCACGATCGGACTGAGGAGGGCAGTAGAACGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_75_277_3b 97 test_chromosome 125 255 75M = 353 0 ACGGACGGACTTAAAGCTTCAGATGCAGCGACAGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_77_256_2c 73 test_chromosome 127 255 75M * 0 0 GGACGGACTTAGAGCATCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_78_276_4b 97 test_chromosome 128 255 75M = 352 0 GACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGGCGCTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_79_256_31 137 test_chromosome 129 255 75M * 0 0 ACGGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_81_228_3a 163 test_chromosome 131 255 75M = 204 0 GGACTGAGAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGTAGTAGGACGCTACGTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_81_245_4d 163 test_chromosome 131 255 75M = 221 0 GGACTTAGAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGATGAGGGCAGTAGGACGCTACGTA 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_8_155_9 83 test_chromosome 131 255 75M = 58 0 GGACTTCGAGCGTCAGATGCAGCGACTGTACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_82_255_2 137 test_chromosome 132 255 75M * 0 0 GACTTAGAGCGTCAGATGCAGCGACTGGACTTTTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_82_271_58 99 test_chromosome 132 255 75M = 247 0 GACTTAGAGCGTCAGTTGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_85_268_53 99 test_chromosome 135 255 75M = 244 0 TTAGTGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_87_250_57 163 test_chromosome 137 255 75M = 226 0 AGAGCGTCAGATGCAGAGACTGGACTATTTAGGACGATCGGACTGAGGAGTGCAGTAGGACGCTACGTATTTGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_87_279_5f 161 test_chromosome 137 255 75M = 355 0 AGAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACCGAGGAGGGCAGTAGGACGCTACGTATTTGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_88_257_50 137 test_chromosome 138 255 75M * 0 0 GAGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_89_230_b 163 test_chromosome 139 255 75M = 206 0 AGCGTCAGGTGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_89_245_15 99 test_chromosome 139 255 75M = 221 0 AGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGC 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_89_267_32 163 test_chromosome 139 255 75M = 243 0 AGCGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGAGTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_91_256_41 73 test_chromosome 141 255 75M * 0 0 CGTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_92_250_44 99 test_chromosome 142 255 75M = 226 0 GTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 +test_mRNA_92_266_43 99 test_chromosome 142 255 75M = 242 0 GTCAGATGCAGCGACTGGACTATTTAGGACGATCGGACTCAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_96_238_3 163 test_chromosome 146 255 75M = 214 0 GATGCAGCGACTGGACTATTTAGGACGATCGGACGGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_97_275_26 97 test_chromosome 147 255 75M = 351 0 ATGCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_22_173_62 147 test_chromosome 149 255 75M = 72 0 GCAGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_103_284_2a 161 test_chromosome 153 255 75M = 360 0 CGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_104_278_3e 161 test_chromosome 154 255 75M = 354 0 GACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTTTTTGGCGCGCGGCCCTACGGCTG 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_105_266_13 163 test_chromosome 155 255 75M = 242 0 ACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_105_276_c 161 test_chromosome 155 255 75M = 352 0 ACTGGACTATTTAGGACGATCGGACTGAGGAAGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_9_179_52 83 test_chromosome 155 255 75M = 59 0 ACTGGACCATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_106_253_45 137 test_chromosome 156 255 75M * 0 0 CTGGACTATTTAGGTCGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_107_286_5 161 test_chromosome 157 255 75M = 362 0 TGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGCATTTGGCGCGCGGCCCTACGGCTGAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_6_182_59 147 test_chromosome 158 255 75M = 56 0 GGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_56_183_56 147 test_chromosome 159 255 75M = 106 0 GACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_110_267_22 163 test_chromosome 160 255 75M = 243 0 ACTAGTTAGGGCGATCGGACTGAGGAGGGCAGTAGGACGCTACGTAGTTGGCGCGCGGCCCTACGACTGAGCGTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:5 +test_mRNA_110_271_28 99 test_chromosome 160 255 75M = 247 0 ACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTC 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_111_297_61 161 test_chromosome 161 255 75M = 373 0 CTATTTAGGACGATCGGACTGGGGAGGGCAGTAGGACGCTACGGATTTGGCGCGCGGCCCTACGGCTGAGCGTCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_23_186_42 83 test_chromosome 162 255 75M = 73 0 TATTTAGGACGATCGGACGGAGGAGGGCAGAAGGACGCTACGTATTTGGCGCGCGGCCCTACGACTGAGCGTCGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 +test_mRNA_3_187_51 147 test_chromosome 163 255 75M = 53 0 ATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_114_277_5b 161 test_chromosome 164 255 75M = 353 0 TTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGCCTGAGCGTCGAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_28_188_11 147 test_chromosome 164 255 75M = 78 0 TTTAGGACGATCGGACTGAGGAAGGCAGTAGGACGCTTCGTATTTGGCGCGAGGCCCTACGGCTGAGCGTCGAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 +test_mRNA_26_189_30 83 test_chromosome 165 255 75M = 76 0 TTAGGACGATCGGACTGAGGAGGGCAGTAGGACGGTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_33_189_4a 89 test_chromosome 165 255 75M * 0 0 TTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACCTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_116_271_2b 163 test_chromosome 166 255 75M = 247 0 TAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_116_295_63 161 test_chromosome 166 255 75M = 371 0 TAGGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTT 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_11_190_1a 147 test_chromosome 166 255 75M = 61 0 TAGGTCGATGGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGTGGCCCTACGGCTGAGCGTCGAGCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 +test_mRNA_4_191_5d 83 test_chromosome 167 255 75M = 54 0 AGGACGATCGGACTGAGTAGGGCAGTAGGACACTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_118_297_f 161 test_chromosome 168 255 75M = 373 0 GGACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_44_193_3f 147 test_chromosome 169 255 75M = 94 0 GACGATCGGACTGGGGAGAGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_16_194_10 83 test_chromosome 170 255 75M = 66 0 ACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_51_194_47 83 test_chromosome 170 255 75M = 101 0 ACGATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_51_194_49 147 test_chromosome 170 255 75M = 101 0 ACGTTCGGACTGAGGAGGGCAGTAGGACGCCACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 +test_mRNA_122_299_6 161 test_chromosome 172 255 75M = 375 0 GATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_44_197_35 147 test_chromosome 173 255 75M = 94 0 ATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGATCGTCGAGCTTGCGATAC 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 +test_mRNA_5_197_46 145 test_chromosome 173 255 75M = 55 0 ATCGGACGGAGGAGGGCAGTAGGACGCTACGTATTTGGCGGGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_8_197_1 147 test_chromosome 173 255 75M = 58 0 ATCGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_125_280_48 97 test_chromosome 175 255 75M = 356 0 CGGACTGAGGAGGGCAGTAGGACGCTATGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGAAACGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 +test_mRNA_125_293_60 161 test_chromosome 175 255 75M = 369 0 CGGACTGAGGAGGGCAGTAGGACGCTATGTATTTGGCGCGCGGCCCTACGGCTGAGCTTCGAGGTTGCGATACGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 +test_mRNA_38_199_29 147 test_chromosome 175 255 75M = 88 0 CGGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_126_282_18 161 test_chromosome 176 255 75M = 358 0 GGACTGAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 +test_mRNA_131_260_33 99 test_chromosome 181 255 70M100N5M = 236 0 GAGGAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_21_208_24 83 test_chromosome 184 255 67M100N8M = 71 0 GAGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGCCTGAGCGTCGAGCTTGCGATACGCCACTATTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_42_209_25 147 test_chromosome 185 255 66M100N9M = 92 0 
AGGGCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTATTACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_53_212_19 147 test_chromosome 188 255 63M100N12M = 103 0 GCAGTAGGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTATTTCTTTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:1 +test_mRNA_36_218_12 147 test_chromosome 194 255 57M100N18M = 86 0 GGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_58_218_16 83 test_chromosome 194 255 57M100N18M = 108 0 GGACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_145_300_37 163 test_chromosome 195 255 56M100N19M = 376 0 GACGCTACGTATTTGGCGCGGGGCCCTATGGCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTAGTATATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:5 XS:A:+ NS:i:2 +test_mRNA_39_219_5c 147 test_chromosome 195 255 56M100N19M = 89 0 GACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCCAGCTTGCGATACGCCACTATTACTTTATTATCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_58_220_3d 83 test_chromosome 196 255 55M100N20M = 108 0 ACGCTACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGGTTGCGATACGCCACTATTACTTTATTATCTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 XS:A:+ NS:i:1 +test_mRNA_50_224_2d 83 test_chromosome 200 255 51M100N24M = 100 0 TACGTATTTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_44_225_1e 83 test_chromosome 201 255 50M100N25M = 94 0 ACGTATATGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGG 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_81_228_3a 83 test_chromosome 204 255 47M100N28M = 131 0 TATTTGGCGCGCGGCCCTATGGCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGTAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 XS:A:+ NS:i:0 +test_mRNA_63_229_4c 83 test_chromosome 205 255 46M100N29M = 113 0 ATTTGGCGCGCGGCCCTACGGCTGAGTGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGGACGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:1 +test_mRNA_69_229_23 83 test_chromosome 205 255 46M100N29M = 119 0 CTTTGGCGCGCGGCCCTACGGCTGAGCGTCTAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGGACGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 XS:A:+ NS:i:1 +test_mRNA_89_230_b 83 test_chromosome 206 255 45M100N30M = 139 0 TCTGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTAACTCACTCGGACGTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 XS:A:+ NS:i:0 +test_mRNA_30_231_3c 81 test_chromosome 207 255 44M100N31M = 80 0 TTGGCGCGCGGCCCTACGGCTAAGCGTCGAGCTTGCGATACGCCACTATTACTTTAATATCTTACTCGCACGTAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 XS:A:+ NS:i:0 +test_mRNA_57_231_8 147 test_chromosome 207 255 44M100N31M = 107 0 TTGGCGCGCGGCCCTAGGGCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGGACGTAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_46_232_2f 147 test_chromosome 208 255 43M100N32M = 96 0 TGGCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGGACGTAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_58_234_7 83 test_chromosome 210 255 41M100N34M = 108 0 GCGCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTATTAGTTTATTATCTGACTCGGACGTAGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 
XS:A:+ NS:i:1 +test_mRNA_41_236_55 145 test_chromosome 212 255 39M100N36M = 91 0 GCGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGGACGTAGACGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_51_237_a 147 test_chromosome 213 255 38M100N37M = 101 0 CGCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGGACGTAGACGGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_65_238_2e 147 test_chromosome 214 255 37M100N38M = 115 0 GCGGCCCTACGGCTGCGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGGACGTAGACGGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_96_238_3 83 test_chromosome 214 255 37M100N38M = 146 0 GCGGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCTACTAGTACTTTATTATCTTACGCGGACGTAGACGGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 XS:A:+ NS:i:2 +test_mRNA_73_240_34 147 test_chromosome 216 255 35M100N40M = 123 0 GGCCCTACGGCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTTCTCGGACGTAGACGGATCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_81_245_4d 83 test_chromosome 221 255 30M100N45M = 131 0 TACGGCTGAGCGTCGAGGTTGCGATACGCCACTATTACTTTATAATCTTACTCGGACGTAGACGGATCGGCAACG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:1 +test_mRNA_89_245_15 147 test_chromosome 221 255 30M100N45M = 139 0 TACGGCTGAGCGTCGAGCTTGCGATACGCCACTATTTCTCTATTATCTTACTCGGACGTAGACGGATCGGCAACG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:2 +test_mRNA_172_294_4f 99 test_chromosome 222 255 29M100N46M = 370 0 ACGGATGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTCCTCGGACGTAGACGGATCGCCAACGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 XS:A:+ NS:i:1 +test_mRNA_51_248_14 145 test_chromosome 224 255 27M100N48M = 
101 0 GGCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGGACGTAGACGGAACGGCAACGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 XS:A:+ NS:i:0 +test_mRNA_48_249_20 81 test_chromosome 225 255 26M100N49M = 98 0 GCTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTACTATCTTACTCGGACGGAGACGGATCGGCAACGGGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 XS:A:+ NS:i:0 +test_mRNA_87_250_57 83 test_chromosome 226 255 25M100N50M = 137 0 ATGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_92_250_44 147 test_chromosome 226 255 25M100N50M = 142 0 CTGAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGGACGTAGACGGATCGGGTACGGGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_128_252_36 153 test_chromosome 228 255 23M100N52M * 0 0 GAGCGTCGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGGACGTAGACGGATCGGGAACGGGACTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 XS:A:+ NS:i:0 +test_mRNA_72_258_4 83 test_chromosome 234 255 17M100N50M100N8M = 122 0 CGAGCTTGCGATACGCCACTATTACTTTATTATCTTACTCGGACGTAGACGGATGGGCAACGGGACTTTTTCTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:1 +test_mRNA_73_259_5e 147 test_chromosome 235 255 16M100N50M100N9M = 123 0 GAGCTTGCGATACGCCACTATTACTGTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_131_260_33 147 test_chromosome 236 255 15M100N50M100N10M = 181 0 AGCTTGTGATACGCCACTATTACTTTATTATCTTACTCGGACGTAAACGGATCGGCCACGGGACTTTTTTTACTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 XS:A:+ NS:i:3 +test_mRNA_52_261_1b 145 test_chromosome 237 255 14M100N50M100N11M = 102 0 
GCTTGCGATACGCCACTATTACTTAATTATCTTACTCGGACGTAGAAGGATCGGCAACGGGACTTTTTCTACTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:1 +test_mRNA_105_266_13 83 test_chromosome 242 255 9M100N50M100N16M = 155 0 CGATCCGCCACTATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:1 +test_mRNA_92_266_43 147 test_chromosome 242 255 9M100N50M100N16M = 142 0 CGATACGCCACTATTACTTTCTTATCTTACTCGGACGTAGACGGAGCGGCAACGGGACTTTTTCTACTTGAGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 XS:A:+ NS:i:2 +test_mRNA_110_267_22 83 test_chromosome 243 255 8M100N50M100N17M = 160 0 GATACGCCACTATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 XS:A:+ NS:i:0 +test_mRNA_89_267_32 83 test_chromosome 243 255 8M100N50M100N17M = 139 0 GATACGGCACTATTACTTTATTATCTTTCTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:1 +test_mRNA_111_268_d 89 test_chromosome 244 255 7M100N50M100N18M * 0 0 ATACGCCACTATTATTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:1 +test_mRNA_85_268_53 147 test_chromosome 244 255 7M100N50M100N18M = 135 0 ATACGCCACTATTACTTTATTATCTTACTCGGACGTAGACGGATCGTCAACGGGACTTTTTCTACTTGAGACTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_110_271_28 147 test_chromosome 247 255 4M100N50M100N21M = 160 0 CGCCACTATTACTTTATTATCTTACTCGGACGAAGACGGATCGGCAACGGGGCTTTTTCTACTTGAGACTGGGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:1 +test_mRNA_116_271_2b 83 test_chromosome 247 255 4M100N50M100N21M = 166 0 
CGCCACTATTACTTTATTATCTTACTCGGACGTAGACAGATCGGCAACGGGACTTTTTCTACTTGAGACTGGGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_82_271_58 147 test_chromosome 247 255 4M100N50M100N21M = 132 0 CGCCACTATTACTTTATTATCTTACTCGGACGTAGACGCATCGGCAACGGGACTTTTTCTACTTGAGACTGGGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_53_272_5a 81 test_chromosome 248 255 3M100N50M100N22M = 103 0 GCCACTATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGACACTGGGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:1 +test_mRNA_104_274_1c 89 test_chromosome 350 255 51M100N24M * 0 0 CACTATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGGATCGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_85_275_38 153 test_chromosome 351 255 50M100N25M * 0 0 ACTCTTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTACTACTTGAGACTGGGATCGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_97_275_26 145 test_chromosome 351 255 50M100N25M = 147 0 ACTATTACTTTATTATCTTAGTCGGACGTAGACGGATCGGAAACGGGACTCTTTCTACTTGAGACTGGGATCGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 XS:A:+ NS:i:0 +test_mRNA_105_276_c 81 test_chromosome 352 255 49M100N26M = 155 0 CTATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGGCTTTTTCTACTTGAGACTGGGATCGAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_78_276_4b 145 test_chromosome 352 255 49M100N26M = 128 0 CTATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTAGGATCGAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_114_277_5b 81 test_chromosome 353 255 48M100N27M = 164 0 
TATTACTTTATTATCTTACTCGGAGGTAGACGGAACGGCAACGGGACTTTTTCTGCTTGAGACTGGGATCGAGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 XS:A:+ NS:i:0 +test_mRNA_75_277_3b 145 test_chromosome 353 255 48M100N27M = 125 0 TATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACCTGAGACTGGGATCGAGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_104_278_3e 17 test_chromosome 354 255 47M100N28M = 154 0 ATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGAATCGAGGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 XS:A:+ NS:i:0 +test_mRNA_104_278_3e 81 test_chromosome 354 255 47M100N28M = 154 0 ATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGAATCGAGGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_87_279_5f 81 test_chromosome 355 255 46M100N29M = 137 0 TTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 XS:A:+ NS:i:0 +test_mRNA_125_280_48 145 test_chromosome 356 255 45M100N30M = 175 0 TACTTTATTATCTTACTCTGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGGAGCGAGGCGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_126_282_18 81 test_chromosome 358 255 43M100N32M = 176 0 CTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 XS:A:+ NS:i:0 +test_mRNA_103_284_2a 81 test_chromosome 360 255 41M100N34M = 153 0 TTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 XS:A:+ NS:i:0 +test_mRNA_107_286_5 81 test_chromosome 362 255 39M100N36M = 157 0 
ATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 XS:A:+ NS:i:0 +test_mRNA_151_286_e 153 test_chromosome 362 255 39M100N36M * 0 0 ATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACTTTATCTACTTGAGACTGGGATCGAGGCGGACTTTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:1 +test_mRNA_150_290_0 89 test_chromosome 366 255 35M100N40M * 0 0 TCTTACTCGGACGTAGACGGATCGCCAACGGGACTTTTTCTACTTGAGACTGAGACCGAGGCGGACTTTTTAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 XS:A:+ NS:i:0 +test_mRNA_94_291_40 153 test_chromosome 367 255 34M100N41M * 0 0 CTTCCTGGGACGTAGACGGATCGGCAACGCGACATTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTTGGGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:5 XS:A:+ NS:i:2 +test_mRNA_125_293_60 81 test_chromosome 369 255 32M100N43M = 175 0 TACTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 XS:A:+ NS:i:0 +test_mRNA_172_294_4f 147 test_chromosome 370 255 31M100N44M = 222 0 ACTCGGACGTAGACGGGTCGGCAGCGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACGTTTTAGGACGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 XS:A:+ NS:i:0 +test_mRNA_116_295_63 81 test_chromosome 371 255 30M100N45M = 166 0 CTCGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 XS:A:+ NS:i:0 +test_mRNA_111_297_61 17 test_chromosome 373 255 28M100N47M = 161 0 CGGACGTAGACGGATCCGCAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 XS:A:+ NS:i:0 +test_mRNA_111_297_61 81 test_chromosome 373 255 28M100N47M = 161 0 CGGACGTAGACGGATCCGCAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGACT 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_118_297_f 17 test_chromosome 373 255 28M100N47M = 168 0 CGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_118_297_f 81 test_chromosome 373 255 28M100N47M = 168 0 CGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 XS:A:+ NS:i:0 +test_mRNA_151_297_1d 153 test_chromosome 373 255 28M100N47M * 0 0 CGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACATTTTAGGACGGGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 +test_mRNA_151_297_1d 25 test_chromosome 373 255 28M100N47M * 0 0 CGGACGTAGACGGATCGGCAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACATTTTAGGACGGGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:3 XS:A:+ NS:i:0 +test_mRNA_122_299_6 81 test_chromosome 375 255 26M100N49M = 172 0 GACGTAGACGGAGCGGCAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGACTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:+ NS:i:0 +test_mRNA_145_300_37 83 test_chromosome 376 255 25M100N50M = 195 0 ACGTAGACGGATCGGAAACGGGACTTTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGACTTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 XS:A:+ NS:i:0 diff -r 869e494a8074 -r 37c075416918 test-data/cufflinks_out1.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cufflinks_out1.gtf Tue Apr 13 17:12:00 2010 -0400 @@ -0,0 +1,4 @@ +test_chromosome Cufflinks transcript 53 550 1000 + . gene_id "CUFF.1"; transcript_id "CUFF.1.1"; FPKM "3355704.6979865772"; frac "1.000000"; conf_lo "2697596.861952"; conf_hi "4013812.534021"; cov "46.057047"; +test_chromosome Cufflinks exon 53 250 1000 + . 
gene_id "CUFF.1"; transcript_id "CUFF.1.1"; exon_number "1"; FPKM "3355704.6979865772"; frac "1.000000"; conf_lo "2697596.861952"; conf_hi "4013812.534021"; cov "46.057047"; +test_chromosome Cufflinks exon 351 400 1000 + . gene_id "CUFF.1"; transcript_id "CUFF.1.1"; exon_number "2"; FPKM "3355704.6979865772"; frac "1.000000"; conf_lo "2697596.861952"; conf_hi "4013812.534021"; cov "46.057047"; +test_chromosome Cufflinks exon 501 550 1000 + . gene_id "CUFF.1"; transcript_id "CUFF.1.1"; exon_number "3"; FPKM "3355704.6979865772"; frac "1.000000"; conf_lo "2697596.861952"; conf_hi "4013812.534021"; cov "46.057047"; diff -r 869e494a8074 -r 37c075416918 test-data/cufflinks_out2.expr --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cufflinks_out2.expr Tue Apr 13 17:12:00 2010 -0400 @@ -0,0 +1,2 @@ +trans_id bundle_id chr left right FPKM FMI frac FPKM_conf_lo FPKM_conf_hi coverage length +CUFF.1.1 6 test_chromosome 52 550 3.3557e+06 1 1 2.6976e+06 4.01381e+06 46.057 298 diff -r 869e494a8074 -r 37c075416918 test-data/cufflinks_out3.expr --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cufflinks_out3.expr Tue Apr 13 17:12:00 2010 -0400 @@ -0,0 +1,2 @@ +gene_id bundle_id chr left right FPKM FPKM_conf_lo FPKM_conf_hi +CUFF.1 6 test_chromosome 52 550 3.3557e+06 2.6976e+06 4.01381e+06 diff -r 869e494a8074 -r 37c075416918 tool-data/tophat_indices.loc.sample --- a/tool-data/tophat_indices.loc.sample Tue Apr 13 17:02:56 2010 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,29 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of Tophat and Bowtie indexed sequences data files. You will need -#to create these data files and then create a tophat_indices.loc file -#similar to this one (store it in this directory) that points to -#the directories in which those files are stored. 
The tophat_indices.loc -#file has this format (white space characters are TAB characters): -# -#<build> <file_base> -# -#So, for example, if you had hg18 indexed stored in -#/depot/data2/galaxy/tophat/hg18/, -#then the tophat_indices.loc entry would look like this: -# -#hg18 /depot/data2/galaxy/tophat/hg18/hg18 -# -#and your /depot/data2/galaxy/tophat/hg18/ directory -#would contain hg18.*.ebwt files: -# -#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.ebwt -#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.ebwt -#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 gh18.3.ebwt -#...etc... -# -#Your tophat_indices.loc file should include an entry per line for -#each index set you have stored. The "file" in the path does not actually -#exist, but it is the prefix for the actual index files. For example: -# -#hg18 /depot/data2/galaxy/bowtie/hg18/hg18 -#hg19 /depot/data2/galaxy/bowtie/hg19/hg19 diff -r 869e494a8074 -r 37c075416918 tool_conf.xml.sample --- a/tool_conf.xml.sample Tue Apr 13 17:02:56 2010 -0400 +++ b/tool_conf.xml.sample Tue Apr 13 17:12:00 2010 -0400 @@ -225,8 +225,9 @@ <tool file="metag_tools/megablast_xml_parser.xml" /> <tool file="sr_mapping/PerM.xml" /> </section> - <section name="NGS: Expression Analysis" id="rnatools"> - <tool file="tophat/tophat_wrapper.xml" /> + <section name="NGS: Expression Analysis" id="ngs-rna-tools"> + <tool file="ngs_rna/tophat_wrapper.xml" /> + <tool file="ngs_rna/cufflinks_wrapper.xml" /> </section> <section name="NGS: SAM Tools" id="samtools"> <tool file="samtools/sam_bitwise_flag_filter.xml" /> diff -r 869e494a8074 -r 37c075416918 tools/ngs_rna/cufflinks_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ngs_rna/cufflinks_wrapper.py Tue Apr 13 17:12:00 2010 -0400 @@ -0,0 +1,83 @@ +#!/usr/bin/env python + +import optparse, os, shutil, subprocess, sys, tempfile + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + #Parse Command Line + parser = 
optparse.OptionParser() + parser.add_option( '-1', '--input', dest='input', help=' file of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' ) + parser.add_option( '-s', '--inner-dist-std-dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' ) + parser.add_option( '-I', '--max-intron-length', help='The minimum intron length. Cufflinks will not report transcripts with introns longer than this, and will ignore SAM alignments with REF_SKIP CIGAR operations longer than this. The default is 300,000.' ) + parser.add_option( '-F', '--min-isoform-fraction', help='After calculating isoform abundance for a gene, Cufflinks filters out transcripts that it believes are very low abundance, because isoforms expressed at extremely low levels often cannot reliably be assembled, and may even be artifacts of incompletely spliced precursors of processed transcripts. This parameter is also used to filter out introns that have far fewer spliced alignments supporting them. The default is 0.05, or 5% of the most abundant isoform (the major isoform) of the gene.' ) + parser.add_option( '-j', '--pre-mrna-fraction', help='Some RNA-Seq protocols produce a significant amount of reads that originate from incompletely spliced transcripts, and these reads can confound the assembly of fully spliced mRNAs. Cufflinks uses this parameter to filter out alignments that lie within the intronic intervals implied by the spliced alignments. The minimum depth of coverage in the intronic region covered by the alignment is divided by the number of spliced reads, and if the result is lower than this parameter value, the intronic alignments are ignored. The default is 5%.' 
) + parser.add_option( '-p', '--num-threads', help='Use this many threads to align reads. The default is 1.' ) + parser.add_option( '-m', '--inner-mean-dist', dest='inner_mean_dist', help='This is the expected (mean) inner distance between mate pairs. \ + For, example, for paired end runs with fragments selected at 300bp, \ + where each end is 50bp, you should set -r to be 200. The default is 45bp.') + parser.add_option( '-Q', '--min-mapqual', help='Instructs Cufflinks to ignore alignments with a SAM mapping quality lower than this number. The default is 0.' ) + parser.add_option( '-L', '--label', help='Cufflinks will report transfrags in GTF format, with a prefix given by this option. The default prefix is "CUFF".' ) + parser.add_option( '-G', '--GTF', help='Tells Cufflinks to use the supplied reference annotation to estimate isoform expression. It will not assemble novel transcripts, and the program will ignore alignments not structurally compatible with any reference transcript.' ) + # Advanced Options: + parser.add_option( '--num-importance-samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' ) + parser.add_option( '--max-mle-iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' ) + + # Wrapper / Galaxy options. + parser.add_option( '-A', '--assembled-isoforms-output', dest='assembled_isoforms_output_file', help='Assembled isoforms output file; formate is GTF.' ) + parser.add_option( '-T', '--transcripts-expression-output', dest='transcripts_expression_output_file', help='TODO' ) + parser.add_option( '-Z', '--genes-expression-output', dest='genes_expression_output_file', help='TODO' ) + + (options, args) = parser.parse_args() + + # Make temp directory for output. + tmp_output_dir = tempfile.mkdtemp() + + # Build command. + + # Base. + cmd = "cufflinks" + + # Add options. 
+ if options.inner_mean_dist: + cmd += ( " -m %i" % int ( options.inner_mean_dist ) ) + + # Add input files. + cmd += " " + options.input + + # Run + try: + proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_output_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) + returncode = proc.wait() + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += proc.stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + if returncode != 0: + raise Exception, stderr + except Exception, e: + stop_err( 'Error in cufflinks:\n' + str( e ) ) + + # TODO: look for errors in program output. + + # Copy output files from tmp directory to specified files. + try: + try: + shutil.copyfile( tmp_output_dir + "/transcripts.gtf", options.assembled_isoforms_output_file ) + shutil.copyfile( tmp_output_dir + "/transcripts.expr", options.transcripts_expression_output_file ) + shutil.copyfile( tmp_output_dir + "/genes.expr", options.genes_expression_output_file ) + except Exception, e: + stop_err( 'Error in tophat:\n' + str( e ) ) + finally: + # Clean up temp dirs + if os.path.exists( tmp_output_dir ): + shutil.rmtree( tmp_output_dir ) + +if __name__=="__main__": __main__() \ No newline at end of file diff -r 869e494a8074 -r 37c075416918 tools/ngs_rna/cufflinks_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ngs_rna/cufflinks_wrapper.xml Tue Apr 13 17:12:00 2010 -0400 @@ -0,0 +1,91 @@ +<tool id="cufflinks" name="Cufflinks" version="0.8.2"> + <description>Transcript assembly, differential expression, and differential regulation for RNA-Seq</description> + <command interpreter="python"> + cufflinks_wrapper.py + --input=$input + --assembled-isoforms-output=$assembled_isoforms + --transcripts-expression-output=$transcripts_expression + --genes-expression-output=$genes_expression + --num-threads="4" + #if $singlePaired.sPaired == "paired": + -r $singlePaired.mean_inner_distance + #end if + </command> + 
<inputs> + <param format="sam" name="input" type="data" label="SAM file of aligned RNA-Seq reads" help=""/> + <conditional name="singlePaired"> + <param name="sPaired" type="select" label="Is this library mate-paired?"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <when value="single"> + + </when> + <when value="paired"> + <param name="mean_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs"/> + </when> + </conditional> + </inputs> + + <outputs> + <data format="expr" name="genes_expression" /> + <data format="expr" name="transcripts_expression" /> + <data format="gtf" name="assembled_isoforms" /> + </outputs> + + <tests> + <test> + <param name="sPaired" value="single"/> + <param name="input" value="cufflinks_in.sam"/> + <param name="mean_inner_distance" value="20"/> + <output name="assembled_isoforms" file="cufflinks_out1.gtf"/> +  + </test> + </tests> + + <help> +**Cufflinks Overview** + +Cufflinks_ assembles transcripts, estimates their abundances, and tests for differential expression and regulation in RNA-Seq samples. It accepts aligned RNA-Seq reads and assembles the alignments into a parsimonious set of transcripts. Cufflinks then estimates the relative abundances of these transcripts based on how many reads support each one. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. (manuscript in press) + +.. _Cufflinks: http://cufflinks.cbcb.umd.edu/ + +------ + +**Know what you are doing** + +.. class:: warningmark + +There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. 
A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. + +.. __: http://cufflinks.cbcb.umd.edu/manual.html + +------ + +**Input formats** + +Cufflinks accepts files in SAM format. + +------ + +**Outputs** + +TODO + +------- + +**Cufflinks settings** + +All of the options have a default value. You can change any of them. Some of the options in Cufflinks have been implemented here. + +------ + +**Cufflinks parameter list** + +This is a list of implemented Cufflinks options:: + + </help> +</tool> diff -r 869e494a8074 -r 37c075416918 tools/ngs_rna/tophat_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ngs_rna/tophat_wrapper.py Tue Apr 13 17:12:00 2010 -0400 @@ -0,0 +1,80 @@ +#!/usr/bin/env python + +import optparse, os, shutil, sys, tempfile + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + #Parse Command Line + parser = optparse.OptionParser() + parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' ) + parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' ) + parser.add_option( '-a', '--min-anchor-length', dest='min_anchor_length', + help='The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction.' ) + parser.add_option( '-i', '--min-intron-length', dest='min_intron_length', + help='The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart.' ) + parser.add_option( '-I', '--max-intron-length', dest='max_intron_length', + help='The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.' 
) + parser.add_option( '-s', '--solexa-quals', dest='solexa_quals', help='Use the Solexa scale for quality values in FASTQ files.' ) + parser.add_option( '-S', '--solexa.3-quals', dest='solexa_quals', + help='As of the Illumina GA pipeline version 1.3, quality scores are encoded in Phred-scaled base-64. Use this option for FASTQ files from pipeline 1.3 or later.' ) + parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' ) + parser.add_option( '-C', '--coverage-output', dest='coverage_output_file', help='Coverage output file; formate is WIG.' ) + parser.add_option( '-J', '--junctions-output', dest='junctions_output_file', help='Junctions output file; formate is BED.' ) + parser.add_option( '-H', '--hits-output', dest='accepted_hits_output_file', help='Accepted hits output file; formate is SAM.' ) + parser.add_option( '-D', '--indexes-dir', dest='indexes_directory', help='Indexes directory; location of .ebwt and .fa files.' ) + parser.add_option( '-r', '--mate-inner-dist', dest='mate_inner_dist', help='This is the expected (mean) inner distance between mate pairs. \ + For, example, for paired end runs with fragments selected at 300bp, \ + where each end is 50bp, you should set -r to be 200. There is no default, \ + and this parameter is required for paired end runs.') + (options, args) = parser.parse_args() + + # Make temp directory for output. + tmp_output_dir = tempfile.mkdtemp() + + # Build command. + + # Base. + cmd = "tophat -o %s " % ( tmp_output_dir ) + + # Add options. + if options.mate_inner_dist: + cmd += ( " -r %i" % int ( options.mate_inner_dist ) ) + + # Add index prefix. + cmd += " " + options.indexes_directory + + # Add input files. + cmd += " " + options.input1 + if options.mate_inner_dist: + # Using paired-end reads. + cmd += " " + options.input2 + + # Route program output to file. + cmd += " > %s" % tmp_output_dir + "/std_out.txt" + # Route program error output to file. 
+ cmd += " 2> %s" % tmp_output_dir + "/std_err.txt" + + # Run. + try: + os.system( cmd ) + except Exception, e: + stop_err( 'Error in tophat:\n' + str( e ) ) + + # TODO: look for errors in program output. + + # Copy output files from tmp directory to specified files. + try: + shutil.copyfile( tmp_output_dir + "/coverage.wig", options.coverage_output_file ) + shutil.copyfile( tmp_output_dir + "/junctions.bed", options.junctions_output_file ) + shutil.copyfile( tmp_output_dir + "/accepted_hits.sam", options.accepted_hits_output_file ) + except Exception, e: + stop_err( 'Error in tophat:\n' + str( e ) ) + + # Clean up temp dirs + if os.path.exists( tmp_output_dir ): + shutil.rmtree( tmp_output_dir ) + +if __name__=="__main__": __main__() diff -r 869e494a8074 -r 37c075416918 tools/ngs_rna/tophat_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ngs_rna/tophat_wrapper.xml Tue Apr 13 17:12:00 2010 -0400 @@ -0,0 +1,129 @@ +<tool id="tophat" name="Tophat" version="1.0.13"> + <description>Find splice junctions using RNA-seq data</description> + <command interpreter="python"> + tophat_wrapper.py + --num-threads="4" + --coverage-output=$coverage + --junctions-output=$junctions + --hits-output=$accepted_hits + #if $refGenomeSource.genomeSource == "history": + --indexes-dir=$refGenomeSource.ownFile + #else: + --indexes-dir=$refGenomeSource.index.value + #end if + #if $singlePaired.sPaired == "single": + --input1=$singlePaired.input1 + --input2="None" + #else: + -r $singlePaired.mean_inner_distance + --input1=$singlePaired.input1 + --input2=$singlePaired.input2 + #end if + </command> + <inputs> + <conditional name="refGenomeSource"> + <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" 
help="Built-ins were indexed using default options"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team"> + <options from_file="bowtie_indices.loc"> + <column name="value" index="1" /> + <column name="name" index="0" /> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> + </when>  + </conditional>  + <conditional name="singlePaired"> + <param name="sPaired" type="select" label="Is this library mate-paired?"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <when value="single"> + <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/> + </when> + <when value="paired"> + <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/> + <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/> + <param format="fastqsanger" name="mean_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs"/> + </when> + </conditional> + </inputs> + + <outputs> + <data format="sam" name="accepted_hits"/> + <data format="wig" name="coverage" /> + <data format="bed" name="junctions" /> + </outputs> + + <tests> + <test> + <param name="genomeSource" value="indexed"/> + <param name="index" value="test_ref"/> + <param name="sPaired" value="paired"/> + <param name="input1" ftype="fastqsanger" value="tophat_in1.fq"/> + <param name="input2" ftype="fastqsanger" value="tophat_in2.fq"/> + 
<param name="mean_inner_distance" value="20"/> +  + <output name="coverage" file="tophat_out2.wig"/> + <output name="junctions" file="tophat_out3.bed"/> + </test> + </tests> + + <help> +**Tophat Overview** + +TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie, and then analyzes the mapping results to identify splice junctions between exons. Please cite: Trapnell, C., Pachter, L. and Salzberg, S.L. TopHat: discovering splice junctions with RNA-Seq. Bioinformatics 25, 1105-1111 (2009). + +.. _Tophat: http://tophat.cbcb.umd.edu/ + +------ + +**Know what you are doing** + +.. class:: warningmark + +There is no such thing (yet) as an automated gearshift in splice junction identification. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. + +.. __: http://tophat.cbcb.umd.edu/manual.html + +------ + +**Input formats** + +Tophat accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files. + +------ + +**Outputs** + +Tophat produces three output files:: + + coverage.wig -- coverage of reads + accepted_hits.sam -- reads that were mapped onto genome + junctions.bed -- splice junctions identified by Tophat + +------- + +**Tophat settings** + +All of the options have a default value. You can change any of them. Some of the options in Tophat have been implemented here. + +------ + +**Tophat parameter list** + +This is a list of implemented Tophat options:: + + -r This is the expected (mean) inner distance between mate pairs. For, example, for paired end runs with fragments + selected at 300bp, where each end is 50bp, you should set -r to be 200. 
There is no default, and this parameter + is required for paired end runs. + </help> +</tool> diff -r 869e494a8074 -r 37c075416918 tools/tophat/tophat_wrapper.py --- a/tools/tophat/tophat_wrapper.py Tue Apr 13 17:02:56 2010 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -#!/usr/bin/env python - -import optparse, os, shutil, sys, tempfile - -def stop_err( msg ): - sys.stderr.write( "%s\n" % msg ) - sys.exit() - -def __main__(): - #Parse Command Line - parser = optparse.OptionParser() - parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' ) - parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' ) - parser.add_option( '-a', '--min-anchor-length', dest='min_anchor_length', - help='The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction.' ) - parser.add_option( '-i', '--min-intron-length', dest='min_intron_length', - help='The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart.' ) - parser.add_option( '-I', '--max-intron-length', dest='max_intron_length', - help='The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.' ) - parser.add_option( '-s', '--solexa-quals', dest='solexa_quals', help='Use the Solexa scale for quality values in FASTQ files.' ) - parser.add_option( '-S', '--solexa.3-quals', dest='solexa_quals', - help='As of the Illumina GA pipeline version 1.3, quality scores are encoded in Phred-scaled base-64. Use this option for FASTQ files from pipeline 1.3 or later.' ) - parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' 
) - parser.add_option( '-C', '--coverage-output', dest='coverage_output_file', help='Coverage output file; formate is WIG.' ) - parser.add_option( '-J', '--junctions-output', dest='junctions_output_file', help='Junctions output file; formate is BED.' ) - parser.add_option( '-H', '--hits-output', dest='accepted_hits_output_file', help='Accepted hits output file; formate is SAM.' ) - parser.add_option( '-D', '--indexes-dir', dest='indexes_directory', help='Indexes directory; location of .ebwt and .fa files.' ) - parser.add_option( '-r', '--mate-inner-dist', dest='mate_inner_dist', help='This is the expected (mean) inner distance between mate pairs. \ - For, example, for paired end runs with fragments selected at 300bp, \ - where each end is 50bp, you should set -r to be 200. There is no default, \ - and this parameter is required for paired end runs.') - (options, args) = parser.parse_args() - - # Make temp directory for output. - tmp_output_dir = tempfile.mkdtemp() - - # Build command. - - # Base. - cmd = "tophat -o %s " % ( tmp_output_dir ) - - # Add options. - if options.mate_inner_dist: - cmd += ( " -r %i" % int ( options.mate_inner_dist ) ) - - # Add index prefix. - cmd += " " + options.indexes_directory - - # Add input files. - cmd += " " + options.input1 - if options.mate_inner_dist: - # Using paired-end reads. - cmd += " " + options.input2 - - # Route program output to file. - cmd += " > %s" % tmp_output_dir + "/std_out.txt" - # Route program error output to file. - cmd += " 2> %s" % tmp_output_dir + "/std_err.txt" - - # Run. - try: - os.system( cmd ) - except Exception, e: - stop_err( 'Error in tophat:\n' + str( e ) ) - - # TODO: look for errors in program output. - - # Copy output files from tmp directory to specified files. 
- try: - shutil.copyfile( tmp_output_dir + "/coverage.wig", options.coverage_output_file ) - shutil.copyfile( tmp_output_dir + "/junctions.bed", options.junctions_output_file ) - shutil.copyfile( tmp_output_dir + "/accepted_hits.sam", options.accepted_hits_output_file ) - except Exception, e: - stop_err( 'Error in tophat:\n' + str( e ) ) - - # Clean up temp dirs - if os.path.exists( tmp_output_dir ): - shutil.rmtree( tmp_output_dir ) - -if __name__=="__main__": __main__() diff -r 869e494a8074 -r 37c075416918 tools/tophat/tophat_wrapper.xml --- a/tools/tophat/tophat_wrapper.xml Tue Apr 13 17:02:56 2010 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,129 +0,0 @@ -<tool id="tophat" name="Tophat" version="1.0.13"> - <description>Find splice junctions using RNA-seq data</description> - <command interpreter="python"> - tophat_wrapper.py - --num-threads="4" - --coverage-output=$coverage - --junctions-output=$junctions - --hits-output=$accepted_hits - #if $refGenomeSource.genomeSource == "history": - --indexes-dir=$refGenomeSource.ownFile - #else: - --indexes-dir=$refGenomeSource.index.value - #end if - #if $singlePaired.sPaired == "single": - --input1=$singlePaired.input1 - --input2="None" - #else: - -r $singlePaired.mean_inner_distance - --input1=$singlePaired.input1 - --input2=$singlePaired.input2 - #end if - </command> - <inputs> - <conditional name="refGenomeSource"> - <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" 
help="Built-ins were indexed using default options"> - <option value="indexed">Use a built-in index</option> - <option value="history">Use one from the history</option> - </param> - <when value="indexed"> - <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team"> - <options from_file="tophat_indices.loc"> - <column name="value" index="1" /> - <column name="name" index="0" /> - </options> - </param> - </when> - <when value="history"> - <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> - </when>  - </conditional>  - <conditional name="singlePaired"> - <param name="sPaired" type="select" label="Is this library mate-paired?"> - <option value="single">Single-end</option> - <option value="paired">Paired-end</option> - </param> - <when value="single"> - <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/> - </when> - <when value="paired"> - <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/> - <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/> - <param format="fastqsanger" name="mean_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs"/> - </when> - </conditional> - </inputs> - - <outputs> - <data format="sam" name="accepted_hits"/> - <data format="wig" name="coverage" /> - <data format="bed" name="junctions" /> - </outputs> - - <tests> - <test> - <param name="genomeSource" value="indexed"/> - <param name="index" value="test_ref"/> - <param name="sPaired" value="paired"/> - <param name="input1" ftype="fastqsanger" value="tophat_in1.fq"/> - <param name="input2" ftype="fastqsanger" value="tophat_in2.fq"/> - 
<param name="mean_inner_distance" value="20"/> -  - <output name="coverage" file="tophat_out2.wig"/> - <output name="junctions" file="tophat_out3.bed"/> - </test> - </tests> - - <help> -**Tophat Overview** - -TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie, and then analyzes the mapping results to identify splice junctions between exons. Please cite: Trapnell, C., Pachter, L. and Salzberg, S.L. TopHat: discovering splice junctions with RNA-Seq. Bioinformatics 25, 1105-1111 (2009). - -.. _Tophat: http://tophat.cbcb.umd.edu/ - ------- - -**Know what you are doing** - -.. class:: warningmark - -There is no such thing (yet) as an automated gearshift in splice junction identification. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. - -.. __: http://tophat.cbcb.umd.edu/manual.html - ------- - -**Input formats** - -Tophat accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files. - ------- - -**Outputs** - -Tophat produces three output files:: - - coverage.wig -- coverage of reads - accepted_hits.sam -- reads that were mapped onto genome - junctions.bed -- splice junctions identified by Tophat - -------- - -**Tophat settings** - -All of the options have a default value. You can change any of them. Some of the options in Tophat have been implemented here. - ------- - -**Tophat parameter list** - -This is a list of implemented Tophat options:: - - -r This is the expected (mean) inner distance between mate pairs. For, example, for paired end runs with fragments - selected at 300bp, where each end is 50bp, you should set -r to be 200. 
There is no default, and this parameter - is required for paired end runs. - </help> -</tool>

1 0

[hg] galaxy 3640: Fix tipsy tooltip staying on screen after deleting dataset
by Greg Von Kuster 15 Apr '10

15 Apr '10

details: http://www.bx.psu.edu/hg/galaxy/rev/67160108c887 changeset: 3640:67160108c887 user: Kanwei Li <kanwei(a)gmail.com> date: Tue Apr 13 17:15:47 2010 -0400 description: Fix tipsy tooltip staying on screen after deleting dataset diffstat: templates/root/history.mako | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diffs (11 lines): diff -r 869e494a8074 -r 67160108c887 templates/root/history.mako --- a/templates/root/history.mako Tue Apr 13 17:02:56 2010 -0400 +++ b/templates/root/history.mako Tue Apr 13 17:15:47 2010 -0400 @@ -47,6 +47,7 @@ } }); %endif + $(".tipsy").remove(); } }); return false;

1 0

[hg] galaxy 3639: Fix async updates in history panel
by Greg Von Kuster 15 Apr '10

15 Apr '10

details: http://www.bx.psu.edu/hg/galaxy/rev/869e494a8074 changeset: 3639:869e494a8074 user: Kanwei Li <kanwei(a)gmail.com> date: Tue Apr 13 17:02:56 2010 -0400 description: Fix async updates in history panel diffstat: templates/root/history.mako | 26 +++++++++++++------------- 1 files changed, 13 insertions(+), 13 deletions(-) diffs (61 lines): diff -r e3167368345a -r 869e494a8074 templates/root/history.mako --- a/templates/root/history.mako Tue Apr 13 15:35:32 2010 -0400 +++ b/templates/root/history.mako Tue Apr 13 17:02:56 2010 -0400 @@ -19,6 +19,9 @@ <script type="text/javascript"> +<% TERMINAL_STATES = ["ok", "error", "empty", "deleted", "discarded"] %> +TERMINAL_STATES = ${ h.to_json_string(TERMINAL_STATES) }; + $(function() { var historywrapper = $("div.historyItemWrapper"); init_history_items(historywrapper); @@ -175,7 +178,7 @@ // Updater updater( - ${ h.to_json_string( dict([(data.id, data.state) for data in reversed( datasets ) if data.visible and data.state not in [ "deleted", "empty", "error", "ok" ]]) ) } + ${ h.to_json_string( dict([(data.id, data.state) for data in reversed( datasets ) if data.visible and data.state not in TERMINAL_STATES]) ) } ); // Navigate to a dataset. @@ -194,18 +197,16 @@ empty = false; break; } - if ( ! 
empty ) { - // console.log( "Updater running in 3 seconds" ); - setTimeout( function() { updater_callback( tracked_datasets ) }, 3000 ); - } else { - // console.log( "Updater finished" ); + if ( !empty ) { + setTimeout( function() { updater_callback( tracked_datasets ) }, 4000 ); } }; var updater_callback = function ( tracked_datasets ) { // Build request data - var ids = [] - var states = [] - var force_history_refresh = false + var ids = [], + states = [], + force_history_refresh = false; + $.each( tracked_datasets, function ( id, state ) { ids.push( id ); states.push( state ); @@ -221,10 +222,9 @@ // Replace HTML var container = $("#historyItemContainer-" + id); container.html( val.html ); - setupHistoryItem( container.children( ".historyItemWrapper" ) ); - initShowHide(); - // If new state was terminal, stop tracking - if (( val.state == "ok") || ( val.state == "error") || ( val.state == "empty") || ( val.state == "deleted" ) || ( val.state == "discarded" )) { + init_history_items( $("div.historyItemWrapper"), "noinit" ); + // If new state is terminal, stop tracking + if (TERMINAL_STATES.indexOf(val.state) !== -1) { if ( val.force_history_refresh ){ force_history_refresh = true; }

1 0

[hg] galaxy 3638: Decouple the TagHandler from the model by allowing sub-classes to point to a specified model
by Greg Von Kuster 15 Apr '10

15 Apr '10

details: http://www.bx.psu.edu/hg/galaxy/rev/e3167368345a changeset: 3638:e3167368345a user: Greg Von Kuster <greg(a)bx.psu.edu> date: Tue Apr 13 15:35:32 2010 -0400 description: Decouple the TagHandler from the model by allowing sub-classes to point to a specified model. Add a baseline functional tests script to provide coverage for tagging histories and history items ( more coverage is needed, of course, and can easily be added to this script ). diffstat: lib/galaxy/app.py | 4 +- lib/galaxy/tags/tag_handler.py | 269 +++++++++++++---------------- lib/galaxy/web/controllers/history.py | 2 +- lib/galaxy/web/controllers/tag.py | 202 ++++++++++------------ lib/galaxy/web/controllers/user.py | 2 +- lib/galaxy/web/framework/helpers/grids.py | 6 +- templates/tagging_common.mako | 5 +- templates/user/index.mako | 3 +- test/base/twilltestcase.py | 16 + test/functional/test_tags.py | 63 +++++++ 10 files changed, 299 insertions(+), 273 deletions(-) diffs (930 lines): diff -r 4fb48981bdb0 -r e3167368345a lib/galaxy/app.py --- a/lib/galaxy/app.py Tue Apr 13 13:14:59 2010 -0400 +++ b/lib/galaxy/app.py Tue Apr 13 15:35:32 2010 -0400 @@ -1,11 +1,11 @@ import sys, os, atexit from galaxy import config, jobs, util, tools, web, cloud -## from galaxy.tracks import store from galaxy.web import security import galaxy.model import galaxy.datatypes.registry import galaxy.security +from galaxy.tags.tag_handler import GalaxyTagHandler class UniverseApplication( object ): """Encapsulates the state of a Universe application""" @@ -33,6 +33,8 @@ self.config.database_engine_options ) # Security helper self.security = security.SecurityHelper( id_secret=self.config.id_secret ) + # Tag handler + self.tag_handler = GalaxyTagHandler() # Initialize the tools self.toolbox = tools.ToolBox( self.config.tool_config, self.config.tool_path, self ) # Load datatype converters diff -r 4fb48981bdb0 -r e3167368345a lib/galaxy/tags/tag_handler.py --- a/lib/galaxy/tags/tag_handler.py Tue Apr 13 13:14:59 2010 
-0400 +++ b/lib/galaxy/tags/tag_handler.py Tue Apr 13 15:35:32 2010 -0400 @@ -1,148 +1,117 @@ -from galaxy import model -import re +import re, logging from sqlalchemy.sql.expression import func, and_ from sqlalchemy.sql import select +log = logging.getLogger( __name__ ) + +# Item-specific information needed to perform tagging. +class ItemTagAssocInfo( object ): + def __init__( self, item_class, tag_assoc_class, item_id_col ): + self.item_class = item_class + self.tag_assoc_class = tag_assoc_class + self.item_id_col = item_id_col + class TagHandler( object ): - - # Minimum tag length. - min_tag_len = 2 - - # Maximum tag length. - max_tag_len = 255 - - # Tag separator. - tag_separators = ',;' - - # Hierarchy separator. - hierarchy_separator = '.' - - # Key-value separator. - key_value_separators = "=:" - - # Item-specific information needed to perform tagging. - class ItemTagAssocInfo( object ): - def __init__( self, item_class, tag_assoc_class, item_id_col ): - self.item_class = item_class - self.tag_assoc_class = tag_assoc_class - self.item_id_col = item_id_col - - # Initialize with known classes. 
- item_tag_assoc_info = {} - item_tag_assoc_info["History"] = ItemTagAssocInfo( model.History, model.HistoryTagAssociation, model.HistoryTagAssociation.table.c.history_id ) - item_tag_assoc_info["HistoryDatasetAssociation"] = \ - ItemTagAssocInfo( model.HistoryDatasetAssociation, model.HistoryDatasetAssociationTagAssociation, model.HistoryDatasetAssociationTagAssociation.table.c.history_dataset_association_id ) - item_tag_assoc_info["Page"] = ItemTagAssocInfo( model.Page, model.PageTagAssociation, model.PageTagAssociation.table.c.page_id ) - item_tag_assoc_info["StoredWorkflow"] = ItemTagAssocInfo( model.StoredWorkflow, model.StoredWorkflowTagAssociation, model.StoredWorkflowTagAssociation.table.c.stored_workflow_id ) - item_tag_assoc_info["Visualization"] = ItemTagAssocInfo( model.Visualization, model.VisualizationTagAssociation, model.VisualizationTagAssociation.table.c.visualization_id ) - - def get_tag_assoc_class(self, item_class): - """ Returns tag association class for item class. """ + def __init__( self ): + # Minimum tag length. + self.min_tag_len = 2 + # Maximum tag length. + self.max_tag_len = 255 + # Tag separator. + self.tag_separators = ',;' + # Hierarchy separator. + self.hierarchy_separator = '.' + # Key-value separator. + self.key_value_separators = "=:" + # Initialize with known classes - add to this in subclasses. + self.item_tag_assoc_info = {} + def get_tag_assoc_class( self, item_class ): + """Returns tag association class for item class.""" return self.item_tag_assoc_info[item_class.__name__].tag_assoc_class - - def get_id_col_in_item_tag_assoc_table( self, item_class): - """ Returns item id column in class' item-tag association table. """ + def get_id_col_in_item_tag_assoc_table( self, item_class ): + """Returns item id column in class' item-tag association table.""" return self.item_tag_assoc_info[item_class.__name__].item_id_col - - def get_community_tags(self, sa_session, item=None, limit=None): - """ Returns community tags for an item. 
""" - + def get_community_tags( self, trans, item=None, limit=None ): + """Returns community tags for an item.""" # Get item-tag association class. item_class = item.__class__ item_tag_assoc_class = self.get_tag_assoc_class( item_class ) if not item_tag_assoc_class: return [] - # Build select statement. - cols_to_select = [ item_tag_assoc_class.table.c.tag_id, func.count('*') ] - from_obj = item_tag_assoc_class.table.join( item_class.table ).join( model.Tag.table ) - where_clause = ( self.get_id_col_in_item_tag_assoc_table(item_class) == item.id ) - order_by = [ func.count("*").desc() ] + cols_to_select = [ item_tag_assoc_class.table.c.tag_id, func.count( '*' ) ] + from_obj = item_tag_assoc_class.table.join( item_class.table ).join( trans.app.model.Tag.table ) + where_clause = ( self.get_id_col_in_item_tag_assoc_table( item_class ) == item.id ) + order_by = [ func.count( "*" ).desc() ] group_by = item_tag_assoc_class.table.c.tag_id - # Do query and get result set. - query = select(columns=cols_to_select, from_obj=from_obj, - whereclause=where_clause, group_by=group_by, order_by=order_by, limit=limit) - result_set = sa_session.execute(query) - + query = select( columns=cols_to_select, + from_obj=from_obj, + whereclause=where_clause, + group_by=group_by, + order_by=order_by, + limit=limit ) + result_set = trans.sa_session.execute( query ) # Return community tags. community_tags = [] for row in result_set: tag_id = row[0] - community_tags.append( self.get_tag_by_id( sa_session, tag_id ) ) - + community_tags.append( self.get_tag_by_id( trans, tag_id ) ) return community_tags - def remove_item_tag( self, trans, user, item, tag_name ): """Remove a tag from an item.""" # Get item tag association. - item_tag_assoc = self._get_item_tag_assoc(user, item, tag_name) - + item_tag_assoc = self._get_item_tag_assoc( user, item, tag_name ) # Remove association. if item_tag_assoc: # Delete association. 
trans.sa_session.delete( item_tag_assoc ) - item.tags.remove(item_tag_assoc) + item.tags.remove( item_tag_assoc ) return True - return False - def delete_item_tags( self, trans, user, item ): """Delete tags from an item.""" # Delete item-tag associations. for tag in item.tags: trans.sa_session.delete( tag ) - # Delete tags from item. del item.tags[:] - - def item_has_tag(self, user, item, tag): + def item_has_tag( self, trans, user, item, tag ): """Returns true if item is has a given tag.""" # Get tag name. - if isinstance(tag, basestring): + if isinstance( tag, basestring ): tag_name = tag - elif isinstance(tag, model.Tag): + elif isinstance( tag, trans.app.model.Tag ): tag_name = tag.name - # Check for an item-tag association to see if item has a given tag. - item_tag_assoc = self._get_item_tag_assoc(user, item, tag_name) + item_tag_assoc = self._get_item_tag_assoc( user, item, tag_name ) if item_tag_assoc: return True return False - - - def apply_item_tags(self, db_session, user, item, tags_str): + def apply_item_tags( self, trans, user, item, tags_str ): """Apply tags to an item.""" # Parse tags. - parsed_tags = self.parse_tags(tags_str) - + parsed_tags = self.parse_tags( tags_str ) # Apply each tag. for name, value in parsed_tags.items(): # Use lowercase name for searching/creating tag. lc_name = name.lower() - # Get or create item-tag association. - item_tag_assoc = self._get_item_tag_assoc(user, item, lc_name) + item_tag_assoc = self._get_item_tag_assoc( user, item, lc_name ) if not item_tag_assoc: - # # Create item-tag association. - # - # Create tag; if None, skip the tag (and log error). - tag = self._get_or_create_tag(db_session, lc_name) + tag = self._get_or_create_tag( trans, lc_name ) if not tag: # Log error? continue - # Create tag association based on item class. item_tag_assoc_class = self.get_tag_assoc_class( item.__class__ ) item_tag_assoc = item_tag_assoc_class() - # Add tag to association. 
- item.tags.append(item_tag_assoc) + item.tags.append( item_tag_assoc ) item_tag_assoc.tag = tag - item_tag_assoc.user = user - + item_tag_assoc.user = user # Apply attributes to item-tag association. Strip whitespace from user name and tag. lc_value = None if value: @@ -150,144 +119,142 @@ item_tag_assoc.user_tname = name item_tag_assoc.user_value = value item_tag_assoc.value = lc_value - - def get_tags_str(self, tags): + def get_tags_str( self, tags ): """Build a string from an item's tags.""" # Return empty string if there are no tags. if not tags: return "" - # Create string of tags. tags_str_list = list() for tag in tags: tag_str = tag.user_tname if tag.value is not None: tag_str += ":" + tag.user_value - tags_str_list.append(tag_str) - return ", ".join(tags_str_list) - - def get_tag_by_id(self, db_session, tag_id): + tags_str_list.append( tag_str ) + return ", ".join( tags_str_list ) + def get_tag_by_id( self, trans, tag_id ): """Get a Tag object from a tag id.""" - return db_session.query( model.Tag ).filter_by( id=tag_id) .first() - - def get_tag_by_name(self, db_session, tag_name): + return trans.sa_session.query( trans.app.model.Tag ).filter_by( id=tag_id ).first() + def get_tag_by_name( self, trans, tag_name ): """Get a Tag object from a tag name (string).""" if tag_name: - return db_session.query( model.Tag ).filter_by( name=tag_name.lower() ).first() + return trans.sa_session.query( trans.app.model.Tag ).filter_by( name=tag_name.lower() ).first() return None - - def _create_tag(self, db_session, tag_str): + def _create_tag( self, trans, tag_str ): """Create a Tag object from a tag string.""" - tag_hierarchy = tag_str.split(self.__class__.hierarchy_separator) + tag_hierarchy = tag_str.split( self.hierarchy_separator ) tag_prefix = "" parent_tag = None for sub_tag in tag_hierarchy: # Get or create subtag. 
- tag_name = tag_prefix + self._scrub_tag_name(sub_tag) - tag = db_session.query( model.Tag ).filter_by( name=tag_name).first() + tag_name = tag_prefix + self._scrub_tag_name( sub_tag ) + tag = trans.sa_session.query( trans.app.model.Tag ).filter_by( name=tag_name).first() if not tag: - tag = model.Tag(type=0, name=tag_name) - + tag = trans.app.model.Tag( type=0, name=tag_name ) # Set tag parent. tag.parent = parent_tag - # Update parent and tag prefix. parent_tag = tag - tag_prefix = tag.name + self.__class__.hierarchy_separator + tag_prefix = tag.name + self.hierarchy_separator return tag - - def _get_or_create_tag(self, db_session, tag_str): + def _get_or_create_tag( self, trans, tag_str ): """Get or create a Tag object from a tag string.""" # Scrub tag; if tag is None after being scrubbed, return None. - scrubbed_tag_str = self._scrub_tag_name(tag_str) + scrubbed_tag_str = self._scrub_tag_name( tag_str ) if not scrubbed_tag_str: return None - # Get item tag. - tag = self.get_tag_by_name(db_session, scrubbed_tag_str) - + tag = self.get_tag_by_name( trans, scrubbed_tag_str ) # Create tag if necessary. if tag is None: - tag = self._create_tag(db_session, scrubbed_tag_str) - + tag = self._create_tag( trans, scrubbed_tag_str ) return tag - def _get_item_tag_assoc( self, user, item, tag_name ): - """Return ItemTagAssociation object for a user, item, and tag string; returns None if there is - no such association.""" + """ + Return ItemTagAssociation object for a user, item, and tag string; returns None if there is + no such association. + """ scrubbed_tag_name = self._scrub_tag_name( tag_name ) for item_tag_assoc in item.tags: if ( item_tag_assoc.user == user ) and ( item_tag_assoc.user_tname == scrubbed_tag_name ): return item_tag_assoc return None - - def parse_tags(self, tag_str): - """Returns a list of raw (tag-name, value) pairs derived from a string; method scrubs tag names and values as well. 
- Return value is a dictionary where tag-names are keys.""" + def parse_tags( self, tag_str ): + """ + Returns a list of raw (tag-name, value) pairs derived from a string; method scrubs tag names and values as well. + Return value is a dictionary where tag-names are keys. + """ # Gracefully handle None. if not tag_str: return dict() - # Split tags based on separators. - reg_exp = re.compile('[' + self.__class__.tag_separators + ']') - raw_tags = reg_exp.split(tag_str) - + reg_exp = re.compile( '[' + self.tag_separators + ']' ) + raw_tags = reg_exp.split( tag_str ) # Extract name-value pairs. name_value_pairs = dict() for raw_tag in raw_tags: - nv_pair = self._get_name_value_pair(raw_tag) + nv_pair = self._get_name_value_pair( raw_tag ) scrubbed_name = self._scrub_tag_name( nv_pair[0] ) scrubbed_value = self._scrub_tag_value( nv_pair[1] ) name_value_pairs[scrubbed_name] = scrubbed_value return name_value_pairs - - def _scrub_tag_value(self, value): + def _scrub_tag_value( self, value ): """Scrub a tag value.""" # Gracefully handle None: if not value: return None - # Remove whitespace from value. - reg_exp = re.compile('\s') - scrubbed_value = re.sub(reg_exp, "", value) - + reg_exp = re.compile( '\s' ) + scrubbed_value = re.sub( reg_exp, "", value ) return scrubbed_value - - def _scrub_tag_name(self, name): + def _scrub_tag_name( self, name ): """Scrub a tag name.""" # Gracefully handle None: if not name: return None - # Remove whitespace from name. - reg_exp = re.compile('\s') - scrubbed_name = re.sub(reg_exp, "", name) - + reg_exp = re.compile( '\s' ) + scrubbed_name = re.sub( reg_exp, "", name ) # Ignore starting ':' char. - if scrubbed_name.startswith(self.__class__.hierarchy_separator): + if scrubbed_name.startswith( self.hierarchy_separator ): scrubbed_name = scrubbed_name[1:] - # If name is too short or too long, return None. 
- if len(scrubbed_name) < self.min_tag_len or len(scrubbed_name) > self.max_tag_len: + if len( scrubbed_name ) < self.min_tag_len or len( scrubbed_name ) > self.max_tag_len: return None - return scrubbed_name - - def _scrub_tag_name_list(self, tag_name_list): + def _scrub_tag_name_list( self, tag_name_list ): """Scrub a tag name list.""" scrubbed_tag_list = list() for tag in tag_name_list: - scrubbed_tag_list.append( self._scrub_tag_name(tag) ) + scrubbed_tag_list.append( self._scrub_tag_name( tag ) ) return scrubbed_tag_list - - def _get_name_value_pair(self, tag_str): + def _get_name_value_pair( self, tag_str ): """Get name, value pair from a tag string.""" # Use regular expression to parse name, value. - reg_exp = re.compile( "[" + self.__class__.key_value_separators + "]" ) + reg_exp = re.compile( "[" + self.key_value_separators + "]" ) name_value_pair = reg_exp.split( tag_str ) - # Add empty slot if tag does not have value. - if len(name_value_pair) < 2: - name_value_pair.append(None) - - return name_value_pair \ No newline at end of file + if len( name_value_pair ) < 2: + name_value_pair.append( None ) + return name_value_pair + +class GalaxyTagHandler( TagHandler ): + def __init__( self ): + from galaxy import model + TagHandler.__init__( self ) + self.item_tag_assoc_info["History"] = ItemTagAssocInfo( model.History, + model.HistoryTagAssociation, + model.HistoryTagAssociation.table.c.history_id ) + self.item_tag_assoc_info["HistoryDatasetAssociation"] = \ + ItemTagAssocInfo( model.HistoryDatasetAssociation, + model.HistoryDatasetAssociationTagAssociation, + model.HistoryDatasetAssociationTagAssociation.table.c.history_dataset_association_id ) + self.item_tag_assoc_info["Page"] = ItemTagAssocInfo( model.Page, + model.PageTagAssociation, + model.PageTagAssociation.table.c.page_id ) + self.item_tag_assoc_info["StoredWorkflow"] = ItemTagAssocInfo( model.StoredWorkflow, + model.StoredWorkflowTagAssociation, + 
model.StoredWorkflowTagAssociation.table.c.stored_workflow_id ) + self.item_tag_assoc_info["Visualization"] = ItemTagAssocInfo( model.Visualization, + model.VisualizationTagAssociation, + model.VisualizationTagAssociation.table.c.visualization_id ) diff -r 4fb48981bdb0 -r e3167368345a lib/galaxy/web/controllers/history.py --- a/lib/galaxy/web/controllers/history.py Tue Apr 13 13:14:59 2010 -0400 +++ b/lib/galaxy/web/controllers/history.py Tue Apr 13 15:35:32 2010 -0400 @@ -5,7 +5,7 @@ from galaxy.model.orm import * from galaxy.util.json import * from galaxy.util.sanitize_html import sanitize_html -from galaxy.tags.tag_handler import TagHandler +from galaxy.tags.tag_handler import GalaxyTagHandler from sqlalchemy.sql.expression import ClauseElement import webhelpers, logging, operator from datetime import datetime diff -r 4fb48981bdb0 -r e3167368345a lib/galaxy/web/controllers/tag.py --- a/lib/galaxy/web/controllers/tag.py Tue Apr 13 13:14:59 2010 -0400 +++ b/lib/galaxy/web/controllers/tag.py Tue Apr 13 15:35:32 2010 -0400 @@ -1,203 +1,185 @@ """ Tags Controller: handles tagging/untagging of entities and provides autocomplete support. """ - +import logging from galaxy.web.base.controller import * -from galaxy.tags.tag_handler import * from sqlalchemy.sql.expression import func, and_ from sqlalchemy.sql import select +log = logging.getLogger( __name__ ) + class TagsController ( BaseController ): - - def __init__(self, app): - BaseController.__init__(self, app) - self.tag_handler = TagHandler() - + def __init__( self, app ): + BaseController.__init__( self, app ) + self.tag_handler = app.tag_handler @web.expose @web.require_login( "edit item tags" ) def get_tagging_elt_async( self, trans, item_id, item_class, elt_context="" ): """ Returns HTML for editing an item's tags. 
""" item = self._get_item( trans, item_class, trans.security.decode_id( item_id ) ) if not item: - return trans.show_error_message( "No item of class %s with id % " % ( item_class, item_id ) ) - user = trans.get_user() - return trans.fill_template( "/tagging_common.mako", tag_type="individual", user=trans.get_user(), tagged_item=item, elt_context=elt_context, - in_form=False, input_size="22", tag_click_fn="default_tag_click_fn", use_toggle_link=False ) - + return trans.show_error_message( "No item of class %s with id %s " % ( item_class, item_id ) ) + return trans.fill_template( "/tagging_common.mako", + tag_type="individual", + user=trans.user, + tagged_item=item, + elt_context=elt_context, + in_form=False, + input_size="22", + tag_click_fn="default_tag_click_fn", + use_toggle_link=False ) @web.expose @web.require_login( "add tag to an item" ) def add_tag_async( self, trans, item_id=None, item_class=None, new_tag=None, context=None ): - """ Add tag to an item. """ - + """ Add tag to an item. """ # Apply tag. item = self._get_item( trans, item_class, trans.security.decode_id( item_id ) ) - user = trans.get_user() - self.tag_handler.apply_item_tags( trans.sa_session, user, item, new_tag.encode('utf-8') ) + user = trans.user + self.tag_handler.apply_item_tags( trans, user, item, new_tag.encode( 'utf-8' ) ) trans.sa_session.flush() - # Log. - params = dict( item_id=item.id, item_class=item_class, tag=new_tag) + params = dict( item_id=item.id, item_class=item_class, tag=new_tag ) trans.log_action( user, unicode( "tag" ), context, params ) - @web.expose @web.require_login( "remove tag from an item" ) def remove_tag_async( self, trans, item_id=None, item_class=None, tag_name=None, context=None ): """ Remove tag from an item. """ - # Remove tag. 
- item = self._get_item( trans, item_class, trans.security.decode_id( item_id) ) - user = trans.get_user() - self.tag_handler.remove_item_tag( trans, user, item, tag_name.encode('utf-8') ) + item = self._get_item( trans, item_class, trans.security.decode_id( item_id ) ) + user = trans.user + self.tag_handler.remove_item_tag( trans, user, item, tag_name.encode( 'utf-8' ) ) trans.sa_session.flush() - # Log. - params = dict( item_id=item.id, item_class=item_class, tag=tag_name) - trans.log_action( user, unicode( "untag"), context, params ) - + params = dict( item_id=item.id, item_class=item_class, tag=tag_name ) + trans.log_action( user, unicode( "untag" ), context, params ) # Retag an item. All previous tags are deleted and new tags are applied. #(a)web.expose @web.require_login( "Apply a new set of tags to an item; previous tags are deleted." ) def retag_async( self, trans, item_id=None, item_class=None, new_tags=None ): """ Apply a new set of tags to an item; previous tags are deleted. """ - # Apply tags. item = self._get_item( trans, item_class, trans.security.decode_id( item_id ) ) - user = trans.get_user() - tag_handler.delete_item_tags( trans, item ) - self.tag_handler.apply_item_tags( trans.sa_session, user, item, new_tags.encode('utf-8') ) - trans.sa_session.flush() - + user = trans.user + self.tag_handler.delete_item_tags( trans, item ) + self.tag_handler.apply_item_tags( trans, user, item, new_tags.encode( 'utf-8' ) ) + trans.sa_session.flush() @web.expose @web.require_login( "get autocomplete data for an item's tags" ) def tag_autocomplete_data( self, trans, q=None, limit=None, timestamp=None, item_id=None, item_class=None ): """ Get autocomplete data for an item's tags. """ - - # # Get item, do security check, and get autocomplete data. 
- # item = None if item_id is not None: item = self._get_item( trans, item_class, trans.security.decode_id( item_id ) ) - user = trans.get_user() + user = trans.user item_class = self.get_class( item_class ) - - q = q.encode('utf-8') - if q.find(":") == -1: - return self._get_tag_autocomplete_names(trans, q, limit, timestamp, user, item, item_class) + q = q.encode( 'utf-8' ) + if q.find( ":" ) == -1: + return self._get_tag_autocomplete_names( trans, q, limit, timestamp, user, item, item_class ) else: - return self._get_tag_autocomplete_values(trans, q, limit, timestamp, user, item, item_class) - + return self._get_tag_autocomplete_values( trans, q, limit, timestamp, user, item, item_class ) def _get_tag_autocomplete_names( self, trans, q, limit, timestamp, user=None, item=None, item_class=None ): - """Returns autocomplete data for tag names ordered from most frequently used to - least frequently used.""" - # + """ + Returns autocomplete data for tag names ordered from most frequently used to + least frequently used. + """ # Get user's item tags and usage counts. - # - # Get item's class object and item-tag association class. if item is None and item_class is None: - raise RuntimeError("Both item and item_class cannot be None") + raise RuntimeError( "Both item and item_class cannot be None" ) elif item is not None: item_class = item.__class__ - - item_tag_assoc_class = self.tag_handler.get_tag_assoc_class(item_class) - + item_tag_assoc_class = self.tag_handler.get_tag_assoc_class( item_class ) # Build select statement. 
- cols_to_select = [ item_tag_assoc_class.table.c.tag_id, func.count('*') ] - from_obj = item_tag_assoc_class.table.join( item_class.table ).join( model.Tag.table ) - where_clause = and_( - model.Tag.table.c.name.like(q + "%"), - item_tag_assoc_class.table.c.user_id == user.id - ) - order_by = [ func.count("*").desc() ] + cols_to_select = [ item_tag_assoc_class.table.c.tag_id, func.count( '*' ) ] + from_obj = item_tag_assoc_class.table.join( item_class.table ).join( trans.app.model.Tag.table ) + where_clause = and_( trans.app.model.Tag.table.c.name.like( q + "%" ), + item_tag_assoc_class.table.c.user_id == user.id ) + order_by = [ func.count( "*" ).desc() ] group_by = item_tag_assoc_class.table.c.tag_id - # Do query and get result set. - query = select(columns=cols_to_select, from_obj=from_obj, - whereclause=where_clause, group_by=group_by, order_by=order_by, limit=limit) - result_set = trans.sa_session.execute(query) - + query = select( columns=cols_to_select, + from_obj=from_obj, + whereclause=where_clause, + group_by=group_by, + order_by=order_by, + limit=limit ) + result_set = trans.sa_session.execute( query ) # Create and return autocomplete data. ac_data = "#Header|Your Tags\n" for row in result_set: - tag = self.tag_handler.get_tag_by_id(trans.sa_session, row[0]) - + tag = self.tag_handler.get_tag_by_id( trans, row[0] ) # Exclude tags that are already applied to the item. - if ( item is not None ) and ( self.tag_handler.item_has_tag( trans.get_user(), item, tag ) ): + if ( item is not None ) and ( self.tag_handler.item_has_tag( trans, trans.user, item, tag ) ): continue # Add tag to autocomplete data. Use the most frequent name that user # has employed for the tag. 
- tag_names = self._get_usernames_for_tag(trans.sa_session, trans.get_user(), - tag, item_class, item_tag_assoc_class) + tag_names = self._get_usernames_for_tag( trans, trans.user, tag, item_class, item_tag_assoc_class ) ac_data += tag_names[0] + "|" + tag_names[0] + "\n" - return ac_data - - def _get_tag_autocomplete_values(self, trans, q, limit, timestamp, user=None, item=None, item_class=None): - """Returns autocomplete data for tag values ordered from most frequently used to - least frequently used.""" - - tag_name_and_value = q.split(":") + def _get_tag_autocomplete_values( self, trans, q, limit, timestamp, user=None, item=None, item_class=None ): + """ + Returns autocomplete data for tag values ordered from most frequently used to + least frequently used. + """ + tag_name_and_value = q.split( ":" ) tag_name = tag_name_and_value[0] tag_value = tag_name_and_value[1] - tag = self.tag_handler.get_tag_by_name(trans.sa_session, tag_name) + tag = self.tag_handler.get_tag_by_name( trans, tag_name ) # Don't autocomplete if tag doesn't exist. if tag is None: return "" - # Get item's class object and item-tag association class. if item is None and item_class is None: - raise RuntimeError("Both item and item_class cannot be None") + raise RuntimeError( "Both item and item_class cannot be None" ) elif item is not None: item_class = item.__class__ - - item_tag_assoc_class = self.tag_handler.get_tag_assoc_class(item_class) - + item_tag_assoc_class = self.tag_handler.get_tag_assoc_class( item_class ) # Build select statement. 
- cols_to_select = [ item_tag_assoc_class.table.c.value, func.count('*') ] - from_obj = item_tag_assoc_class.table.join( item_class.table ).join( model.Tag.table ) + cols_to_select = [ item_tag_assoc_class.table.c.value, func.count( '*' ) ] + from_obj = item_tag_assoc_class.table.join( item_class.table ).join( trans.app.model.Tag.table ) where_clause = and_( item_tag_assoc_class.table.c.user_id == user.id, - model.Tag.table.c.id==tag.id, - item_tag_assoc_class.table.c.value.like(tag_value + "%") ) - order_by = [ func.count("*").desc(), item_tag_assoc_class.table.c.value ] + trans.app.model.Tag.table.c.id == tag.id, + item_tag_assoc_class.table.c.value.like( tag_value + "%" ) ) + order_by = [ func.count("*").desc(), item_tag_assoc_class.table.c.value ] group_by = item_tag_assoc_class.table.c.value - # Do query and get result set. - query = select(columns=cols_to_select, from_obj=from_obj, - whereclause=where_clause, group_by=group_by, order_by=order_by, limit=limit) - result_set = trans.sa_session.execute(query) - + query = select( columns=cols_to_select, + from_obj=from_obj, + whereclause=where_clause, + group_by=group_by, + order_by=order_by, + limit=limit ) + result_set = trans.sa_session.execute( query ) # Create and return autocomplete data. 
- ac_data = "#Header|Your Values for '%s'\n" % (tag_name) - tag_uname = self._get_usernames_for_tag(trans.sa_session, trans.get_user(), tag, item_class, item_tag_assoc_class)[0] + ac_data = "#Header|Your Values for '%s'\n" % ( tag_name ) + tag_uname = self._get_usernames_for_tag( trans, trans.user, tag, item_class, item_tag_assoc_class )[0] for row in result_set: ac_data += tag_uname + ":" + row[0] + "|" + row[0] + "\n" return ac_data - - def _get_usernames_for_tag(self, db_session, user, tag, item_class, item_tag_assoc_class): - """ Returns an ordered list of the user names for a tag; list is ordered from - most popular to least popular name.""" - + def _get_usernames_for_tag( self, trans, user, tag, item_class, item_tag_assoc_class ): + """ + Returns an ordered list of the user names for a tag; list is ordered from + most popular to least popular name. + """ # Build select stmt. - cols_to_select = [ item_tag_assoc_class.table.c.user_tname, func.count('*') ] + cols_to_select = [ item_tag_assoc_class.table.c.user_tname, func.count( '*' ) ] where_clause = and_( item_tag_assoc_class.table.c.user_id == user.id, item_tag_assoc_class.table.c.tag_id == tag.id ) group_by = item_tag_assoc_class.table.c.user_tname - order_by = [ func.count("*").desc() ] - + order_by = [ func.count( "*" ).desc() ] # Do query and get result set. - query = select(columns=cols_to_select, whereclause=where_clause, - group_by=group_by, order_by=order_by) - result_set = db_session.execute(query) - + query = select( columns=cols_to_select, + whereclause=where_clause, + group_by=group_by, + order_by=order_by ) + result_set = trans.sa_session.execute( query ) user_tag_names = list() for row in result_set: - user_tag_names.append(row[0]) - + user_tag_names.append( row[0] ) return user_tag_names - def _get_item( self, trans, item_class_name, id ): """ Get an item based on type and id. 
""" item_class = self.tag_handler.item_tag_assoc_info[item_class_name].item_class - item = trans.sa_session.query(item_class).filter("id=" + str(id))[0] + item = trans.sa_session.query( item_class ).filter( "id=" + str( id ) )[0] return item diff -r 4fb48981bdb0 -r e3167368345a lib/galaxy/web/controllers/user.py --- a/lib/galaxy/web/controllers/user.py Tue Apr 13 13:14:59 2010 -0400 +++ b/lib/galaxy/web/controllers/user.py Tue Apr 13 15:35:32 2010 -0400 @@ -89,7 +89,7 @@ else: refresh_frames = [ 'masthead', 'history' ] else: - refresh_frames = [] + refresh_frames = [ 'masthead' ] # Since logging an event requires a session, we'll log prior to ending the session trans.log_event( "User logged out" ) trans.handle_user_logout() diff -r 4fb48981bdb0 -r e3167368345a lib/galaxy/web/framework/helpers/grids.py --- a/lib/galaxy/web/framework/helpers/grids.py Tue Apr 13 13:14:59 2010 -0400 +++ b/lib/galaxy/web/framework/helpers/grids.py Tue Apr 13 15:35:32 2010 -0400 @@ -3,7 +3,7 @@ from galaxy.web.base import controller from galaxy.web.framework.helpers import iff -from galaxy.tags.tag_handler import TagHandler +from galaxy.tags.tag_handler import GalaxyTagHandler from galaxy.web import url_for from galaxy.util.json import from_json_string, to_json_string from galaxy.util.odict import odict @@ -399,7 +399,7 @@ return query def get_filter( self, user, column_filter ): # Parse filter to extract multiple tags. - tag_handler = TagHandler() + tag_handler = GalaxyTagHandler() if isinstance( column_filter, list ): # Collapse list of tags into a single string; this is redundant but effective. TODO: fix this by iterating over tags. column_filter = ",".join( column_filter ) @@ -421,7 +421,7 @@ in_form=True, input_size="20", tag_click_fn="add_tag_to_grid_filter", use_toggle_link=True ) def get_filter( self, user, column_filter ): # Parse filter to extract multiple tags. 
- tag_handler = TagHandler() + tag_handler = GalaxyTagHandler() if isinstance( column_filter, list ): # Collapse list of tags into a single string; this is redundant but effective. TODO: fix this by iterating over tags. column_filter = ",".join( column_filter ) diff -r 4fb48981bdb0 -r e3167368345a templates/tagging_common.mako --- a/templates/tagging_common.mako Tue Apr 13 13:14:59 2010 -0400 +++ b/templates/tagging_common.mako Tue Apr 13 15:35:32 2010 -0400 @@ -4,10 +4,7 @@ from random import random from sys import maxint from math import floor - from galaxy.tags.tag_handler import TagHandler from galaxy.model import Tag, ItemTagAssociation - - tag_handler = TagHandler() %> ## Render a tagging element if there is a tagged_item. @@ -92,7 +89,7 @@ ## Build HTML. <% elt_id = int ( floor ( random()*maxint ) ) - community_tags = tag_handler.get_community_tags(trans.sa_session, tagged_item, 10) + community_tags = trans.app.tag_handler.get_community_tags( trans, item=tagged_item, limit=10 ) %> ${self.render_tagging_element_html(elt_id=elt_id, tags=community_tags, use_toggle_link=use_toggle_link, editable=False, tag_type="community")} diff -r 4fb48981bdb0 -r e3167368345a templates/user/index.mako --- a/templates/user/index.mako Tue Apr 13 13:14:59 2010 -0400 +++ b/templates/user/index.mako Tue Apr 13 15:35:32 2010 -0400 @@ -13,10 +13,9 @@ <li><a href="${h.url_for( action='show_info' )}">${_('Manage your information')}</a></li> <li><a href="${h.url_for( action='set_default_permissions' )}">${_('Change default permissions')}</a> for new histories</li> %endif - <li><a href="${h.url_for( action='logout' )}">${_('Logout')}</a></li> </ul> %else: - %if not msg: + %if not message: <p>${n_('You are currently not logged in.')}</p> %endif <ul> diff -r 4fb48981bdb0 -r e3167368345a test/base/twilltestcase.py --- a/test/base/twilltestcase.py Tue Apr 13 13:14:59 2010 -0400 +++ b/test/base/twilltestcase.py Tue Apr 13 15:35:32 2010 -0400 @@ -585,6 +585,16 @@ except: pass self.home() + def 
check_hda_attribute_info( self, hda_id, check_str1='', check_str2='', check_str3='', check_str4='' ): + """Edit history_dataset_association attribute information""" + if check_str1: + self.check_page_for_string( check_str1 ) + if check_str2: + self.check_page_for_string( check_str2 ) + if check_str3: + self.check_page_for_string( check_str3 ) + if check_str4: + self.check_page_for_string( check_str4 ) def auto_detect_metadata( self, hda_id ): """Auto-detect history_dataset_association metadata""" self.home() @@ -2050,3 +2060,9 @@ else: break self.assertNotEqual(count, maxiter) + + # Tests associated with tags + def add_tag( self, item_id, item_class, context, new_tag, check_str='' ): + self.visit_url( "%s/tag/add_tag_async?item_id=%s&item_class=%s&context=%s&new_tag=%s" % \ + ( self.url, item_id, item_class, context, new_tag ) ) + \ No newline at end of file diff -r 4fb48981bdb0 -r e3167368345a test/functional/test_tags.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/functional/test_tags.py Tue Apr 13 15:35:32 2010 -0400 @@ -0,0 +1,63 @@ +from base.twilltestcase import * +from base.test_db_util import * + +class TestTags( TwillTestCase ): + # TODO: Add more functional test coverage for tags + def test_000_initiate_users( self ): + """Ensuring all required user accounts exist""" + self.logout() + self.login( email='test1(a)bx.psu.edu', username='regular-user1' ) + global regular_user1 + regular_user1 = get_user( 'test1(a)bx.psu.edu' ) + assert regular_user1 is not None, 'Problem retrieving user with email "test1(a)bx.psu.edu" from the database' + global regular_user1_private_role + regular_user1_private_role = get_private_role( regular_user1 ) + self.logout() + self.login( email='test2(a)bx.psu.edu', username='regular-user2' ) + global regular_user2 + regular_user2 = get_user( 'test2(a)bx.psu.edu' ) + assert regular_user2 is not None, 'Problem retrieving user with email "test2(a)bx.psu.edu" from the database' + global regular_user2_private_role + 
regular_user2_private_role = get_private_role( regular_user2 ) + self.logout() + self.login( email='test3(a)bx.psu.edu', username='regular-user3' ) + global regular_user3 + regular_user3 = get_user( 'test3(a)bx.psu.edu' ) + assert regular_user3 is not None, 'Problem retrieving user with email "test3(a)bx.psu.edu" from the database' + global regular_user3_private_role + regular_user3_private_role = get_private_role( regular_user3 ) + self.logout() + self.login( email='test(a)bx.psu.edu', username='admin-user' ) + global admin_user + admin_user = get_user( 'test(a)bx.psu.edu' ) + assert admin_user is not None, 'Problem retrieving user with email "test(a)bx.psu.edu" from the database' + global admin_user_private_role + admin_user_private_role = get_private_role( admin_user ) + def test_005_add_tag_to_history( self ): + """Testing adding a tag to a history""" + # Logged in as admin_user + # Create a new, empty history named anonymous + name = 'anonymous' + self.new_history( name=name ) + global history1 + history1 = get_latest_history_for_user( admin_user ) + self.add_tag( self.security.encode_id( history1.id ), + 'History', + 'history.mako', + 'hello' ) + self.check_history_for_string( 'tags : {"hello"' ) + def test_010_add_tag_to_history_item( self ): + """Testing adding a tag to a history item""" + # Logged in as admin_user + self.upload_file( '1.bed' ) + latest_hda = get_latest_hda() + self.add_tag( self.security.encode_id( latest_hda.id ), + 'HistoryDatasetAssociation', + 'edit_attributes.mako', + 'goodbye' ) + self.check_hda_attribute_info( 'tags : {"goodbye"' ) + def test_999_reset_data_for_later_test_runs( self ): + """Reseting data to enable later test runs to to be valid""" + # logged in as admin_user + # Delete histories + self.delete_history( id=self.security.encode_id( history1.id ) )

1 0

[hg] galaxy 3637: Pack scripts.
by Greg Von Kuster 15 Apr '10

15 Apr '10

details: http://www.bx.psu.edu/hg/galaxy/rev/4fb48981bdb0 changeset: 3637:4fb48981bdb0 user: jeremy goecks <jeremy.goecks(a)emory.edu> date: Tue Apr 13 13:14:59 2010 -0400 description: Pack scripts. diffstat: static/scripts/packed/galaxy.base.js | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diffs (8 lines): diff -r 9678e1225651 -r 4fb48981bdb0 static/scripts/packed/galaxy.base.js --- a/static/scripts/packed/galaxy.base.js Tue Apr 13 12:11:17 2010 -0400 +++ b/static/scripts/packed/galaxy.base.js Tue Apr 13 13:14:59 2010 -0400 @@ -1,1 +1,1 @@ -$(document).ready(function(){replace_big_select_inputs()});$.fn.makeAbsolute=function(a){return this.each(function(){var b=$(this);var c=b.position();b.css({position:"absolute",marginLeft:0,marginTop:0,top:c.top,left:c.left,right:$(window).width()-(c.left+b.width())});if(a){b.remove().appendTo("body")}})};function ensure_popup_helper(){if($("#popup-helper").length===0){$("<div id='popup-helper'/>").css({background:"white",opacity:0,zIndex:15000,position:"absolute",top:0,left:0,width:"100%",height:"100%"}).appendTo("body").hide()}}function attach_popupmenu(b,d){var a=function(){d.unbind().hide();$("#popup-helper").unbind("click.popupmenu").hide()};var c=function(g){$("#popup-helper").bind("click.popupmenu",a).show();d.click(a).css({left:0,top:-1000}).show();var f=g.pageX-d.width()/2;f=Math.min(f,$(document).scrollLeft()+$(window).width()-$(d).width()-20);f=Math.max(f,$(document).scrollLeft()+20);d.css({top:g.pageY-5,left:f});return false};$(b).click(c)}function make_popupmen! 
u(c,b){ensure_popup_helper();var a=$("<ul id='"+c.attr("id")+"-menu'></ul>");$.each(b,function(f,e){if(e){$("<li/>").html(f).click(e).appendTo(a)}else{$("<li class='head'/>").html(f).appendTo(a)}});var d=$("<div class='popmenu-wrapper'>");d.append(a).append("<div class='overlay-border'>").css("position","absolute").appendTo("body").hide();attach_popupmenu(c,d)}function make_popup_menus(){jQuery("div[popupmenu]").each(function(){var c={};$(this).find("a").each(function(){var b=$(this).attr("confirm"),d=$(this).attr("href"),e=$(this).attr("target");c[$(this).text()]=function(){if(!b||confirm(b)){var g=window;if(e=="_parent"){g=window.parent}else{if(e=="_top"){g=window.top}}g.location=d}}});var a=$("#"+$(this).attr("popupmenu"));make_popupmenu(a,c);$(this).remove();a.addClass("popup").show()})}function array_length(b){if(b.length){return b.length}var c=0;for(var a in b){c++}return c}function replace_big_select_inputs(){$("select[name=dbkey]").each(function(){var a=$(this);if(a! .find("option").length<20){return}var b=a.attr("value");var c=$("<inpu t type='text' class='text-and-autocomplete-select'></input>");c.attr("size",40);c.attr("name",a.attr("name"));c.click(function(){var g=$(this).attr("value");$(this).attr("value","Loading...");$(this).showAllInCache();$(this).attr("value",g);$(this).select()});var f=[];var e={};a.children("option").each(function(){var h=$(this).text();var g=$(this).attr("value");if(g=="?"){return}f.push(h);e[h]=g;e[g]=g;if(g==b){c.attr("value",h)}});if(c.attr("value")==""){c.attr("value","Click to Search or Select")}var d={selectFirst:false,autoFill:false,mustMatch:false,matchContains:true,max:1000,minChars:0,hideForLessThanMinChars:false};c.autocomplete(f,d);a.replaceWith(c);c.parents("form").submit(function(){var h=c.attr("value");var g=e[h];if(g!==null&&g!==undefined){c.attr("value",g)}else{if(b!=""){c.attr("value",b)}else{c.attr("value","?")}}})})}function 
async_save_text(d,f,e,a,c,h,i,g,b){if(c===undefined){c=30}if(i===undefined){i=4}$("#"+d).live("click",function(){if($("#renaming-activ! e").length>0){return}var l=$("#"+f),k=l.text(),j;if(h){j=$("<textarea></textarea>").attr({rows:i,cols:c}).text(k)}else{j=$("<input type='text'></input>").attr({value:k,size:c})}j.attr("id","renaming-active");j.blur(function(){$(this).remove();l.show();if(b){b(j)}});j.keyup(function(n){if(n.keyCode===27){$(this).trigger("blur")}else{if(n.keyCode===13){var m={};m[a]=$(this).val();$(this).trigger("blur");$.ajax({url:e,data:m,error:function(){alert("Text editing for elt "+f+" failed")},success:function(o){l.text(o);if(b){b(j)}}})}}});if(g){g(j)}l.hide();j.insertAfter(l);j.focus();j.select();return})}function init_history_items(c,a){var b=function(){try{var d=$.jStore.store("history_expand_state");if(d){for(var f in d){$("#"+f+" div.historyItemBody").show()}}}catch(e){$.jStore.remove("history_expand_state")}if($.browser.mozilla){$("div.historyItemBody").each(function(){if(!$(this).is(":visible")){$(this).find("pre.peek").css("overflow","hidden")}})}c.each(function(){var i=this.i! 
d;var g=$(this).children("div.historyItemBody");var h=g.find("pre.peek ");$(this).children(".historyItemTitleBar").find(".historyItemTitle").wrap("<a href='#'></a>").click(function(){if(g.is(":visible")){if($.browser.mozilla){h.css("overflow","hidden")}g.slideUp("fast");if(!a){var j=$.jStore.store("history_expand_state");if(j){delete j[i];$.jStore.store("history_expand_state",j)}}}else{g.slideDown("fast",function(){if($.browser.mozilla){h.css("overflow","auto")}});if(!a){var j=$.jStore.store("history_expand_state");if(j===undefined){j={}}j[i]=true;$.jStore.store("history_expand_state",j)}}return false})});$("#top-links > a.toggle").click(function(){var g=$.jStore.store("history_expand_state");if(g===undefined){g={}}$("div.historyItemBody:visible").each(function(){if($.browser.mozilla){$(this).find("pre.peek").css("overflow","hidden")}$(this).slideUp("fast");if(g){delete g[$(this).parent().attr("id")]}});$.jStore.store("history_expand_state",g)}).show()};if(a){b()}else{$.jStore.init("galaxy");$.jStore.engineReady(function(){b()})}}$(document).re! 
ady(function(){$("a[confirm]").click(function(){return confirm($(this).attr("confirm"))});if($.fn.tipsy){$(".tooltip").tipsy({gravity:"s"})}make_popup_menus()}); \ No newline at end of file +$(document).ready(function(){replace_big_select_inputs()});$.fn.makeAbsolute=function(a){return this.each(function(){var b=$(this);var c=b.position();b.css({position:"absolute",marginLeft:0,marginTop:0,top:c.top,left:c.left,right:$(window).width()-(c.left+b.width())});if(a){b.remove().appendTo("body")}})};function ensure_popup_helper(){if($("#popup-helper").length===0){$("<div id='popup-helper'/>").css({background:"white",opacity:0,zIndex:15000,position:"absolute",top:0,left:0,width:"100%",height:"100%"}).appendTo("body").hide()}}function attach_popupmenu(b,d){var a=function(){d.unbind().hide();$("#popup-helper").unbind("click.popupmenu").hide()};var c=function(g){$("#popup-helper").bind("click.popupmenu",a).show();d.click(a).css({left:0,top:-1000}).show();var f=g.pageX-d.width()/2;f=Math.min(f,$(document).scrollLeft()+$(window).width()-$(d).width()-20);f=Math.max(f,$(document).scrollLeft()+20);d.css({top:g.pageY-5,left:f});return false};$(b).click(c)}function make_popupmen! 
u(c,b){ensure_popup_helper();var a=$("<ul id='"+c.attr("id")+"-menu'></ul>");$.each(b,function(f,e){if(e){$("<li/>").html(f).click(e).appendTo(a)}else{$("<li class='head'/>").html(f).appendTo(a)}});var d=$("<div class='popmenu-wrapper'>");d.append(a).append("<div class='overlay-border'>").css("position","absolute").appendTo("body").hide();attach_popupmenu(c,d)}function make_popup_menus(){jQuery("div[popupmenu]").each(function(){var c={};$(this).find("a").each(function(){var b=$(this).attr("confirm"),d=$(this).attr("href"),e=$(this).attr("target");c[$(this).text()]=function(){if(!b||confirm(b)){var g=window;if(e=="_parent"){g=window.parent}else{if(e=="_top"){g=window.top}}g.location=d}}});var a=$("#"+$(this).attr("popupmenu"));make_popupmenu(a,c);$(this).remove();a.addClass("popup").show()})}function array_length(b){if(b.length){return b.length}var c=0;for(var a in b){c++}return c}function replace_big_select_inputs(){$("select[name=dbkey]").each(function(){var a=$(this);if(a! .find("option").length<20){return}var b=a.attr("value");var c=$("<inpu t type='text' class='text-and-autocomplete-select'></input>");c.attr("size",40);c.attr("name",a.attr("name"));c.click(function(){var g=$(this).attr("value");$(this).attr("value","Loading...");$(this).showAllInCache();$(this).attr("value",g);$(this).select()});var f=[];var e={};a.children("option").each(function(){var h=$(this).text();var g=$(this).attr("value");if(g=="?"){return}f.push(h);e[h]=g;e[g]=g;if(g==b){c.attr("value",h)}});f.push("unspecified (?)");e["unspecified (?)"]="?";e["?"]="?";if(c.attr("value")==""){c.attr("value","Click to Search or Select")}var d={selectFirst:false,autoFill:false,mustMatch:false,matchContains:true,max:1000,minChars:0,hideForLessThanMinChars:false};c.autocomplete(f,d);a.replaceWith(c);c.parents("form").submit(function(){var h=c.attr("value");var g=e[h];if(g!==null&&g!==undefined){c.attr("value",g)}else{if(b!=""){c.attr("value",b)}else{c.attr("value","?")}}})})}function 
async_save_text(d,f,e,a,c,h,i,g,b){if(c===undefined){c=30}if(i===undefin! ed){i=4}$("#"+d).live("click",function(){if($("#renaming-active").length>0){return}var l=$("#"+f),k=l.text(),j;if(h){j=$("<textarea></textarea>").attr({rows:i,cols:c}).text(k)}else{j=$("<input type='text'></input>").attr({value:k,size:c})}j.attr("id","renaming-active");j.blur(function(){$(this).remove();l.show();if(b){b(j)}});j.keyup(function(n){if(n.keyCode===27){$(this).trigger("blur")}else{if(n.keyCode===13){var m={};m[a]=$(this).val();$(this).trigger("blur");$.ajax({url:e,data:m,error:function(){alert("Text editing for elt "+f+" failed")},success:function(o){l.text(o);if(b){b(j)}}})}}});if(g){g(j)}l.hide();j.insertAfter(l);j.focus();j.select();return})}function init_history_items(c,a){var b=function(){try{var d=$.jStore.store("history_expand_state");if(d){for(var f in d){$("#"+f+" div.historyItemBody").show()}}}catch(e){$.jStore.remove("history_expand_state")}if($.browser.mozilla){$("div.historyItemBody").each(function(){if(!$(this).is(":visible")){$(this).find("pre.pee! 
k").css("overflow","hidden")}})}c.each(function(){var i=this.id;var g= $(this).children("div.historyItemBody");var h=g.find("pre.peek");$(this).children(".historyItemTitleBar").find(".historyItemTitle").wrap("<a href='#'></a>").click(function(){if(g.is(":visible")){if($.browser.mozilla){h.css("overflow","hidden")}g.slideUp("fast");if(!a){var j=$.jStore.store("history_expand_state");if(j){delete j[i];$.jStore.store("history_expand_state",j)}}}else{g.slideDown("fast",function(){if($.browser.mozilla){h.css("overflow","auto")}});if(!a){var j=$.jStore.store("history_expand_state");if(j===undefined){j={}}j[i]=true;$.jStore.store("history_expand_state",j)}}return false})});$("#top-links > a.toggle").click(function(){var g=$.jStore.store("history_expand_state");if(g===undefined){g={}}$("div.historyItemBody:visible").each(function(){if($.browser.mozilla){$(this).find("pre.peek").css("overflow","hidden")}$(this).slideUp("fast");if(g){delete g[$(this).parent().attr("id")]}});$.jStore.store("history_expand_state",g)}).show()};if(a){b()}else{$.jStore.init("! galaxy");$.jStore.engineReady(function(){b()})}}$(document).ready(function(){$("a[confirm]").click(function(){return confirm($(this).attr("confirm"))});if($.fn.tipsy){$(".tooltip").tipsy({gravity:"s"})}make_popup_menus()}); \ No newline at end of file

1 0

[hg] galaxy 3636: Allow setting the session cookie path for avoi...
by Greg Von Kuster 15 Apr '10

15 Apr '10

details: http://www.bx.psu.edu/hg/galaxy/rev/9678e1225651 changeset: 3636:9678e1225651 user: Nate Coraor <nate(a)bx.psu.edu> date: Tue Apr 13 12:11:17 2010 -0400 description: Allow setting the session cookie path for avoiding conflicts with multiple galaxy servers running behind the same hostname not at the server root diffstat: lib/galaxy/config.py | 1 + lib/galaxy/web/framework/__init__.py | 2 +- 2 files changed, 2 insertions(+), 1 deletions(-) diffs (23 lines): diff -r cdd8d520f3c7 -r 9678e1225651 lib/galaxy/config.py --- a/lib/galaxy/config.py Tue Apr 13 11:23:19 2010 -0400 +++ b/lib/galaxy/config.py Tue Apr 13 12:11:17 2010 -0400 @@ -38,6 +38,7 @@ # Where dataset files are stored self.file_path = resolve_path( kwargs.get( "file_path", "database/files" ), self.root ) self.new_file_path = resolve_path( kwargs.get( "new_file_path", "database/tmp" ), self.root ) + self.cookie_path = kwargs.get( "cookie_path", "/" ) # dataset Track files self.track_store_path = kwargs.get( "track_store_path", "${extra_files_path}/tracks") self.tool_path = resolve_path( kwargs.get( "tool_path", "tools" ), self.root ) diff -r cdd8d520f3c7 -r 9678e1225651 lib/galaxy/web/framework/__init__.py --- a/lib/galaxy/web/framework/__init__.py Tue Apr 13 11:23:19 2010 -0400 +++ b/lib/galaxy/web/framework/__init__.py Tue Apr 13 12:11:17 2010 -0400 @@ -410,7 +410,7 @@ """ Update the session cookie to match the current session. """ - self.set_cookie( self.security.encode_session_key( self.galaxy_session.session_key ), name=name ) + self.set_cookie( self.security.encode_session_key( self.galaxy_session.session_key ), name=name, path=self.app.config.cookie_path ) def handle_user_login( self, user, webapp ): """ Login a new user (possibly newly created)

1 0

[hg] galaxy 3635: merge
by Greg Von Kuster 15 Apr '10

15 Apr '10

details: http://www.bx.psu.edu/hg/galaxy/rev/cdd8d520f3c7 changeset: 3635:cdd8d520f3c7 user: Anton Nekrutenko <anton(a)bx.psu.edu> date: Tue Apr 13 11:23:19 2010 -0400 description: merge diffstat: static/scripts/checkbox_and_radiobutton.js | 347 ++++ static/scripts/helper_functions.js | 817 ++++++++++ static/scripts/timer.js | 74 + tools/rgenetics/rgGRR.py | 2241 ++++++++++++++------------- tools/rgenetics/rgManQQ.py | 19 +- 5 files changed, 2396 insertions(+), 1102 deletions(-) diffs (truncated from 3540 to 3000 lines): diff -r a6e3f4cae4ce -r cdd8d520f3c7 static/scripts/checkbox_and_radiobutton.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/static/scripts/checkbox_and_radiobutton.js Tue Apr 13 11:23:19 2010 -0400 @@ -0,0 +1,347 @@ +/* +Scripts to create interactive checkboxes and radio buttons in SVG using ECMA script +Copyright (C) <2007> <Andreas Neumann> +Version 1.1.3, 2007-08-09 +neumann(a)karto.baug.ethz.ch +http://www.carto.net/ +http://www.carto.net/neumann/ + +Credits: +* Guy Morton for providing a fix to let users toggle checkboxes by clicking on text labels +* Bruce Rindahl for providing the bugfix described in version 1.1.2 +* Simon Shutter for providing a fix for the ASV in IE crash when reloading the SVG file after calling the .remove() method on a checkbox + +---- + +Documentation: http://www.carto.net/papers/svg/gui/checkbox_and_radiobutton/ + +---- + +current version: 1.1.3 + +version history: +1.0 (2006-03-13) +initial version + +1.1 (2006-07-11) +text labels are now clickable (thanks to Guy Morton) +added method .moveTo() to move checkbox to a different location +introduced new constructor parameter labelYOffset to allow more flexible placement of the text label + +1.1.1 (2007-02-06) +added cursor pointer to the text label and use element representing the checkBox + +1.1.2 (2007-04-19) +bug fix: this.selectedIndex was not correctly initialized in method addCheckBox of the radioButtonGroup object + +1.1.3 (2007-08-09) +bug fix: the method 
.remove() was slightly modified (using removeEventListener) for avoiding a crash related to the method after reloading the SVG file + +------- + + +This ECMA script library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library (lesser_gpl.txt); if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +---- + +original document site: http://www.carto.net/papers/svg/gui/checkbox_and_radiobutton/ +Please contact the author in case you want to use code or ideas commercially. +If you use this code, please include this copyright header, the included full +LGPL 2.1 text and read the terms provided in the LGPL 2.1 license +(http://www.gnu.org/copyleft/lesser.txt) + +------------------------------- + +Please report bugs and send improvements to neumann(a)karto.baug.ethz.ch +If you use this control, please link to the original (http://www.carto.net/papers/svg/gui/checkbox_and_radiobutton/) +somewhere in the source-code-comment or the "about" of your project and give credits, thanks! 
+ +*/ + +function checkBox(id,parentNode,x,y,checkboxId,checkcrossId,checkedStatus,labelText,textStyles,labelDistance,labelYOffset,radioButtonGroup,functionToCall) { + var nrArguments = 13; + var createCheckbox= true; + if (arguments.length == nrArguments) { + this.id = id; //an internal id, this id is not used in the SVG Dom tree + this.parentNode = parentNode; //the parentNode, string or nodeReference + this.x = x; //the center of the checkBox + this.y = y; //the center of the checkBox + this.checkboxId = checkboxId; //the id of the checkbox symbol (background) + this.checkcrossId = checkcrossId; //the id of the checkbox symbol (foreground), pointer-events should be set to "none" + this.checkedStatus = checkedStatus; //a status variable (true|false), indicates if checkbox is on or off + this.labelText = labelText; //the text of the checkbox label to be displayed, use undefined or empty string if you don't need a label text + this.textStyles = textStyles; //an array of literals containing the text settings + if (!this.textStyles["font-size"]) { + this.textStyles["font-size"] = 12; + } + this.labelDistance = labelDistance; //a distance defined from the center of the checkbox to the left of the text of the label + this.labelYOffset = labelYOffset; //a y offset value for the text label in relation to the checkbox symbol center + this.radioButtonGroup = radioButtonGroup; //a reference to a radio button group, if this is a standalone checkBox, just use the parameter undefined + this.functionToCall = functionToCall; //the function to call after triggering checkBox + this.exists = true; //status that indicates if checkbox exists or not, is set to false after method .remove() was called + this.label = undefined; //later a reference to the label text node + } + else { + createCheckbox = false; + alert("Error in checkbox ("+id+"): wrong nr of arguments! 
You have to pass over "+nrArguments+" parameters."); + } + if (createCheckbox) { + //timer stuff + this.timer = new Timer(this); //a Timer instance for calling the functionToCall + if (this.radioButtonGroup) { + this.timerMs = 0; + } + else { + this.timerMs = 200; //a constant of this object that is used in conjunction with the timer - functionToCall is called after 200 ms + } + //create checkbox + this.createCheckBox(); + } + else { + alert("Could not create checkbox with id '"+id+"' due to errors in the constructor parameters"); + } +} + +//this method creates all necessary checkbox geometry +checkBox.prototype.createCheckBox = function() { + if (typeof(this.parentNode) == "string") { + this.parentNode = document.getElementById(this.parentNode); + } + //create checkbox + this.checkBox = document.createElementNS(svgNS,"use"); + this.checkBox.setAttributeNS(null,"x",this.x); + this.checkBox.setAttributeNS(null,"y",this.y); + this.checkBox.setAttributeNS(xlinkNS,"href","#"+this.checkboxId); + this.checkBox.addEventListener("click",this,false); + this.checkBox.setAttributeNS(null,"cursor","pointer"); + this.parentNode.appendChild(this.checkBox); + //create checkcross + this.checkCross = document.createElementNS(svgNS,"use"); + this.checkCross.setAttributeNS(null,"x",this.x); + this.checkCross.setAttributeNS(null,"y",this.y); + this.checkCross.setAttributeNS(xlinkNS,"href","#"+this.checkcrossId); + this.parentNode.appendChild(this.checkCross); + if (this.checkedStatus == false) { + this.checkCross.setAttributeNS(null,"display","none"); + } + //create label, if any + if (this.labelText) { + if (this.labelText.length > 0) { + this.label = document.createElementNS(svgNS,"text"); + for (var attrib in this.textStyles) { + var value = this.textStyles[attrib]; + if (attrib == "font-size") { + value += "px"; + } + this.label.setAttributeNS(null,attrib,value); + } + this.label.setAttributeNS(null,"x",(this.x + this.labelDistance)); + this.label.setAttributeNS(null,"y",(this.y 
+ this.labelYOffset)); + this.label.setAttributeNS(null,"cursor","pointer"); + var labelTextNode = document.createTextNode(this.labelText); + this.label.appendChild(labelTextNode); + this.label.setAttributeNS(null,"pointer-events","all"); + this.label.addEventListener("click",this,false); + this.parentNode.appendChild(this.label); + } + } + if (this.radioButtonGroup) { + this.radioButtonGroup.addCheckBox(this); + } +} + +checkBox.prototype.handleEvent = function(evt) { + if (evt.type == "click") { + if (this.checkedStatus == true) { + this.checkCross.setAttributeNS(null,"display","none"); + this.checkedStatus = false; + } + else { + this.checkCross.setAttributeNS(null,"display","inline"); + this.checkedStatus = true; + } + } + this.timer.setTimeout("fireFunction",this.timerMs); +} + +checkBox.prototype.fireFunction = function() { + if (this.radioButtonGroup) { + this.radioButtonGroup.selectById(this.id,true); + } + else { + if (typeof(this.functionToCall) == "function") { + this.functionToCall(this.id,this.checkedStatus,this.labelText); + } + if (typeof(this.functionToCall) == "object") { + this.functionToCall.checkBoxChanged(this.id,this.checkedStatus,this.labelText); + } + if (typeof(this.functionToCall) == undefined) { + return; + } + } +} + +checkBox.prototype.check = function(FireFunction) { + this.checkCross.setAttributeNS(null,"display","inherit"); + this.checkedStatus = true; + if (FireFunction) { + this.timer.setTimeout("fireFunction",this.timerMs); + } +} + +checkBox.prototype.uncheck = function(FireFunction) { + this.checkCross.setAttributeNS(null,"display","none"); + this.checkedStatus = false; + if (FireFunction) { + this.timer.setTimeout("fireFunction",this.timerMs); + } +} + +//move checkbox to a different position +checkBox.prototype.moveTo = function(moveX,moveY) { + this.x = moveX; + this.y = moveY; + //move checkbox + this.checkBox.setAttributeNS(null,"x",this.x); + this.checkBox.setAttributeNS(null,"y",this.y); + //move checkcross + 
this.checkCross.setAttributeNS(null,"x",this.x); + this.checkCross.setAttributeNS(null,"y",this.y); + //move text label + if (this.labelText) { + this.label.setAttributeNS(null,"x",(this.x + this.labelDistance)); + this.label.setAttributeNS(null,"y",(this.y + this.labelYOffset)); + } +} + +checkBox.prototype.remove = function(FireFunction) { + this.checkBox.removeEventListener("click",this,false); + this.parentNode.removeChild(this.checkBox); + this.parentNode.removeChild(this.checkCross); + if (this.label) { + this.parentNode.removeChild(this.label); + } + this.exists = false; +} + +checkBox.prototype.setLabelText = function(labelText) { + this.labelText = labelText + if (this.label) { + this.label.firstChild.nodeValue = labelText; + } + else { + if (this.labelText.length > 0) { + this.label = document.createElementNS(svgNS,"text"); + for (var attrib in this.textStyles) { + value = this.textStyles[attrib]; + if (attrib == "font-size") { + value += "px"; + } + this.label.setAttributeNS(null,attrib,value); + } + this.label.setAttributeNS(null,"x",(this.x + this.labelDistance)); + this.label.setAttributeNS(null,"y",(this.y + this.textStyles["font-size"] * 0.3)); + var labelTextNode = document.createTextNode(this.labelText); + this.label.appendChild(labelTextNode); + this.parentNode.appendChild(this.label); + } + } +} + +/* start of the radioButtonGroup object */ + +function radioButtonGroup(id,functionToCall) { + var nrArguments = 2; + if (arguments.length == nrArguments) { + this.id = id; + if (typeof(functionToCall) == "function" || typeof(functionToCall) == "object" || typeof(functionToCall) == undefined) { + this.functionToCall = functionToCall; + } + else { + alert("Error in radiobutton with ("+id+"): argument functionToCall is not of type 'function', 'object' or undefined!"); + } + this.checkBoxes = new Array(); //this array will hold checkbox objects + this.selectedId = undefined; //holds the id of the active radio button + this.selectedIndex = undefined; 
//holds the index of the active radio button + //timer stuff + this.timer = new Timer(this); //a Timer instance for calling the functionToCall + this.timerMs = 200; //a constant of this object that is used in conjunction with the timer - functionToCall is called after 200 ms + } + else { + alert("Error in radiobutton with ("+id+"): wrong nr of arguments! You have to pass over "+nrArguments+" parameters."); + } +} + +radioButtonGroup.prototype.addCheckBox = function(checkBoxObj) { + this.checkBoxes.push(checkBoxObj); + if (checkBoxObj.checkedStatus) { + this.selectedId = checkBoxObj.id; + this.selectedIndex = this.checkBoxes.length - 1; + } +} + +//change radio button selection by id +radioButtonGroup.prototype.selectById = function(cbId,fireFunction) { + var found = false; + for (var i=0;i<this.checkBoxes.length;i++) { + if (this.checkBoxes[i].id == cbId) { + this.selectedId = cbId; + this.selectedIndex = i; + if (this.checkBoxes[i].checkedStatus == false) { + this.checkBoxes[i].check(false); + } + found = true; + } + else { + this.checkBoxes[i].uncheck(false); + } + } + if (found) { + if (fireFunction) { + this.timer.setTimeout("fireFunction",this.timerMs); + } + } + else { + alert("Error in radiobutton with ("+this.id+"): could not find checkbox with id '"+cbId+"'"); + } +} + +//change radio button selection by label name +radioButtonGroup.prototype.selectByLabelname = function(labelName,fireFunction) { + var id = -1; + for (var i=0;i<this.checkBoxes.length;i++) { + if (this.checkBoxes[i].labelText == labelName) { + id = this.checkBoxes[i].id; + } + } + if (id == -1) { + alert("Error in radiobutton with ("+this.id+"): could not find checkbox with label '"+labelName+"'"); + } + else { + this.selectById(id,fireFunction); + } +} + +radioButtonGroup.prototype.fireFunction = function() { + if (typeof(this.functionToCall) == "function") { + this.functionToCall(this.id,this.selectedId,this.checkBoxes[this.selectedIndex].labelText); + } + if (typeof(this.functionToCall) 
== "object") { + this.functionToCall.radioButtonChanged(this.id,this.selectedId,this.checkBoxes[this.selectedIndex].labelText); + } + if (typeof(this.functionToCall) == undefined) { + return; + } +} \ No newline at end of file diff -r a6e3f4cae4ce -r cdd8d520f3c7 static/scripts/helper_functions.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/static/scripts/helper_functions.js Tue Apr 13 11:23:19 2010 -0400 @@ -0,0 +1,817 @@ +/** + * @fileoverview + * + * ECMAScript <a href="http://www.carto.net/papers/svg/resources/helper_functions.html">helper functions</a>, main purpose is to serve in SVG mapping or other SVG based web applications + * + * This ECMA script library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library (http://www.carto.net/papers/svg/resources/lesser_gpl.txt) if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Please report bugs and send improvements to neumann(a)karto.baug.ethz.ch + * If you use these scripts, please link to the original (http://www.carto.net/papers/svg/resources/helper_functions.html) + * somewhere in the source-code-comment or the "about" of your project and give credits, thanks! + * + * See <a href="js_docs_out/overview-summary-helper_functions.js.html">documentation</a>. 
+ * + * @author Andreas Neumann a.neumann(a)carto.net + * @copyright LGPL 2.1 <a href="http://www.gnu.org/copyleft/lesser.txt">Gnu LGPL 2.1</a> + * @credits Bruce Rindahl, numerous people on svgdevelopers(a)yahoogroups.com + */ + +//global variables necessary to create elements in these namespaces, do not delete them!!!! + +/** + * This variable is a shortcut to the full URL of the SVG namespace + * @final + * @type String + */ +var svgNS = "http://www.w3.org/2000/svg"; + +/** + * This variable is a shortcut to the full URL of the XLink namespace + * @final + * @type String + */ +var xlinkNS = "http://www.w3.org/1999/xlink"; + +/** + * This variable is a shortcut to the full URL of the attrib namespace + * @final + * @type String + */ +var cartoNS = "http://www.carto.net/attrib"; + +/** + * This variable is a alias to the full URL of the attrib namespace + * @final + * @type String + */ +var attribNS = "http://www.carto.net/attrib"; + +/** + * This variable is a alias to the full URL of the Batik extension namespace + * @final + * @type String + */ +var batikNS = "http://xml.apache.org/batik/ext"; + +/** + * Returns the polar direction from a given vector + * @param {Number} xdiff the x-part of the vector + * @param {Number} ydiff the y-part of the vector + * @return direction the direction in radians + * @type Number + * @version 1.0 (2007-04-30) + * @see #toPolarDist + * @see #toRectX + * @see #toRectY + */ +function toPolarDir(xdiff,ydiff) { + var direction = (Math.atan2(ydiff,xdiff)); + return(direction); +} + +/** + * Returns the polar distance from a given vector + * @param {Number} xdiff the x-part of the vector + * @param {Number} ydiff the y-part of the vector + * @return distance the distance + * @type Number + * @version 1.0 (2007-04-30) + * @see #toPolarDir + * @see #toRectX + * @see #toRectY + */ +function toPolarDist(xdiff,ydiff) { + var distance = Math.sqrt(xdiff * xdiff + ydiff * ydiff); + return(distance); +} + +/** + * Returns the x-part of a 
vector from a given direction and distance + * @param {Number} direction the direction (in radians) + * @param {Number} distance the distance + * @return x the x-part of the vector + * @type Number + * @version 1.0 (2007-04-30) + * @see #toPolarDist + * @see #toPolarDir + * @see #toRectY + */ +function toRectX(direction,distance) { + var x = distance * Math.cos(direction); + return(x); +} + +/** + * Returns the y-part of the vector from a given direction and distance + * @param {Number} direction the direction (in radians) + * @param {Number} distance the distance + * @return y the y-part of the vector + * @type Number + * @version 1.0 (2007-04-30) + * @see #toPolarDist + * @see #toPolarDir + * @see #toRectX + */ +function toRectY(direction,distance) { + y = distance * Math.sin(direction); + return(y); +} + +/** + * Converts degrees to radians + * @param {Number} deg the degree value + * @return rad the radians value + * @type Number + * @version 1.0 (2007-04-30) + * @see #RadToDeg + */ +function DegToRad(deg) { + return (deg / 180.0 * Math.PI); +} + +/** + * Converts radians to degrees + * @param {Number} rad the radians value + * @return deg the degree value + * @type Number + * @version 1.0 (2007-04-30) + * @see #DegToRad + */ +function RadToDeg(rad) { + return (rad / Math.PI * 180.0); +} + +/** + * Converts decimal degrees to degrees, minutes, seconds + * @param {Number} dd the decimal degree value + * @return degrees the degree values in the following notation: {deg:degrees,min:minutes,sec:seconds} + * @type literal + * @version 1.0 (2007-04-30) + * @see #dms2dd + */ +function dd2dms(dd) { + var minutes = (Math.abs(dd) - Math.floor(Math.abs(dd))) * 60; + var seconds = (minutes - Math.floor(minutes)) * 60; + var minutes = Math.floor(minutes); + if (dd >= 0) { + var degrees = Math.floor(dd); + } + else { + var degrees = Math.ceil(dd); + } + return {deg:degrees,min:minutes,sec:seconds}; +} + +/** + * Converts degrees, minutes and seconds to decimal degrees + * 
@param {Number} deg the degree value + * @param {Number} min the minute value + * @param {Number} sec the second value + * @return deg the decimal degree values + * @type Number + * @version 1.0 (2007-04-30) + * @see #dd2dms + */ +function dms2dd(deg,min,sec) { + if (deg < 0) { + return deg - (min / 60) - (sec / 3600); + } + else { + return deg + (min / 60) + (sec / 3600); + } +} + +/** + * log function, missing in the standard Math object + * @param {Number} x the value where the log function should be applied to + * @param {Number} b the base value for the log function + * @return logResult the result of the log function + * @type Number + * @version 1.0 (2007-04-30) + */ +function log(x,b) { + if(b==null) b=Math.E; + return Math.log(x)/Math.log(b); +} + +/** + * interpolates a value (e.g. elevation) bilinearly based on the position within a cell with 4 corner values + * @param {Number} za the value at the upper left corner of the cell + * @param {Number} zb the value at the upper right corner of the cell + * @param {Number} zc the value at the lower right corner of the cell + * @param {Number} zd the value at the lower left corner of the cell + * @param {Number} xpos the x position of the point where a new value should be interpolated + * @param {Number} ypos the y position of the point where a new value should be interpolated + * @param {Number} ax the x position of the lower left corner of the cell + * @param {Number} ay the y position of the lower left corner of the cell + * @param {Number} cellsize the size of the cell + * @return interpol_value the result of the bilinear interpolation function + * @type Number + * @version 1.0 (2007-04-30) + */ +function intBilinear(za,zb,zc,zd,xpos,ypos,ax,ay,cellsize) { //bilinear interpolation function + var e = (xpos - ax) / cellsize; + var f = (ypos - ay) / cellsize; + + //calculation of weights + var wa = (1 - e) * (1 - f); + var wb = e * (1 - f); + var wc = e * f; + var wd = f * (1 - e); + + var interpol_value = wa * 
zc + wb * zd + wc * za + wd * zb; + return interpol_value; +} + +/** + * tests if a given point is left or right of a given line + * @param {Number} pointx the x position of the given point + * @param {Number} pointy the y position of the given point + * @param {Number} linex1 the x position of line's start point + * @param {Number} liney1 the y position of line's start point + * @param {Number} linex2 the x position of line's end point + * @param {Number} liney2 the y position of line's end point + * @return leftof the result of the leftOfTest, 1 means leftOf, 0 means rightOf + * @type Number (integer, 0|1) + * @version 1.0 (2007-04-30) + */ +function leftOfTest(pointx,pointy,linex1,liney1,linex2,liney2) { + var result = (liney1 - pointy) * (linex2 - linex1) - (linex1 - pointx) * (liney2 - liney1); + if (result < 0) { + var leftof = 1; //case left of + } + else { + var leftof = 0; //case left of + } + return leftof; +} + +/** + * calculates the distance between a given point and a given line + * @param {Number} pointx the x position of the given point + * @param {Number} pointy the y position of the given point + * @param {Number} linex1 the x position of line's start point + * @param {Number} liney1 the y position of line's start point + * @param {Number} linex2 the x position of line's end point + * @param {Number} liney2 the y position of line's end point + * @return distance the result of the leftOfTest, 1 means leftOf, 0 means rightOf + * @type Number + * @version 1.0 (2007-04-30) + */ +function distFromLine(xpoint,ypoint,linex1,liney1,linex2,liney2) { + var dx = linex2 - linex1; + var dy = liney2 - liney1; + var distance = (dy * (xpoint - linex1) - dx * (ypoint - liney1)) / Math.sqrt(Math.pow(dx,2) + Math.pow(dy,2)); + return distance; +} + +/** + * calculates the angle between two vectors (lines) + * @param {Number} ax the x part of vector a + * @param {Number} ay the y part of vector a + * @param {Number} bx the x part of vector b + * @param {Number} by 
the y part of vector b + * @return angle the angle in radians + * @type Number + * @version 1.0 (2007-04-30) + * @credits <a href="http://www.mathe-online.at/mathint/vect2/i.html#Winkel">Mathe Online (Winkel)</a> + */ +function angleBetwTwoLines(ax,ay,bx,by) { + var angle = Math.acos((ax * bx + ay * by) / (Math.sqrt(Math.pow(ax,2) + Math.pow(ay,2)) * Math.sqrt(Math.pow(bx,2) + Math.pow(by,2)))); + return angle; +} + +/** + * calculates the bisector vector for two given vectors + * @param {Number} ax the x part of vector a + * @param {Number} ay the y part of vector a + * @param {Number} bx the x part of vector b + * @param {Number} by the y part of vector b + * @return c the resulting vector as an Array, c[0] is the x part of the vector, c[1] is the y part + * @type Array + * @version 1.0 (2007-04-30) + * @credits <a href="http://www.mathe-online.at/mathint/vect1/i.html#Winkelsymmetrale">Mathe Online (Winkelsymmetrale)</a> + * see #calcBisectorAngle + * */ +function calcBisectorVector(ax,ay,bx,by) { + var betraga = Math.sqrt(Math.pow(ax,2) + Math.pow(ay,2)); + var betragb = Math.sqrt(Math.pow(bx,2) + Math.pow(by,2)); + var c = new Array(); + c[0] = ax / betraga + bx / betragb; + c[1] = ay / betraga + by / betragb; + return c; +} + +/** + * calculates the bisector angle for two given vectors + * @param {Number} ax the x part of vector a + * @param {Number} ay the y part of vector a + * @param {Number} bx the x part of vector b + * @param {Number} by the y part of vector b + * @return angle the bisector angle in radians + * @type Number + * @version 1.0 (2007-04-30) + * @credits <a href="http://www.mathe-online.at/mathint/vect1/i.html#Winkelsymmetrale">Mathe Online (Winkelsymmetrale)</a> + * see #calcBisectorVector + * */ +function calcBisectorAngle(ax,ay,bx,by) { + var betraga = Math.sqrt(Math.pow(ax,2) + Math.pow(ay,2)); + var betragb = Math.sqrt(Math.pow(bx,2) + Math.pow(by,2)); + var c1 = ax / betraga + bx / betragb; + var c2 = ay / betraga + by / betragb; + var 
angle = toPolarDir(c1,c2); + return angle; +} + +/** + * calculates the intersection point of two given lines + * @param {Number} line1x1 the x the start point of line 1 + * @param {Number} line1y1 the y the start point of line 1 + * @param {Number} line1x2 the x the end point of line 1 + * @param {Number} line1y2 the y the end point of line 1 + * @return interSectPoint the intersection point, interSectPoint.x contains x-part, interSectPoint.y the y-part of the resulting coordinate + * @type Object + * @version 1.0 (2007-04-30) + * @credits <a href="http://astronomy.swin.edu.au/~pbourke/geometry/lineline2d/">P. Bourke</a> + */ +function intersect2lines(line1x1,line1y1,line1x2,line1y2,line2x1,line2y1,line2x2,line2y2) { + var interSectPoint = new Object(); + var denominator = (line2y2 - line2y1)*(line1x2 - line1x1) - (line2x2 - line2x1)*(line1y2 - line1y1); + if (denominator == 0) { + alert("lines are parallel"); + } + else { + var ua = ((line2x2 - line2x1)*(line1y1 - line2y1) - (line2y2 - line2y1)*(line1x1 - line2x1)) / denominator; + var ub = ((line1x2 - line1x1)*(line1y1 - line2y1) - (line1y2 - line1y1)*(line1x1 - line2x1)) / denominator; + } + interSectPoint["x"] = line1x1 + ua * (line1x2 - line1x1); + interSectPoint["y"] = line1y1 + ua * (line1y2 - line1y1); + return interSectPoint; +} + +/** + * reformats a given number to a string by adding separators at every third digit + * @param {String|Number} inputNumber the input number, can be of type number or string + * @param {String} separator the separator, e.g. 
' or , + * @return newString the intersection point, interSectPoint.x contains x-part, interSectPoint.y the y-part of the resulting coordinate + * @type String + * @version 1.0 (2007-04-30) + */ +function formatNumberString(inputNumber,separator) { + //check if of type string, if number, convert it to string + if (typeof(inputNumber) == "Number") { + var myTempString = inputNumber.toString(); + } + else { + var myTempString = inputNumber; + } + var newString=""; + //if it contains a comma, it will be split + var splitResults = myTempString.split("."); + var myCounter = splitResults[0].length; + if (myCounter > 3) { + while(myCounter > 0) { + if (myCounter > 3) { + newString = separator + splitResults[0].substr(myCounter - 3,3) + newString; + } + else { + newString = splitResults[0].substr(0,myCounter) + newString; + } + myCounter -= 3; + } + } + else { + newString = splitResults[0]; + } + //concatenate if it contains a comma + if (splitResults[1]) { + newString = newString + "." + splitResults[1]; + } + return newString; +} + +/** + * writes a status text message out to a SVG text element's first child + * @param {String} statusText the text message to be displayed + * @version 1.0 (2007-04-30) + */ + function statusChange(statusText) { + document.getElementById("statusText").firstChild.nodeValue = "Statusbar: " + statusText; +} + +/** + * scales an SVG element, requires that the element has an x and y attribute (e.g. circle, ellipse, use element, etc.) 
+ * @param {dom::Event} evt the evt object that triggered the scaling + * @param {Number} factor the scaling factor + * @version 1.0 (2007-04-30) + */ +function scaleObject(evt,factor) { + //reference to the currently selected object + var element = evt.currentTarget; + var myX = element.getAttributeNS(null,"x"); + var myY = element.getAttributeNS(null,"y"); + var newtransform = "scale(" + factor + ") translate(" + (myX * 1 / factor - myX) + " " + (myY * 1 / factor - myY) +")"; + element.setAttributeNS(null,'transform', newtransform); +} + +/** + * returns the transformation matrix (ctm) for the given node up to the root element + * the basic use case is to provide a wrapper function for the missing SVGLocatable.getTransformToElement method (missing in ASV3) + * @param {svg::SVGTransformable} node the node reference for the SVGElement the ctm is queried + * @return CTM the current transformation matrix from the given node to the root element + * @type svg::SVGMatrix + * @version 1.0 (2007-05-01) + * @credits <a href="http://www.kevlindev.com/tutorials/basics/transformations/toUserSpace/index…">Kevin Lindsey (toUserSpace)</a> + * @see #getTransformToElement + */ +function getTransformToRootElement(node) { + try { + //this part is for fully conformant players (like Opera, Batik, Firefox, Safari ...) 
+ var CTM = node.getTransformToElement(document.documentElement); + } + catch (ex) { + //this part is for ASV3 or other non-conformant players + // Initialize our CTM the node's Current Transformation Matrix + var CTM = node.getCTM(); + // Work our way through the ancestor nodes stopping at the SVG Document + while ( ( node = node.parentNode ) != document ) { + // Multiply the new CTM to the one with what we have accumulated so far + CTM = node.getCTM().multiply(CTM); + } + } + return CTM; +} + +/** + * returns the transformation matrix (ctm) for the given dom::Node up to a different dom::Node + * the basic use case is to provide a wrapper function for the missing SVGLocatable.getTransformToElement method (missing in ASV3) + * @param {svg::SVGTransformable} node the node reference for the element the where the ctm should be calculated from + * @param {svg::SVGTransformable} targetNode the target node reference for the element the ctm should be calculated to + * @return CTM the current transformation matrix from the given node to the target element + * @type svg::SVGMatrix + * @version 1.0 (2007-05-01) + * @credits <a href="http://www.kevlindev.com/tutorials/basics/transformations/toUserSpace/index…">Kevin Lindsey (toUserSpace)</a> + * @see #getTransformToRootElement + */ +function getTransformToElement(node,targetNode) { + try { + //this part is for fully conformant players + var CTM = node.getTransformToElement(targetNode); + } + catch (ex) { + //this part is for ASV3 or other non-conformant players + // Initialize our CTM the node's Current Transformation Matrix + var CTM = node.getCTM(); + // Work our way through the ancestor nodes stopping at the SVG Document + while ( ( node = node.parentNode ) != targetNode ) { + // Multiply the new CTM to the one with what we have accumulated so far + CTM = node.getCTM().multiply(CTM); + } + } + return CTM; +} + +/** + * converts HSV to RGB values + * @param {Number} hue the hue value (between 0 and 360) + * @param {Number} 
sat the saturation value (between 0 and 1) + * @param {Number} val the value value (between 0 and 1) + * @return rgbArr the rgb values (associative array or object, the keys are: red,green,blue), all values are scaled between 0 and 255 + * @type Object + * @version 1.0 (2007-05-01) + * @see #rgb2hsv + */ +function hsv2rgb(hue,sat,val) { + var rgbArr = new Object(); + if ( sat == 0) { + rgbArr["red"] = Math.round(val * 255); + rgbArr["green"] = Math.round(val * 255); + rgbArr["blue"] = Math.round(val * 255); + } + else { + var h = hue / 60; + var i = Math.floor(h); + var f = h - i; + if (i % 2 == 0) { + f = 1 - f; + } + var m = val * (1 - sat); + var n = val * (1 - sat * f); + switch(i) { + case 0: + rgbArr["red"] = val; + rgbArr["green"] = n; + rgbArr["blue"] = m; + break; + case 1: + rgbArr["red"] = n; + rgbArr["green"] = val; + rgbArr["blue"] = m; + break; + case 2: + rgbArr["red"] = m; + rgbArr["green"] = val; + rgbArr["blue"] = n; + break; + case 3: + rgbArr["red"] = m; + rgbArr["green"] = n; + rgbArr["blue"] = val; + break; + case 4: + rgbArr["red"] = n; + rgbArr["green"] = m; + rgbArr["blue"] = val; + break; + case 5: + rgbArr["red"] = val; + rgbArr["green"] = m; + rgbArr["blue"] = n; + break; + case 6: + rgbArr["red"] = val; + rgbArr["green"] = n; + rgbArr["blue"] = m; + break; + } + rgbArr["red"] = Math.round(rgbArr["red"] * 255); + rgbArr["green"] = Math.round(rgbArr["green"] * 255); + rgbArr["blue"] = Math.round(rgbArr["blue"] * 255); + } + return rgbArr; +} + +/** + * converts RGB to HSV values + * @param {Number} red the hue value (between 0 and 255) + * @param {Number} green the saturation value (between 0 and 255) + * @param {Number} blue the value value (between 0 and 255) + * @return hsvArr the hsv values (associative array or object, the keys are: hue (0-360),sat (0-1),val (0-1)) + * @type Object + * @version 1.0 (2007-05-01) + * @see #hsv2rgb + */ +function rgb2hsv(red,green,blue) { + var hsvArr = new Object(); + red = red / 255; + green = green / 
255; + blue = blue / 255; + myMax = Math.max(red, Math.max(green,blue)); + myMin = Math.min(red, Math.min(green,blue)); + v = myMax; + if (myMax > 0) { + s = (myMax - myMin) / myMax; + } + else { + s = 0; + } + if (s > 0) { + myDiff = myMax - myMin; + rc = (myMax - red) / myDiff; + gc = (myMax - green) / myDiff; + bc = (myMax - blue) / myDiff; + if (red == myMax) { + h = (bc - gc) / 6; + } + if (green == myMax) { + h = (2 + rc - bc) / 6; + } + if (blue == myMax) { + h = (4 + gc - rc) / 6; + } + } + else { + h = 0; + } + if (h < 0) { + h += 1; + } + hsvArr["hue"] = Math.round(h * 360); + hsvArr["sat"] = s; + hsvArr["val"] = v; + return hsvArr; +} + +/** + * populates an array such that it can be addressed by both a key or an index nr, + * note that both Arrays need to be of the same length + * @param {Array} arrayKeys the array containing the keys + * @param {Array} arrayValues the array containing the values + * @return returnArray the resulting array containing both associative values and also a regular indexed array + * @type Array + * @version 1.0 (2007-05-01) + */ +function arrayPopulate(arrayKeys,arrayValues) { + var returnArray = new Array(); + if (arrayKeys.length != arrayValues.length) { + alert("error: arrays do not have the same length!"); + } + else { + for (i=0;i<arrayKeys.length;i++) { + returnArray[arrayKeys[i]] = arrayValues[i]; + } + } + return returnArray; +} + +/** + * Wrapper object for network requests, uses getURL or XMLHttpRequest depending on availability + * The callBackFunction receives a XML or text node representing the rootElement + * of the fragment received or the return text, depending on the returnFormat. + * See also the following <a href="http://www.carto.net/papers/svg/network_requests/">documentation</a>. 
+ * @class this is a wrapper object to provide network request functionality (get|post) + * @param {String} url the URL/IRI of the network resource to be called + * @param {Function|Object} callBackFunction the callBack function or object that is called after the data was received, in case of an object, the method 'receiveData' is called; both the function and the object's 'receiveData' method get 2 return parameters: 'node.firstChild'|text (the root element of the XML or text resource), this.additionalParams (if defined) + * @param {String} returnFormat the return format, either 'xml' or 'json' (or text) + * @param {String} method the method of the network request, either 'get' or 'post' + * @param {String|Undefined} postText the String containing the post text (optional) or Undefined (if not a 'post' request) + * @param {Object|Array|String|Number|Undefined} additionalParams additional parameters that will be passed to the callBackFunction or object (optional) or Undefined + * @return a new getData instance + * @type getData + * @constructor + * @version 1.0 (2007-02-23) + */ +function getData(url,callBackFunction,returnFormat,method,postText,additionalParams) { + this.url = url; + this.callBackFunction = callBackFunction; + this.returnFormat = returnFormat; + this.method = method; + this.additionalParams = additionalParams; + if (method != "get" && method != "post") { + alert("Error in network request: parameter 'method' must be 'get' or 'post'"); + } + this.postText = postText; + this.xmlRequest = null; //@private reference to the XMLHttpRequest object +} + +/** + * triggers the network request defined in the constructor + */ +getData.prototype.getData = function() { + //call getURL() if available + if (window.getURL) { + if (this.method == "get") { + getURL(this.url,this); + } + if (this.method == "post") { + postURL(this.url,this.postText,this); + } + } + //or call XMLHttpRequest() if available + else if (window.XMLHttpRequest) { + var _this = this; + 
this.xmlRequest = new XMLHttpRequest(); + if (this.method == "get") { + if (this.returnFormat == "xml") { + this.xmlRequest.overrideMimeType("text/xml"); + } + this.xmlRequest.open("GET",this.url,true); + } + if (this.method == "post") { + this.xmlRequest.open("POST",this.url,true); + } + this.xmlRequest.onreadystatechange = function() {_this.handleEvent()}; + if (this.method == "get") { + this.xmlRequest.send(null); + } + if (this.method == "post") { + //test if postText exists and is of type string + var reallyPost = true; + if (!this.postText) { + reallyPost = false; + alert("Error in network post request: missing parameter 'postText'!"); + } + if (typeof(this.postText) != "string") { + reallyPost = false; + alert("Error in network post request: parameter 'postText' has to be of type 'string')"); + } + if (reallyPost) { + this.xmlRequest.send(this.postText); + } + } + } + //write an error message if neither method is available + else { + alert("your browser/svg viewer neither supports window.getURL nor window.XMLHttpRequest!"); + } +} + +/** + * this is the callback method for the getURL() or postURL() case + * @private + */ +getData.prototype.operationComplete = function(data) { + //check if data has a success property + if (data.success) { + //parse content of the XML format to the variable "node" + if (this.returnFormat == "xml") { + //convert the text information to an XML node and get the first child + var node = parseXML(data.content,document); + //distinguish between a callback function and an object + if (typeof(this.callBackFunction) == "function") { + this.callBackFunction(node.firstChild,this.additionalParams); + } + if (typeof(this.callBackFunction) == "object") { + this.callBackFunction.receiveData(node.firstChild,this.additionalParams); + } + } + if (this.returnFormat == "json") { + if (typeof(this.callBackFunction) == "function") { + this.callBackFunction(data.content,this.additionalParams); + } + if (typeof(this.callBackFunction) == "object") { + 
this.callBackFunction.receiveData(data.content,this.additionalParams); + } + } + } + else { + alert("something went wrong with dynamic loading of geometry!"); + } +} + +/** + * this is the callback method for the XMLHttpRequest case + * @private + */ +getData.prototype.handleEvent = function() { + if (this.xmlRequest.readyState == 4) { + if (this.returnFormat == "xml") { + //we need to import the XML node first + var importedNode = document.importNode(this.xmlRequest.responseXML.documentElement,true); + if (typeof(this.callBackFunction) == "function") { + this.callBackFunction(importedNode,this.additionalParams); + } + if (typeof(this.callBackFunction) == "object") { + this.callBackFunction.receiveData(importedNode,this.additionalParams); + } + } + if (this.returnFormat == "json") { + if (typeof(this.callBackFunction) == "function") { + this.callBackFunction(this.xmlRequest.responseText,this.additionalParams); + } + if (typeof(this.callBackFunction) == "object") { + this.callBackFunction.receiveData(this.xmlRequest.responseText,this.additionalParams); + } + } + } +} + +/** + * Serializes an XML node and returns a string representation. Wrapper function to hide implementation differences. + * This can be used for debugging purposes or to post data to a server or network resource. 
+ * @param {dom::Node} node the DOM node reference + * @return textRepresentation the String representation of the XML node + * @type String + * @version 1.0 (2007-05-01) + * @see getData + */ +function serializeNode(node) { + if (typeof XMLSerializer != 'undefined') { + return new XMLSerializer().serializeToString(node); + } + else if (typeof node.xml != 'undefined') { + return node.xml; + } + else if (typeof printNode != 'undefined') { + return printNode(node); + } + else if (typeof Packages != 'undefined') { + try { + var stringWriter = new java.io.StringWriter(); + Packages.org.apache.batik.dom.util.DOMUtilities.writeNode(node,stringWriter); + return stringWriter.toString(); + } + catch (e) { + alert("Sorry, your SVG viewer does not support the printNode/serialize function."); + return ''; + } + } + else { + alert("Sorry, your SVG viewer does not support the printNode/serialize function."); + return ''; + } +} + +/** + * Starts a SMIL animation element with the given id by triggering the '.beginElement()' method. + * This is a convenience (shortcut) function. + * @param {String} id a valid id of a valid SMIL animation element + * @version 1.0 (2007-05-01) + */ +//starts an animtion with the given id +//this function is useful in combination with window.setTimeout() +function startAnimation(id) { + document.getElementById(id).beginElement(); +} diff -r a6e3f4cae4ce -r cdd8d520f3c7 static/scripts/timer.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/static/scripts/timer.js Tue Apr 13 11:23:19 2010 -0400 @@ -0,0 +1,74 @@ +// source/credits: "Algorithm": http://www.codingforums.com/showthread.php?s=&threadid=10531 +// The constructor should be called with +// the parent object (optional, defaults to window). 
+ +function Timer(){ + this.obj = (arguments.length)?arguments[0]:window; + return this; +} + +// The set functions should be called with: +// - The name of the object method (as a string) (required) +// - The millisecond delay (required) +// - Any number of extra arguments, which will all be +// passed to the method when it is evaluated. + +Timer.prototype.setInterval = function(func, msec){ + var i = Timer.getNew(); + var t = Timer.buildCall(this.obj, i, arguments); + Timer.set[i].timer = window.setInterval(t,msec); + return i; +} +Timer.prototype.setTimeout = function(func, msec){ + var i = Timer.getNew(); + Timer.buildCall(this.obj, i, arguments); + Timer.set[i].timer = window.setTimeout("Timer.callOnce("+i+");",msec); + return i; +} + +// The clear functions should be called with +// the return value from the equivalent set function. + +Timer.prototype.clearInterval = function(i){ + if(!Timer.set[i]) return; + window.clearInterval(Timer.set[i].timer); + Timer.set[i] = null; +} +Timer.prototype.clearTimeout = function(i){ + if(!Timer.set[i]) return; + window.clearTimeout(Timer.set[i].timer); + Timer.set[i] = null; +} + +// Private data + +Timer.set = new Array(); +Timer.buildCall = function(obj, i, args){ + var t = ""; + Timer.set[i] = new Array(); + if(obj != window){ + Timer.set[i].obj = obj; + t = "Timer.set["+i+"].obj."; + } + t += args[0]+"("; + if(args.length > 2){ + Timer.set[i][0] = args[2]; + t += "Timer.set["+i+"][0]"; + for(var j=1; (j+2)<args.length; j++){ + Timer.set[i][j] = args[j+2]; + t += ", Timer.set["+i+"]["+j+"]"; + }} + t += ");"; + Timer.set[i].call = t; + return t; +} +Timer.callOnce = function(i){ + if(!Timer.set[i]) return; + eval(Timer.set[i].call); + Timer.set[i] = null; +} +Timer.getNew = function(){ + var i = 0; + while(Timer.set[i]) i++; + return i; +} \ No newline at end of file diff -r a6e3f4cae4ce -r cdd8d520f3c7 tools/rgenetics/rgGRR.py --- a/tools/rgenetics/rgGRR.py Tue Apr 13 11:17:30 2010 -0400 +++ b/tools/rgenetics/rgGRR.py 
Tue Apr 13 11:23:19 2010 -0400 @@ -1,1096 +1,1145 @@ -""" -# july 2009: Need to see outliers so need to draw them last? -# could use clustering on the zscores to guess real relationships for unrelateds -# but definitely need to draw last -# added MAX_SHOW_ROWS to limit the length of the main report page -# Changes for Galaxy integration -# added more robust knuth method for one pass mean and sd -# no difference really - let's use scipy.mean() and scipy.std() instead... -# fixed labels and changed to .xls for outlier reports so can open in excel -# interesting - with a few hundred subjects, 5k gives good resolution -# and 100k gives better but not by much -# TODO remove non autosomal markers -# TODO it would be best if label had the zmean and zsd as these are what matter for -# outliers rather than the group mean/sd -# mods to rgGRR.py from channing CVS which John Ziniti has rewritten to produce SVG plots -# to make a Galaxy tool - we need the table of mean and SD for interesting pairs, the SVG and the log -# so the result should be an HTML file - -# rgIBS.py -# use a random subset of markers for a quick ibs -# to identify sample dups and closely related subjects -# try snpMatrix and plink and see which one works best for us? -# abecasis grr plots mean*sd for every subject to show clusters -# mods june 23 rml to avoid non-autosomal markers -# we seem to be distinguishing parent-child by gender - 2 clouds! - - -snpMatrix from David Clayton has: -ibs.stats function to calculate the identity-by-state stats of a group of samples -Description -Given a snp.matrix-class or a X.snp.matrix-class object with N samples, calculates some statistics -about the relatedness of every pair of samples within. - -Usage -ibs.stats(x) -8 ibs.stats -Arguments -x a snp.matrix-class or a X.snp.matrix-class object containing N samples -Details -No-calls are excluded from consideration here. 
-Value -A data.frame containing N(N - 1)/2 rows, where the row names are the sample name pairs separated -by a comma, and the columns are: -Count count of identical calls, exclusing no-calls -Fraction fraction of identical calls comparied to actual calls being made in both samples -Warning -In some applications, it may be preferable to subset a (random) selection of SNPs first - the -calculation -time increases as N(N - 1)M/2 . Typically for N = 800 samples and M = 3000 SNPs, the -calculation time is about 1 minute. A full GWA scan could take hours, and quite unnecessary for -simple applications such as checking for duplicate or related samples. -Note -This is mostly written to find mislabelled and/or duplicate samples. -Illumina indexes their SNPs in alphabetical order so the mitochondria SNPs comes first - for most -purpose it is undesirable to use these SNPs for IBS purposes. -TODO: Worst-case S4 subsetting seems to make 2 copies of a large object, so one might want to -subset before rbind(), etc; a future version of this routine may contain a built-in subsetting facility -""" -import sys,os,time,random,string,copy,optparse - -try: - set -except NameError: - from Sets import Set as set - -from rgutils import timenow -import plinkbinJZ - - -opts = None -verbose = False - -showPolygons = False - -class NullDevice: - def write(self, s): - pass - -tempstderr = sys.stderr # save -sys.stderr = NullDevice() -# need to avoid blather about deprecation and other strange stuff from scipy -# the current galaxy job runner assumes that -# the job is in error if anything appears on sys.stderr -# grrrrr. James wants to keep it that way instead of using the -# status flag for some strange reason. 
Presumably he doesn't use R or (in this case, scipy) -import numpy -import scipy -from scipy import weave - - -sys.stderr=tempstderr - - -PROGNAME = os.path.split(sys.argv[0])[-1] -X_AXIS_LABEL = 'Mean Alleles Shared' -Y_AXIS_LABEL = 'SD Alleles Shared' -LEGEND_ALIGN = 'topleft' -LEGEND_TITLE = 'Relationship' -DEFAULT_SYMBOL_SIZE = 1.0 # default symbol size -DEFAULT_SYMBOL_SIZE = 0.5 # default symbol size - -### Some colors for R/rpy -R_BLACK = 1 -R_RED = 2 -R_GREEN = 3 -R_BLUE = 4 -R_CYAN = 5 -R_PURPLE = 6 -R_YELLOW = 7 -R_GRAY = 8 - -### ... and some point-styles - -### -PLOT_HEIGHT = 600 -PLOT_WIDTH = 1150 - - -#SVG_COLORS = ('black', 'darkblue', 'blue', 'deepskyblue', 'firebrick','maroon','crimson') -#SVG_COLORS = ('cyan','dodgerblue','mediumpurple', 'fuchsia', 'red','gold','gray') -SVG_COLORS = ('cyan','dodgerblue','mediumpurple','forestgreen', 'lightgreen','gold','gray') -# dupe,parentchild,sibpair,halfsib,parents,unrel,unkn -#('orange', 'red', 'green', 'chartreuse', 'blue', 'purple', 'gray') - -OUTLIERS_HEADER = 'Mean\tSdev\tZ(mean)\tZ(sdev)\tFID1\tIID1\tFID2\tIID2\tMean(Rel_Mean)\tSdev(Rel_Mean)\tMean(Rel_Sdev)\tSdev(Rel_Sdev)\n' -OUTLIERS_HEADER_list = ['Mean','Sdev','ZMean','ZSdev','FID1','IID1','FID2','IID2', -'RGMean_M','RGMean_SD','RGSD_M','RGSD_SD'] -TABLE_HEADER='fid1 iid1\tfid2 iid2\tmean\tsdev\tzmean\tzsdev\tgeno\trelcode\n' - - -### Relationship codes, text, and lookups/mappings -N_RELATIONSHIP_TYPES = 7 -REL_DUPE, REL_PARENTCHILD, REL_SIBS, REL_HALFSIBS, REL_RELATED, REL_UNRELATED, REL_UNKNOWN = range(N_RELATIONSHIP_TYPES) -REL_LOOKUP = { - REL_DUPE: ('dupe', R_BLUE, 1), - REL_PARENTCHILD: ('parentchild', R_YELLOW, 1), - REL_SIBS: ('sibpairs', R_RED, 1), - REL_HALFSIBS: ('halfsibs', R_GREEN, 1), - REL_RELATED: ('parents', R_PURPLE, 1), - REL_UNRELATED: ('unrelated', R_CYAN, 1), - REL_UNKNOWN: ('unknown', R_GRAY, 1), - } -OUTLIER_STDEVS = { - REL_DUPE: 2, - REL_PARENTCHILD: 2, - REL_SIBS: 2, - REL_HALFSIBS: 2, - REL_RELATED: 2, - REL_UNRELATED: 3, 
- REL_UNKNOWN: 2, - } -# note now Z can be passed in - -REL_STATES = [REL_LOOKUP[r][0] for r in range(N_RELATIONSHIP_TYPES)] -REL_COLORS = SVG_COLORS -REL_POINTS = [REL_LOOKUP[r][2] for r in range(N_RELATIONSHIP_TYPES)] - -DEFAULT_MAX_SAMPLE_SIZE = 10000 - -REF_COUNT_HOM1 = 3 -REF_COUNT_HET = 2 -REF_COUNT_HOM2 = 1 -MISSING = 0 -MAX_SHOW_ROWS = 100 # framingham has millions - delays showing output page - so truncate and explain -MARKER_PAIRS_PER_SECOND_SLOW = 15000000.0 -MARKER_PAIRS_PER_SECOND_FAST = 70000000.0 - - -galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> -<head> -<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> -<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> -<title></title> -<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> -</head> -<body> -<div class="document"> -""" - - -SVG_HEADER = '''<?xml version="1.0" standalone="no"?> -<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.2//EN" "http://www.w3.org/Graphics/SVG/1.2/DTD/svg12.dtd"> - -<svg width="1280" height="800" - xmlns="http://www.w3.org/2000/svg" version="1.2" - xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 1280 800" onload="init()"> - - <script type="text/ecmascript" xlink:href="/static/scripts/tools/rgenetics/checkbox_and_radiobutton.js"/> - <script type="text/ecmascript" xlink:href="/static/scripts/tools/rgenetics/helper_functions.js"/> - <script type="text/ecmascript" xlink:href="/static/scripts/tools/rgenetics/timer.js"/> - <script type="text/ecmascript"> - <![CDATA[ - var checkBoxes = new Array(); - var radioGroupBandwidth; - var colours = ['%s','%s','%s','%s','%s','%s','%s']; - function init() { - var style = {"font-family":"Arial,Helvetica", "fill":"black", "font-size":12}; - var dist = 12; - var yOffset 
= 4; - - //A checkBox for each relationship type dupe,parentchild,sibpair,halfsib,parents,unrel,unkn - checkBoxes["dupe"] = new checkBox("dupe","checkboxes",20,40,"cbRect","cbCross",true,"Duplicate",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["parentchild"] = new checkBox("parentchild","checkboxes",20,60,"cbRect","cbCross",true,"Parent-Child",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["sibpairs"] = new checkBox("sibpairs","checkboxes",20,80,"cbRect","cbCross",true,"Sib-pairs",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["halfsibs"] = new checkBox("halfsibs","checkboxes",20,100,"cbRect","cbCross",true,"Half-sibs",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["parents"] = new checkBox("parents","checkboxes",20,120,"cbRect","cbCross",true,"Parents",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["unrelated"] = new checkBox("unrelated","checkboxes",20,140,"cbRect","cbCross",true,"Unrelated",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["unknown"] = new checkBox("unknown","checkboxes",20,160,"cbRect","cbCross",true,"Unknown",style,dist,yOffset,undefined,hideShowLayer); - - } - - function hideShowLayer(id, status, label) { - var vis = "hidden"; - if (status) { - vis = "visible"; - } - document.getElementById(id).setAttributeNS(null, 'visibility', vis); - } - - function showBTT(evt, rel, mm, dm, md, dd, n, mg, dg, lg, hg) { - var x = parseInt(evt.pageX)-250; - var y = parseInt(evt.pageY)-110; - switch(rel) { - case 0: - fill = colours[rel]; - relt = "dupe"; - break; - case 1: - fill = colours[rel]; - relt = "parentchild"; - break; - case 2: - fill = colours[rel]; - relt = "sibpairs"; - break; - case 3: - fill = colours[rel]; - relt = "halfsibs"; - break; - case 4: - fill = colours[rel]; - relt = "parents"; - break; - case 5: - fill = colours[rel]; - relt = "unrelated"; - break; - case 6: - fill = colours[rel]; - relt = "unknown"; - break; - default: - fill = "cyan"; - relt = "ERROR_CODE: "+rel; - 
} - - document.getElementById("btRel").textContent = "GROUP: "+relt; - document.getElementById("btMean").textContent = "mean="+mm+" +/- "+dm; - document.getElementById("btSdev").textContent = "sdev="+dm+" +/- "+dd; - document.getElementById("btPair").textContent = "npairs="+n; - document.getElementById("btGeno").textContent = "ngenos="+mg+" +/- "+dg+" (min="+lg+", max="+hg+")"; - document.getElementById("btHead").setAttribute('fill', fill); - - var tt = document.getElementById("btTip"); - tt.setAttribute("transform", "translate("+x+","+y+")"); - tt.setAttribute('visibility', 'visible'); - } - - function showOTT(evt, rel, s1, s2, mean, sdev, ngeno, rmean, rsdev) { - var x = parseInt(evt.pageX)-150; - var y = parseInt(evt.pageY)-180; - - switch(rel) { - case 0: - fill = colours[rel]; - relt = "dupe"; - break; - case 1: - fill = colours[rel]; - relt = "parentchild"; - break; - case 2: - fill = colours[rel]; - relt = "sibpairs"; - break; - case 3: - fill = colours[rel]; - relt = "halfsibs"; - break; - case 4: - fill = colours[rel]; - relt = "parents"; - break; - case 5: - fill = colours[rel]; - relt = "unrelated"; - break; - case 6: - fill = colours[rel]; - relt = "unknown"; - break; - default: - fill = "cyan"; - relt = "ERROR_CODE: "+rel; - } - - document.getElementById("otRel").textContent = "PAIR: "+relt; - document.getElementById("otS1").textContent = "s1="+s1; - document.getElementById("otS2").textContent = "s2="+s2; - document.getElementById("otMean").textContent = "mean="+mean; - document.getElementById("otSdev").textContent = "sdev="+sdev; - document.getElementById("otGeno").textContent = "ngenos="+ngeno; - document.getElementById("otRmean").textContent = "relmean="+rmean; - document.getElementById("otRsdev").textContent = "relsdev="+rsdev; - document.getElementById("otHead").setAttribute('fill', fill); - - var tt = document.getElementById("otTip"); - tt.setAttribute("transform", "translate("+x+","+y+")"); - tt.setAttribute('visibility', 'visible'); - } - - 
function hideBTT(evt) { - document.getElementById("btTip").setAttributeNS(null, 'visibility', 'hidden'); - } - - function hideOTT(evt) { - document.getElementById("otTip").setAttributeNS(null, 'visibility', 'hidden'); - } - - ]]> - </script> - <defs> -  - <symbol id="cbRect" overflow="visible"> - <rect x="-5" y="-5" width="10" height="10" fill="white" stroke="dimgray" stroke-width="1" cursor="pointer"/> - </symbol> - <symbol id="cbCross" overflow="visible"> - <g pointer-events="none" stroke="black" stroke-width="1"> - <line x1="-3" y1="-3" x2="3" y2="3"/> - <line x1="3" y1="-3" x2="-3" y2="3"/> - </g> - </symbol> - </defs> - -<desc>Developer Works Dynamic Scatter Graph Scaling Example</desc> - - -<g style="stroke-width:1.0; stroke:black; shape-rendering:crispEdges"> -  - <path d="M 100 100 L 1250 100 Z"/> - <path d="M 100 700 L 1250 700 Z"/> - -  - <path d="M 100 100 L 100 700 Z"/> - <path d="M 1250 100 L 1250 700 Z"/> -</g> - -<g transform="translate(100,100)"> - -  - <g style="fill:none; stroke:#dddddd; stroke-width:1; stroke-dasharray:2,2; text-anchor:end; shape-rendering:crispEdges"> - -  - <line x1="125" y1="0" x2="115" y2="600" /> - <line x1="230" y1="0" x2="230" y2="600" /> - <line x1="345" y1="0" x2="345" y2="600" /> - <line x1="460" y1="0" x2="460" y2="600" /> - <line x1="575" y1="0" x2="575" y2="600" style="stroke-dasharray:none;" /> - <line x1="690" y1="0" x2="690" y2="600" /> - <line x1="805" y1="0" x2="805" y2="600" /> - <line x1="920" y1="0" x2="920" y2="600" /> - <line x1="1035" y1="0" x2="1035" y2="600" /> - -  - <line x1="0" y1="60" x2="1150" y2="60" /> - <line x1="0" y1="120" x2="1150" y2="120" /> - <line x1="0" y1="180" x2="1150" y2="180" /> - <line x1="0" y1="240" x2="1150" y2="240" /> - <line x1="0" y1="300" x2="1150" y2="300" style="stroke-dasharray:none;" /> - <line x1="0" y1="360" x2="1150" y2="360" /> - <line x1="0" y1="420" x2="1150" y2="420" /> - <line x1="0" y1="480" x2="1150" y2="480" /> - <line x1="0" y1="540" x2="1150" y2="540" /> - 
</g> - -  - <g style="fill:black; stroke:none" font-size="12" font-family="Arial" transform="translate(25,25)"> - <rect width="160" height="270" style="fill:none; stroke:black; shape-rendering:crispEdges" /> - <text x="5" y="20" style="fill:black; stroke:none;" font-size="13" font-weight="bold">Given Pair Relationship</text> - <rect x="120" y="35" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="55" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="75" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="95" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="115" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="135" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="155" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <text x="15" y="195" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore gt 15</text> - <circle cx="125" cy="192" r="6" style="stroke:red; fill:gold; fill-opacity:1.0; stroke-width:1;"/> - <text x="15" y="215" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore 4 to 15</text> - <circle cx="125" cy="212" r="3" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/> - <text x="15" y="235" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore lt 4</text> - <circle cx="125" cy="232" r="2" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/> - <g id="checkboxes"> - </g> - </g> - - - <g style='fill:black; stroke:none' font-size="17" font-family="Arial"> -  - <text x="480" y="660">Mean Alleles Shared</text> - <text x="0" y="630" >1.0</text> - <text x="277" y="630" >1.25</text> - <text x="564" y="630" >1.5</text> - <text x="842" y="630" 
>1.75</text> - <text x="1140" y="630" >2.0</text> - </g> - - <g transform="rotate(270)" style="fill:black; stroke:none" font-size="17" font-family="Arial"> -  - <text x="-350" y="-40">SD Alleles Shared</text> - <text x="-20" y="-10" >1.0</text> - <text x="-165" y="-10" >0.75</text> - <text x="-310" y="-10" >0.5</text> - <text x="-455" y="-10" >0.25</text> - <text x="-600" y="-10" >0.0</text> - </g> - - -<g style="fill:black; stroke:none" font-size="18" font-family="Arial"> - <text x="425" y="-30">%s</text> -</g> - - -''' - -SVG_FOOTER = ''' - -</g> -<g id="btTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial"> - <rect width="250" height="110" style="fill:silver" rx="2" ry="2"/> - <rect id="btHead" width="250" height="20" rx="2" ry="2" /> - <text id="btRel" y="14" x="85">unrelated</text> - <text id="btMean" y="40" x="4">mean=1.5 +/- 0.04</text> - <text id="btSdev" y="60" x="4">sdev=0.7 +/- 0.03</text> - <text id="btPair" y="80" x="4">npairs=1152</text> - <text id="btGeno" y="100" x="4">ngenos=4783 +/- 24 (min=1000, max=5000)</text> -</g> - -<g id="otTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial"> - <rect width="150" height="180" style="fill:silver" rx="2" ry="2"/> - <rect id="otHead" width="150" height="20" rx="2" ry="2" /> - <text id="otRel" y="14" x="40">sibpairs</text> - <text id="otS1" y="40" x="4">s1=fid1,iid1</text> - <text id="otS2" y="60" x="4">s2=fid2,iid2</text> - <text id="otMean" y="80" x="4">mean=1.82</text> - <text id="otSdev" y="100" x="4">sdev=0.7</text> - <text id="otGeno" y="120" x="4">ngeno=4487</text> - <text id="otRmean" y="140" x="4">relmean=1.85</text> - <text id="otRsdev" y="160" x="4">relsdev=0.65</text> -</g> -</svg> -''' - -OUTLIERS_HEADER = 'Mean\tSdev\tZ(mean)\tZ(sdev)\tFID1\tIID1\tFID2\tIID2\tMean(Mean)\tSdev(Mean)\tMean(Sdev)\tSdev(Sdev)\n' - -DEFAULT_MAX_SAMPLE_SIZE = 5000 - -REF_COUNT_HOM1 = 3 -REF_COUNT_HET = 2 
-REF_COUNT_HOM2 = 1 -MISSING = 0 - -MARKER_PAIRS_PER_SECOND_SLOW = 15000000 -MARKER_PAIRS_PER_SECOND_FAST = 70000000 - -POLYGONS = { - REL_UNRELATED: ((1.360, 0.655), (1.385, 0.730), (1.620, 0.575), (1.610, 0.505)), - REL_HALFSIBS: ((1.630, 0.500), (1.630, 0.550), (1.648, 0.540), (1.648, 0.490)), - REL_SIBS: ((1.660, 0.510), (1.665, 0.560), (1.820, 0.410), (1.820, 0.390)), - REL_PARENTCHILD: ((1.650, 0.470), (1.650, 0.490), (1.750, 0.440), (1.750, 0.420)), - REL_DUPE: ((1.970, 0.000), (1.970, 0.150), (2.000, 0.150), (2.000, 0.000)), - } - -def distance(point1, point2): - """ Calculate the distance between two points - """ - (x1,y1) = [float(d) for d in point1] - (x2,y2) = [float(d) for d in point2] - dx = abs(x1 - x2) - dy = abs(y1 - y2) - return math.sqrt(dx**2 + dy**2) - -def point_inside_polygon(x, y, poly): - """ Determine if a point (x,y) is inside a given polygon or not - poly is a list of (x,y) pairs. - - Taken from: http://www.ariel.com.au/a/python-point-int-poly.html - """ - - n = len(poly) - inside = False - - p1x,p1y = poly[0] - for i in range(n+1): - p2x,p2y = poly[i % n] - if y > min(p1y,p2y): - if y <= max(p1y,p2y): - if x <= max(p1x,p2x): - if p1y != p2y: - xinters = (y-p1y)*(p2x-p1x)/(p2y-p1y)+p1x - if p1x == p2x or x <= xinters: - inside = not inside - p1x,p1y = p2x,p2y - return inside - -def readMap(pedfile): - """ - """ - mapfile = pedfile.replace('.ped', '.map') - marker_list = [] - if os.path.exists(mapfile): - print 'readMap: %s' % (mapfile) - fh = file(mapfile, 'r') - for line in fh: - marker_list.append(line.strip().split()) - fh.close() - print 'readMap: %s markers' % (len(marker_list)) - return marker_list - -def calcMeanSD(useme): - """ - A numerically stable algorithm is given below. It also computes the mean. 
- This algorithm is due to Knuth,[1] who cites Welford.[2] - n = 0 - mean = 0 - M2 = 0 - - foreach x in data: - n = n + 1 - delta = x - mean - mean = mean + delta/n - M2 = M2 + delta*(x - mean) // This expression uses the new value of mean - end for - - variance_n = M2/n - variance = M2/(n - 1) - """ - mean = 0.0 - M2 = 0.0 - sd = 0.0 - n = len(useme) - if n > 1: - for i,x in enumerate(useme): - delta = x - mean - mean = mean + delta/(i+1) # knuth uses n+=1 at start - M2 = M2 + delta*(x - mean) # This expression uses the new value of mean - variance = M2/(n-1) # assume is sample so lose 1 DOF - sd = pow(variance,0.5) - return mean,sd - - -def doIBSpy(inpath='',basename='',outdir=None,logf=None, - nrsSamples=10000,title='title',pdftoo=0,Zcutoff=2.0): - #def doIBS(pedName, title, nrsSamples=None, pdftoo=False): - """ started with snpmatrix but GRR uses actual IBS counts and sd's - """ - repOut = [] # text strings to add to the html display - refallele = {} - tblf = '%s_table.xls' % (title) - tbl = file(os.path.join(outdir,tblf), 'w') - tbl.write(TABLE_HEADER) - svgf = '%s.svg' % (title) - svg = file(os.path.join(outdir,svgf), 'w') - - bedname = '%s.bed' % (inpath) - pedname = '%s.ped' % (inpath) - print 'pedname',pedname - if os.path.exists(bedname): - ped = plinkbinJZ.BPed(inpath) - ped.parse(quick=True) - elif os.path.exists(pedname): - ped = plinkbinJZ.LPed(inpath) - ped.parse() - else: - print >> sys.stdout, '## doIBSpy problem - cannot open %s or %s - cannot run' % (bedname,pedname) - nMarkers = len(ped._markers) - if nMarkers < 5: - print sys.stderr, '### ERROR - %d is too few markers for reliable estimation in %s - terminating' % (nMarkers,PROGNAME) - sys.exit(1) - nSubjects = len(ped._subjects) - nrsSamples = min(nMarkers, nrsSamples) - if opts and opts.use_mito: - markers = range(nMarkers) - nrsSamples = min(len(markers), nrsSamples) - sampleIndexes = sorted(random.sample(markers, nrsSamples)) - else: - autosomals = ped.autosomal_indices() - nrsSamples = 
min(len(autosomals), nrsSamples) - sampleIndexes = sorted(random.sample(autosomals, nrsSamples)) - - print '' - print 'Getting random.sample of %s from %s total' % (nrsSamples, nMarkers) - npairs = (nSubjects*(nSubjects-1))/2 # total rows in table - newfiles=[svgf,tblf] - explanations = ['rgGRR Plot (requires SVG)','Mean by SD alleles shared - %d rows' % npairs] - # these go with the output file links in the html file - s = 'Reading genotypes for %s subjects and %s markers\n' % (nSubjects, nrsSamples) - logf.write(s) - minUsegenos = nrsSamples/2 # must have half? - nGenotypes = nSubjects*nrsSamples - stime = time.time() - emptyRows = set() - genos = numpy.zeros((nSubjects, nrsSamples), dtype=int) - for s in xrange(nSubjects): - nValid = 0 - #getGenotypesByIndices(self, s, mlist, format) - genos[s] = ped.getGenotypesByIndices(s, sampleIndexes, format='ref') - nValid = sum([1 for g in genos[s] if g]) - if not nValid: - emptyRows.add(s) - sub = ped.getSubject(s) - print 'All missing for row %d (%s)' % (s, sub) - logf.write('All missing for row %d (%s)\n' % (s, sub)) - rtime = time.time() - stime - if verbose: - print '@@Read %s genotypes in %s seconds' % (nGenotypes, rtime) - - - ### Now the expensive part. For each pair of subjects, we get the mean number - ### and standard deviation of shared alleles over all of the markers where both - ### subjects have a known genotype. Identical subjects should have mean shared - ### alleles very close to 2.0 with a standard deviation very close to 0.0. 
- tot = nSubjects*(nSubjects-1)/2 - nprog = tot/10 - nMarkerpairs = tot * nrsSamples - estimatedTimeSlow = nMarkerpairs/MARKER_PAIRS_PER_SECOND_SLOW - estimatedTimeFast = nMarkerpairs/MARKER_PAIRS_PER_SECOND_FAST - - pairs = [] - pair_data = {} - means = [] ## Mean IBS for each pair - ngenoL = [] ## Count of comparable genotypes for each pair - sdevs = [] ## Standard dev for each pair - rels = [] ## A relationship code for each pair - zmeans = [0.0 for x in xrange(tot)] ## zmean score for each pair for the relgroup - zstds = [0.0 for x in xrange(tot)] ## zstd score for each pair for the relgrp - skip = set() - ndone = 0 ## How many have been done so far - - logf.write('Calculating %d pairs, updating every %d pairs...\n' % (tot, nprog)) - logf.write('Estimated time is %2.2f to %2.2f seconds ...\n' % (estimatedTimeFast, estimatedTimeSlow)) - - t1sum = 0 - t2sum = 0 - t3sum = 0 - now = time.time() - scache = {} - _founder_cache = {} - C_CODE = """ - #include "math.h" - int i; - int sumibs = 0; - int ssqibs = 0; - int ngeno = 0; - float mean = 0; - float M2 = 0; - float delta = 0; - float sdev=0; - float variance=0; - for (i=0; i<nrsSamples; i++) { - int a1 = g1[i]; - int a2 = g2[i]; - if (a1 != 0 && a2 != 0) { - ngeno += 1; - int shared = 2-abs(a1-a2); - delta = shared - mean; - mean = mean + delta/ngeno; - M2 += delta*(shared-mean); - // yes that second time, the updated mean is used see calcmeansd above; - //printf("%d %d %d %d %d %d\\n", i, a1, a2, ngeno, shared, squared); - } - } - if (ngeno > 1) { - variance = M2/(ngeno-1); - sdev = sqrt(variance); - //printf("OK: %d %3.2f %3.2f\\n", ngeno, mean, sdev); - } - //printf("%d %d %d %1.2f %1.2f\\n", ngeno, sumibs, ssqibs, mean, sdev); - result[0] = ngeno; - result[1] = mean; - result[2] = sdev; - return_val = ngeno; - """ - started = time.time() - for s1 in xrange(nSubjects): - if s1 in emptyRows: - continue - (fid1,iid1,did1,mid1,sex1,phe1,iid1,d_sid1,m_sid1) = scache.setdefault(s1, ped.getSubject(s1)) - - isFounder1 
= _founder_cache.setdefault(s1, (did1==mid1)) - g1 = genos[s1] - - for s2 in xrange(s1+1, nSubjects): - if s2 in emptyRows: - continue - if nprog and ndone % nprog == 0 and ndone > 1: - dur = time.time() - started - pct = float(ndone)/tot*100.0 - logf.write('%f sec at pair %d of %d (%3.2f%%): %f marker*pairs/sec\n' % (dur, ndone, tot, pct, ndone/dur*nrsSamples)) - t1s = time.time() - - (fid2,iid2,did2,mid2,sex2,phe2,iid2,d_sid2,m_sid2) = scache.setdefault(s2, ped.getSubject(s2)) - - g2 = genos[s2] - isFounder2 = _founder_cache.setdefault(s2, (did2==mid2)) - - # Determine the relationship for this pair - relcode = REL_UNKNOWN - if (fid2 == fid1): - if iid1 == iid2: - relcode = REL_DUPE - elif (did2 == did1) and (mid2 == mid1) and did1 != mid1: - relcode = REL_SIBS - elif (iid1 == mid2) or (iid1 == did2) or (iid2 == mid1) or (iid2 == did1): - relcode = REL_PARENTCHILD - elif (str(did1) != '0' and (did2 == did1)) or (str(mid1) != '0' and (mid2 == mid1)): - relcode = REL_HALFSIBS - else: - # People in the same family should be marked as some other - # form of related. In general, these people will have a - # pretty random spread of similarity. This distinction is - # probably not very useful most of the time - relcode = REL_RELATED - else: - ### Different families - relcode = REL_UNRELATED - - t1e = time.time() - t1sum += t1e-t1s - - - ### Calculate sum(2-abs(a1-a2)) and sum((2-abs(a1-a2))**2) and count - ### the number of contributing genotypes. These values are not actually - ### calculated here, but instead are looked up in a table for speed. - ### FIXME: This is still too slow ... 
- result = [0.0, 0.0, 0.0] - ngeno = weave.inline(C_CODE, ['g1', 'g2', 'nrsSamples', 'result']) - if ngeno >= minUsegenos: - _, mean, sdev = result - means.append(mean) - sdevs.append(sdev) - ngenoL.append(ngeno) - pairs.append((s1, s2)) - rels.append(relcode) - else: - skip.add(ndone) # signal no comparable genotypes for this pair - ndone += 1 - t2e = time.time() - t2sum += t2e-t1e - t3e = time.time() - t3sum += t3e-t2e - - logme = [ 'T1: %s' % (t1sum), 'T2: %s' % (t2sum), 'T3: %s' % (t3sum),'TOT: %s' % (t3e-now), - '%s pairs with no (or not enough) comparable genotypes (%3.1f%%)' % (len(skip), - float(len(skip))/float(tot)*100)] - logf.write('%s\n' % '\t'.join(logme)) - ### Calculate mean and standard deviation of scores on a per relationship - ### type basis, allowing us to flag outliers for each particular relationship - ### type - relstats = {} - relCounts = {} - outlierFiles = {} - for relCode, relInfo in REL_LOOKUP.items(): - relName, relColor, relStyle = relInfo - useme = [means[x] for x in xrange(len(means)) if rels[x] == relCode] - relCounts[relCode] = len(useme) - mm = scipy.mean(useme) - ms = scipy.std(useme) - useme = [sdevs[x] for x in xrange(len(sdevs)) if rels[x] == relCode] - sm = scipy.mean(useme) - ss = scipy.std(useme) - relstats[relCode] = {'sd':(sm,ss), 'mean':(mm,ms)} - logf.write('Relstate %s: mean(mean)=%3.2f sdev(mean)=%3.2f, mean(sdev)=%3.2f sdev(sdev)=%3.2f\n' % (relName, mm, ms, sm, ss)) - - ### now fake z scores for each subject like abecasis recommends max(|zmu|,|zsd|) - ### within each group, for each pair, z=(groupmean-pairmean)/groupsd - available = len(means) - logf.write('%d pairs are available of %d\n' % (available, tot)) - ### s = '\nOutliers:\nrelationship\tzmean\tzsd\tped1\tped2\tmean\tsd\trmeanmean\trmeansd\trsdmean\trsdsd\n' - ### logf.write(s) - pairnum = 0 - offset = 0 - nOutliers = 0 - cexs = [] - outlierRecords = dict([(r, []) for r in range(N_RELATIONSHIP_TYPES)]) - zsdmax = 0 - for s1 in range(nSubjects): - if s1 in 
emptyRows: - continue - (fid1,iid1,did1,mid1,sex1,aff1,ok1,d_sid1,m_sid1) = scache[s1] - for s2 in range(s1+1, nSubjects): - if s2 in emptyRows: - continue - if pairnum not in skip: - ### Get group stats for this relationship - (fid2,iid2,did2,mid2,sex2,aff2,ok2,d_sid2,m_sid2) = scache[s2] - try: - r = rels[offset] - except IndexError: - logf.write('###OOPS offset %d available %d pairnum %d len(rels) %d', offset, available, pairnum, len(rels)) - rmm,rmd = relstats[r]['mean'] # group mean, group meansd alleles shared - rdm,rdd = relstats[r]['sd'] # group sdmean, group sdsd alleles shared - - try: - zsd = (sdevs[offset] - rdm)/rdd # distance from group mean in group sd units - except: - zsd = 1 - if abs(zsd) > zsdmax: - zsdmax = zsd # keep for sort scaling - try: - zmean = (means[offset] - rmm)/rmd # distance from group mean - except: - zmean = 1 - zmeans[offset] = zmean - zstds[offset] = zsd - pid=(s1,s2) - zrad = max(zsd,zmean) - if zrad < 4: - zrad = 2 - elif 4 < zrad < 15: - zrad = 3 # to 9 - else: # > 15 6=24+ - zrad=zrad/4 - zrad = min(zrad,6) # scale limit - zrad = max(2,max(zsd,zmean)) # as > 2, z grows - pair_data[pid] = (zmean,zsd,r,zrad) - if max(zsd,zmean) > Zcutoff: # is potentially interesting - mean = means[offset] - sdev = sdevs[offset] - outlierRecords[r].append((mean, sdev, zmean, zsd, fid1, iid1, fid2, iid2, rmm, rmd, rdm, rdd)) - nOutliers += 1 - tbl.write('%s_%s\t%s_%s\t%f\t%f\t%f\t%f\t%d\t%s\n' % \ - (fid1, iid1, fid2, iid2, mean, sdev, zmean,zsd, ngeno, relcode)) - offset += 1 - pairnum += 1 - logf.write( 'Outliers: %s\n' % (nOutliers)) - - ### Write outlier files for each relationship type - repOut.append('<h2>Outliers in tab delimited files linked above are also listed below</h2>') - lzsd = round(numpy.log10(zsdmax)) + 1 - scalefactor = 10**lzsd - for relCode, relInfo in REL_LOOKUP.items(): - relName, _, _ = relInfo - outliers = outlierRecords[relCode] - if not outliers: - continue - outliers = [(scalefactor*int(abs(x[3]))+ int(abs(x[2])),x) 
for x in outliers] # decorate - outliers.sort() - logf.write('### outliers after decorated sort=%s' % outliers) - outliers.reverse() # largest deviation first - logf.write('### outliers after decorated sort=%s' % outliers) - outliers = [x[1] for x in outliers] # undecorate - nrows = len(outliers) - truncated = 0 - if nrows > MAX_SHOW_ROWS: - s = '<h3>%s outlying pairs (top %d of %d) from %s</h3><table border="0" cellpadding="3">' % (relName, - MAX_SHOW_ROWS,nrows,title) - truncated = nrows - MAX_SHOW_ROWS - else: - s = '<h3>%s outlying pairs (n=%d) from %s</h3><table border="0" cellpadding="3">' % (relName,nrows,title) - repOut.append(s) - fhname = '%s_rgGRR_%s_outliers.xls' % (title, relName) - fhpath = os.path.join(outdir,fhname) - fh = open(fhpath, 'w') - newfiles.append(fhname) - explanations.append('%s Outlier Pairs %s, N=%d, Cutoff SD=%f' % (relName,title,len(outliers),Zcutoff)) - fh.write(OUTLIERS_HEADER) - s = ''.join(['<th>%s</th>' % x for x in OUTLIERS_HEADER_list]) - repOut.append('<tr align="center">%s</tr>' % s) - for n,rec in enumerate(outliers): - #(mean, sdev, zmean, zsd, fid1, iid1, fid2, iid2, rmm, rmd, rdm, rdd) = rec - fh.write('%f\t%f\t%f\t%f\t%s\t%s\t%s\t%s\t%f\t%f\t%f\t%f\n' % tuple(rec)) - # (mean, sdev, zmean, zsd, fid1, iid1, fid2, iid2, rmm, rmd, rdm, rdd)) - s = '''<td>%f</td><td>%f</td><td>%f</td><td>%f</td><td>%s</td><td>%s</td> - <td>%s</td><td>%s</td><td>%f</td><td>%f</td><td>%f</td><td>%f</td>''' % tuple(rec) - if n < MAX_SHOW_ROWS: - repOut.append('<tr align="center">%s</tr>' % s) - if truncated > 0: - repOut.append('<H2>WARNING: %d rows truncated - see outlier file for all %d rows</H2>' % (truncated, - nrows)) - fh.close() - repOut.append('</table><p>') - - ### Now, draw the plot in jpeg and svg formats, and optionally in the PDF format - ### if requested - logf.write('Plotting ...') - pointColors = [REL_COLORS[rel] for rel in rels] - pointStyles = [REL_POINTS[rel] for rel in rels] - - mainTitle = '%s (%s subjects, %d snp)' % 
(title, nSubjects, nrsSamples) - svg.write(SVG_HEADER % (SVG_COLORS[0],SVG_COLORS[1],SVG_COLORS[2],SVG_COLORS[3],SVG_COLORS[4], - SVG_COLORS[5],SVG_COLORS[6],SVG_COLORS[0],SVG_COLORS[0],SVG_COLORS[1],SVG_COLORS[1], - SVG_COLORS[2],SVG_COLORS[2],SVG_COLORS[3],SVG_COLORS[3],SVG_COLORS[4],SVG_COLORS[4], - SVG_COLORS[5],SVG_COLORS[5],SVG_COLORS[6],SVG_COLORS[6],mainTitle)) - #rpy.r.jpeg(filename='%s.jpg' % (title), width=1600, height=1200, pointsize=12, quality=100, bg='white') - #rpy.r.par(mai=(1,1,1,0.5)) - #rpy.r('par(xaxs="i",yaxs="i")') - #rpy.r.plot(means, sdevs, main=mainTitle, ylab=Y_AXIS_LABEL, xlab=X_AXIS_LABEL, cex=cexs, col=pointColors, pch=pointStyles, xlim=(0,2), ylim=(0,2)) - #rpy.r.legend(LEGEND_ALIGN, legend=REL_STATES, pch=REL_POINTS, col=REL_COLORS, title=LEGEND_TITLE) - #rpy.r.grid(nx=10, ny=10, col='lightgray', lty='dotted') - #rpy.r.dev_off() - - ### We will now go through each relationship type to partition plot points - ### into "bulk" and "outlier" groups. Bulk points will represent common - ### mean/sdev pairs and will cover the majority of the points in the plot -- - ### they will use generic tooltip informtion about all of the pairs - ### represented by that point. "Outlier" points will be uncommon pairs, - ### with very specific information in their tooltips. It would be nice to - ### keep hte total number of plotted points in the SVG representation to - ### ~10000 (certainly less than 100000?) - pointMap = {} - orderedRels = [y[1] for y in reversed(sorted([(relCounts.get(x, 0),x) for x in REL_LOOKUP.keys()]))] - # do we really want this? 
I want out of zone points last and big - for relCode in orderedRels: - svgColor = SVG_COLORS[relCode] - relName, relColor, relStyle = REL_LOOKUP[relCode] - svg.write('<g id="%s" style="stroke:%s; fill:%s; fill-opacity:1.0; stroke-width:1;" cursor="pointer">\n' % (relName, svgColor, svgColor)) - pMap = pointMap.setdefault(relCode, {}) - nPoints = 0 - rpairs=[] - rgenos=[] - rmeans=[] - rsdevs=[] - rz = [] - for x,rel in enumerate(rels): # all pairs - if rel == relCode: - s1,s2 = pairs[x] - pid=(s1,s2) - zmean,zsd,r,zrad = pair_data[pid][:4] - rpairs.append(pairs[x]) - rgenos.append(ngenoL[x]) - rmeans.append(means[x]) - rsdevs.append(sdevs[x]) - rz.append(zrad) - ### Now add the svg point group for this relationship to the svg file - for x in range(len(rmeans)): - svgX = '%d' % ((rmeans[x] - 1.0) * PLOT_WIDTH) # changed so mean scale is 1-2 - svgY = '%d' % (PLOT_HEIGHT - (rsdevs[x] * PLOT_HEIGHT)) # changed so sd scale is 0-1 - s1, s2 = rpairs[x] - (fid1,uid1,did1,mid1,sex1,phe1,iid1,d_sid1,m_sid1) = scache[s1] - (fid2,uid2,did2,mid2,sex2,phe2,iid2,d_sid2,m_sid2) = scache[s2] - ngenos = rgenos[x] - nPoints += 1 - point = pMap.setdefault((svgX, svgY), []) - point.append((rmeans[x], rsdevs[x], fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, ngenos,rz[x])) - for (svgX, svgY) in pMap: - points = pMap[(svgX, svgY)] - svgX = int(svgX) - svgY = int(svgY) - if len(points) > 1: - mmean,dmean = calcMeanSD([p[0] for p in points]) - msdev,dsdev = calcMeanSD([p[1] for p in points]) - mgeno,dgeno = calcMeanSD([p[-1] for p in points]) - mingeno = min([p[-1] for p in points]) - maxgeno = max([p[-1] for p in points]) - svg.write("""<circle cx="%d" cy="%d" r="2" - onmouseover="showBTT(evt, %d, %1.2f, %1.2f, %1.2f, %1.2f, %d, %d, %d, %d, %d)" - onmouseout="hideBTT(evt)" />\n""" % (svgX, svgY, relCode, mmean, dmean, msdev, dsdev, len(points), mgeno, dgeno, mingeno, maxgeno)) - else: - mean, sdev, fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, ngenos, zrad = points[0][:12] - rmean = 
float(relstats[relCode]['mean'][0]) - rsdev = float(relstats[relCode]['sd'][0]) - if zrad < 4: - zrad = 2 - elif 4 < zrad < 9: - zrad = 3 # to 9 - else: # > 9 5=15+ - zrad=zrad/3 - zrad = min(zrad,5) # scale limit - if zrad <= 3: - svg.write('<circle cx="%d" cy="%d" r="%s" onmouseover="showOTT(evt, %d, \'%s,%s,%s,%s\', \'%s,%s,%s,%s\', %1.2f, %1.2f, %s, %1.2f, %1.2f)" onmouseout="hideOTT(evt)" />\n' % (svgX, svgY, zrad, relCode, fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, mean, sdev, ngenos, rmean, rsdev)) - else: # highlight pairs a long way from expectation by outlining circle in red - svg.write("""<circle cx="%d" cy="%d" r="%s" style="stroke:red; fill:%s; fill-opacity:1.0; stroke-width:1;" - onmouseover="showOTT(evt, %d, \'%s,%s,%s,%s\', \'%s,%s,%s,%s\', %1.2f, %1.2f, %s, %1.2f, %1.2f)" - onmouseout="hideOTT(evt)" />\n""" % \ - (svgX, svgY, zrad, svgColor, relCode, fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, mean, sdev, ngenos, rmean, rsdev)) - svg.write('</g>\n') - - ### Create a pdf as well if indicated on the command line - ### WARNING! for framingham share, with about 50M pairs, this is a 5.5GB pdf! 
-## if pdftoo: -## pdfname = '%s.pdf' % (title) -## rpy.r.pdf(pdfname, 6, 6) -## rpy.r.par(mai=(1,1,1,0.5)) -## rpy.r('par(xaxs="i",yaxs="i")') -## rpy.r.plot(means, sdevs, main='%s, %d snp' % (title, nSamples), ylab=Y_AXIS_LABEL, xlab=X_AXIS_LABEL, cex=cexs, col=pointColors, pch=pointStyles, xlim=(0,2), ylim=(0,2)) -## rpy.r.legend(LEGEND_ALIGN, legend=REL_STATES, pch=REL_POINTS, col=REL_COLORS, title=LEGEND_TITLE) -## rpy.r.grid(nx=10, ny=10, col='lightgray', lty='dotted') -## rpy.r.dev_off() - - ### Draw polygons - if showPolygons: - svg.write('<g id="polygons" cursor="pointer">\n') - for rel, poly in POLYGONS.items(): - points = ' '.join(['%s,%s' % ((p[0]-1.0)*float(PLOT_WIDTH), (PLOT_HEIGHT - p[1]*PLOT_HEIGHT)) for p in poly]) - svg.write('<polygon points="%s" fill="transparent" style="stroke:%s; stroke-width:1"/>\n' % (points, SVG_COLORS[rel])) - svg.write('</g>\n') - - - svg.write(SVG_FOOTER) - svg.close() - return newfiles,explanations,repOut - -def doIBS(n=100): - """parse parameters from galaxy - expect 'input pbed path' 'basename' 'outpath' 'title' 'logpath' 'n' - <command interpreter="python"> - rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name" - '$out_file1' '$out_file1.files_path' "$title" '$n' '$Z' - </command> - - """ - u="""<command interpreter="python"> - rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name" - '$out_file1' '$out_file1.files_path' "$title" '$n' '$Z' - </command>""" - - if len(sys.argv) < 8: - print >> sys.stdout, 'Need pbed inpath, basename, out_htmlname, outpath, title, logpath, nSNP, Zcutoff on command line please' - print >> sys.stdout, u - sys.exit(1) - ts = '%s%s' % (string.punctuation,string.whitespace) - ptran = string.maketrans(ts,'_'*len(ts)) - inpath = sys.argv[1] - basename = sys.argv[2] - outhtml = sys.argv[3] - newfilepath = sys.argv[4] - try: - os.makedirs(newfilepath) - except: - pass - title = sys.argv[5].translate(ptran) - logfname = 'Log_%s.txt' % title - logpath = 
os.path.join(newfilepath,logfname) # log was a child - make part of html extra_files_path zoo - n = int(sys.argv[6]) - try: - Zcutoff = float(sys.argv[7]) - except: - Zcutoff = 2.0 - try: - os.makedirs(newfilepath) - except: - pass - logf = file(logpath,'w') - newfiles,explanations,repOut = doIBSpy(inpath=inpath,basename=basename,outdir=newfilepath, - logf=logf,nrsSamples=n,title=title,pdftoo=0,Zcutoff=Zcutoff) - logf.close() - logfs = file(logpath,'r').readlines() - lf = file(outhtml,'w') - lf.write(galhtmlprefix % PROGNAME) - # this is a mess. todo clean up - should each datatype have it's own directory? Yes - # probably. Then titles are universal - but userId libraries are separate. - s = '<div>Output from %s run at %s<br>\n' % (PROGNAME,timenow()) - lf.write('<h4>%s</h4>\n' % s) - fixed = ["'%s'" % x for x in sys.argv] # add quotes just in case - s = 'If you need to rerun this analysis, the command line was\n<pre>%s</pre>\n</div>' % (' '.join(fixed)) - lf.write(s) - #s = """<object data="%s" type="image/svg+xml" width="%d" height="%d"> - # <embed src="%s" type="image/svg+xml" width="%d" height="%d" /> - # </object>""" % (newfiles[0],PLOT_WIDTH,PLOT_HEIGHT,newfiles[0],PLOT_WIDTH,PLOT_HEIGHT) - s = """ <embed src="%s" type="image/svg+xml" width="%d" height="%d" />""" % (newfiles[0],PLOT_WIDTH,PLOT_HEIGHT) - #s = """ <iframe src="%s" type="image/svg+xml" width="%d" height="%d" />""" % (newfiles[0],PLOT_WIDTH,PLOT_HEIGHT) - lf.write(s) - lf.write('<div><h4>Click the links below to save output files and plots</h4><br><ol>\n') - for i in range(len(newfiles)): - if i == 0: - lf.write('<li><a href="%s" type="image/svg+xml" >%s</a></li>\n' % (newfiles[i],explanations[i])) - else: - lf.write('<li><a href="%s">%s</a></li>\n' % (newfiles[i],explanations[i])) - flist = os.listdir(newfilepath) - for fname in flist: - if not fname in newfiles: - lf.write('<li><a href="%s">%s</a></li>\n' % (fname,fname)) - lf.write('</ol></div>') - lf.write('<div>%s</div>' % 
('\n'.join(repOut))) # repOut is a list of tables - lf.write('<div><hr><h3>Log from this job (also stored in %s)</h3><pre>%s</pre><hr></div>' % (logfname,'\n'.join(logfs))) - lf.write('</body></html>\n') - lf.close() - logf.close() - -if __name__ == '__main__': - doIBS() - - +""" +# july 2009: Need to see outliers so need to draw them last? +# could use clustering on the zscores to guess real relationships for unrelateds +# but definitely need to draw last +# added MAX_SHOW_ROWS to limit the length of the main report page +# Changes for Galaxy integration +# added more robust knuth method for one pass mean and sd +# no difference really - let's use scipy.mean() and scipy.std() instead... +# fixed labels and changed to .xls for outlier reports so can open in excel +# interesting - with a few hundred subjects, 5k gives good resolution +# and 100k gives better but not by much +# TODO remove non autosomal markers +# TODO it would be best if label had the zmean and zsd as these are what matter for +# outliers rather than the group mean/sd +# mods to rgGRR.py from channing CVS which John Ziniti has rewritten to produce SVG plots +# to make a Galaxy tool - we need the table of mean and SD for interesting pairs, the SVG and the log +# so the result should be an HTML file + +# rgIBS.py +# use a random subset of markers for a quick ibs +# to identify sample dups and closely related subjects +# try snpMatrix and plink and see which one works best for us? +# abecasis grr plots mean*sd for every subject to show clusters +# mods june 23 rml to avoid non-autosomal markers +# we seem to be distinguishing parent-child by gender - 2 clouds! + + +snpMatrix from David Clayton has: +ibs.stats function to calculate the identity-by-state stats of a group of samples +Description +Given a snp.matrix-class or a X.snp.matrix-class object with N samples, calculates some statistics +about the relatedness of every pair of samples within. 
+ +Usage +ibs.stats(x) +8 ibs.stats +Arguments +x a snp.matrix-class or a X.snp.matrix-class object containing N samples +Details +No-calls are excluded from consideration here. +Value +A data.frame containing N(N - 1)/2 rows, where the row names are the sample name pairs separated +by a comma, and the columns are: +Count count of identical calls, exclusing no-calls +Fraction fraction of identical calls comparied to actual calls being made in both samples +Warning +In some applications, it may be preferable to subset a (random) selection of SNPs first - the +calculation +time increases as N(N - 1)M/2 . Typically for N = 800 samples and M = 3000 SNPs, the +calculation time is about 1 minute. A full GWA scan could take hours, and quite unnecessary for +simple applications such as checking for duplicate or related samples. +Note +This is mostly written to find mislabelled and/or duplicate samples. +Illumina indexes their SNPs in alphabetical order so the mitochondria SNPs comes first - for most +purpose it is undesirable to use these SNPs for IBS purposes. +TODO: Worst-case S4 subsetting seems to make 2 copies of a large object, so one might want to +subset before rbind(), etc; a future version of this routine may contain a built-in subsetting facility +""" +import sys,os,time,random,string,copy,optparse + +try: + set +except NameError: + from Sets import Set as set + +from rgutils import timenow,pruneLD,plinke +import plinkbinJZ + + +opts = None +verbose = False + +showPolygons = False + +class NullDevice: + def write(self, s): + pass + +tempstderr = sys.stderr # save +sys.stderr = NullDevice() +# need to avoid blather about deprecation and other strange stuff from scipy +# the current galaxy job runner assumes that +# the job is in error if anything appears on sys.stderr +# grrrrr. James wants to keep it that way instead of using the +# status flag for some strange reason. 
Presumably he doesn't use R or (in this case, scipy) +import numpy +import scipy +from scipy import weave + + +sys.stderr=tempstderr + + +PROGNAME = os.path.split(sys.argv[0])[-1] +X_AXIS_LABEL = 'Mean Alleles Shared' +Y_AXIS_LABEL = 'SD Alleles Shared' +LEGEND_ALIGN = 'topleft' +LEGEND_TITLE = 'Relationship' +DEFAULT_SYMBOL_SIZE = 1.0 # default symbol size +DEFAULT_SYMBOL_SIZE = 0.5 # default symbol size + +### Some colors for R/rpy +R_BLACK = 1 +R_RED = 2 +R_GREEN = 3 +R_BLUE = 4 +R_CYAN = 5 +R_PURPLE = 6 +R_YELLOW = 7 +R_GRAY = 8 + +### ... and some point-styles + +### +PLOT_HEIGHT = 600 +PLOT_WIDTH = 1150 + + +#SVG_COLORS = ('black', 'darkblue', 'blue', 'deepskyblue', 'firebrick','maroon','crimson') +#SVG_COLORS = ('cyan','dodgerblue','mediumpurple', 'fuchsia', 'red','gold','gray') +SVG_COLORS = ('cyan','dodgerblue','mediumpurple','forestgreen', 'lightgreen','gold','gray') +# dupe,parentchild,sibpair,halfsib,parents,unrel,unkn +#('orange', 'red', 'green', 'chartreuse', 'blue', 'purple', 'gray') + +OUTLIERS_HEADER = 'Mean\tSdev\tZ(mean)\tZ(sdev)\tFID1\tIID1\tFID2\tIID2\tMean(Rel_Mean)\tSdev(Rel_Mean)\tMean(Rel_Sdev)\tSdev(Rel_Sdev)\n' +OUTLIERS_HEADER_list = ['Mean','Sdev','ZMean','ZSdev','FID1','IID1','FID2','IID2', +'RGMean_M','RGMean_SD','RGSD_M','RGSD_SD'] +TABLE_HEADER='fid1 iid1\tfid2 iid2\tmean\tsdev\tzmean\tzsdev\tgeno\trelcode\n' + + +### Relationship codes, text, and lookups/mappings +N_RELATIONSHIP_TYPES = 7 +REL_DUPE, REL_PARENTCHILD, REL_SIBS, REL_HALFSIBS, REL_RELATED, REL_UNRELATED, REL_UNKNOWN = range(N_RELATIONSHIP_TYPES) +REL_LOOKUP = { + REL_DUPE: ('dupe', R_BLUE, 1), + REL_PARENTCHILD: ('parentchild', R_YELLOW, 1), + REL_SIBS: ('sibpairs', R_RED, 1), + REL_HALFSIBS: ('halfsibs', R_GREEN, 1), + REL_RELATED: ('parents', R_PURPLE, 1), + REL_UNRELATED: ('unrelated', R_CYAN, 1), + REL_UNKNOWN: ('unknown', R_GRAY, 1), + } +OUTLIER_STDEVS = { + REL_DUPE: 2, + REL_PARENTCHILD: 2, + REL_SIBS: 2, + REL_HALFSIBS: 2, + REL_RELATED: 2, + REL_UNRELATED: 3, 
+ REL_UNKNOWN: 2, + } +# note now Z can be passed in + +REL_STATES = [REL_LOOKUP[r][0] for r in range(N_RELATIONSHIP_TYPES)] +REL_COLORS = SVG_COLORS +REL_POINTS = [REL_LOOKUP[r][2] for r in range(N_RELATIONSHIP_TYPES)] + +DEFAULT_MAX_SAMPLE_SIZE = 10000 + +REF_COUNT_HOM1 = 3 +REF_COUNT_HET = 2 +REF_COUNT_HOM2 = 1 +MISSING = 0 +MAX_SHOW_ROWS = 100 # framingham has millions - delays showing output page - so truncate and explain +MARKER_PAIRS_PER_SECOND_SLOW = 15000000.0 +MARKER_PAIRS_PER_SECOND_FAST = 70000000.0 + + +galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +""" + + +SVG_HEADER = '''<?xml version="1.0" standalone="no"?> +<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.2//EN" "http://www.w3.org/Graphics/SVG/1.2/DTD/svg12.dtd"> + +<svg width="1280" height="800" + xmlns="http://www.w3.org/2000/svg" version="1.2" + xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 1280 800" onload="init()"> + + <script type="text/ecmascript" xlink:href="/static/scripts/checkbox_and_radiobutton.js"/> + <script type="text/ecmascript" xlink:href="/static/scripts/helper_functions.js"/> + <script type="text/ecmascript" xlink:href="/static/scripts/timer.js"/> + <script type="text/ecmascript"> + <![CDATA[ + var checkBoxes = new Array(); + var radioGroupBandwidth; + var colours = ['%s','%s','%s','%s','%s','%s','%s']; + function init() { + var style = {"font-family":"Arial,Helvetica", "fill":"black", "font-size":12}; + var dist = 12; + var yOffset = 4; + + //A checkBox for each relationship type 
dupe,parentchild,sibpair,halfsib,parents,unrel,unkn + checkBoxes["dupe"] = new checkBox("dupe","checkboxes",20,40,"cbRect","cbCross",true,"Duplicate",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["parentchild"] = new checkBox("parentchild","checkboxes",20,60,"cbRect","cbCross",true,"Parent-Child",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["sibpairs"] = new checkBox("sibpairs","checkboxes",20,80,"cbRect","cbCross",true,"Sib-pairs",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["halfsibs"] = new checkBox("halfsibs","checkboxes",20,100,"cbRect","cbCross",true,"Half-sibs",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["parents"] = new checkBox("parents","checkboxes",20,120,"cbRect","cbCross",true,"Parents",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["unrelated"] = new checkBox("unrelated","checkboxes",20,140,"cbRect","cbCross",true,"Unrelated",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["unknown"] = new checkBox("unknown","checkboxes",20,160,"cbRect","cbCross",true,"Unknown",style,dist,yOffset,undefined,hideShowLayer); + + } + + function hideShowLayer(id, status, label) { + var vis = "hidden"; + if (status) { + vis = "visible"; + } + document.getElementById(id).setAttributeNS(null, 'visibility', vis); + } + + function showBTT(evt, rel, mm, dm, md, dd, n, mg, dg, lg, hg) { + var x = parseInt(evt.pageX)-250; + var y = parseInt(evt.pageY)-110; + switch(rel) { + case 0: + fill = colours[rel]; + relt = "dupe"; + break; + case 1: + fill = colours[rel]; + relt = "parentchild"; + break; + case 2: + fill = colours[rel]; + relt = "sibpairs"; + break; + case 3: + fill = colours[rel]; + relt = "halfsibs"; + break; + case 4: + fill = colours[rel]; + relt = "parents"; + break; + case 5: + fill = colours[rel]; + relt = "unrelated"; + break; + case 6: + fill = colours[rel]; + relt = "unknown"; + break; + default: + fill = "cyan"; + relt = "ERROR_CODE: "+rel; + } + + 
document.getElementById("btRel").textContent = "GROUP: "+relt; + document.getElementById("btMean").textContent = "mean="+mm+" +/- "+dm; + document.getElementById("btSdev").textContent = "sdev="+dm+" +/- "+dd; + document.getElementById("btPair").textContent = "npairs="+n; + document.getElementById("btGeno").textContent = "ngenos="+mg+" +/- "+dg+" (min="+lg+", max="+hg+")"; + document.getElementById("btHead").setAttribute('fill', fill); + + var tt = document.getElementById("btTip"); + tt.setAttribute("transform", "translate("+x+","+y+")"); + tt.setAttribute('visibility', 'visible'); + } + + function showOTT(evt, rel, s1, s2, mean, sdev, ngeno, rmean, rsdev) { + var x = parseInt(evt.pageX)-150; + var y = parseInt(evt.pageY)-180; + + switch(rel) { + case 0: + fill = colours[rel]; + relt = "dupe"; + break; + case 1: + fill = colours[rel]; + relt = "parentchild"; + break; + case 2: + fill = colours[rel]; + relt = "sibpairs"; + break; + case 3: + fill = colours[rel]; + relt = "halfsibs"; + break; + case 4: + fill = colours[rel]; + relt = "parents"; + break; + case 5: + fill = colours[rel]; + relt = "unrelated"; + break; + case 6: + fill = colours[rel]; + relt = "unknown"; + break; + default: + fill = "cyan"; + relt = "ERROR_CODE: "+rel; + } + + document.getElementById("otRel").textContent = "PAIR: "+relt; + document.getElementById("otS1").textContent = "s1="+s1; + document.getElementById("otS2").textContent = "s2="+s2; + document.getElementById("otMean").textContent = "mean="+mean; + document.getElementById("otSdev").textContent = "sdev="+sdev; + document.getElementById("otGeno").textContent = "ngenos="+ngeno; + document.getElementById("otRmean").textContent = "relmean="+rmean; + document.getElementById("otRsdev").textContent = "relsdev="+rsdev; + document.getElementById("otHead").setAttribute('fill', fill); + + var tt = document.getElementById("otTip"); + tt.setAttribute("transform", "translate("+x+","+y+")"); + tt.setAttribute('visibility', 'visible'); + } + + function 
hideBTT(evt) { + document.getElementById("btTip").setAttributeNS(null, 'visibility', 'hidden'); + } + + function hideOTT(evt) { + document.getElementById("otTip").setAttributeNS(null, 'visibility', 'hidden'); + } + + ]]> + </script> + <defs> +  + <symbol id="cbRect" overflow="visible"> + <rect x="-5" y="-5" width="10" height="10" fill="white" stroke="dimgray" stroke-width="1" cursor="pointer"/> + </symbol> + <symbol id="cbCross" overflow="visible"> + <g pointer-events="none" stroke="black" stroke-width="1"> + <line x1="-3" y1="-3" x2="3" y2="3"/> + <line x1="3" y1="-3" x2="-3" y2="3"/> + </g> + </symbol> + </defs> + +<desc>Developer Works Dynamic Scatter Graph Scaling Example</desc> + + +<g style="stroke-width:1.0; stroke:black; shape-rendering:crispEdges"> +  + <path d="M 100 100 L 1250 100 Z"/> + <path d="M 100 700 L 1250 700 Z"/> + +  + <path d="M 100 100 L 100 700 Z"/> + <path d="M 1250 100 L 1250 700 Z"/> +</g> + +<g transform="translate(100,100)"> + +  + <g style="fill:none; stroke:#dddddd; stroke-width:1; stroke-dasharray:2,2; text-anchor:end; shape-rendering:crispEdges"> + +  + <line x1="125" y1="0" x2="115" y2="600" /> + <line x1="230" y1="0" x2="230" y2="600" /> + <line x1="345" y1="0" x2="345" y2="600" /> + <line x1="460" y1="0" x2="460" y2="600" /> + <line x1="575" y1="0" x2="575" y2="600" style="stroke-dasharray:none;" /> + <line x1="690" y1="0" x2="690" y2="600" /> + <line x1="805" y1="0" x2="805" y2="600" /> + <line x1="920" y1="0" x2="920" y2="600" /> + <line x1="1035" y1="0" x2="1035" y2="600" /> + +  + <line x1="0" y1="60" x2="1150" y2="60" /> + <line x1="0" y1="120" x2="1150" y2="120" /> + <line x1="0" y1="180" x2="1150" y2="180" /> + <line x1="0" y1="240" x2="1150" y2="240" /> + <line x1="0" y1="300" x2="1150" y2="300" style="stroke-dasharray:none;" /> + <line x1="0" y1="360" x2="1150" y2="360" /> + <line x1="0" y1="420" x2="1150" y2="420" /> + <line x1="0" y1="480" x2="1150" y2="480" /> + <line x1="0" y1="540" x2="1150" y2="540" /> + </g> + +  
+ <g style="fill:black; stroke:none" font-size="12" font-family="Arial" transform="translate(25,25)"> + <rect width="160" height="270" style="fill:none; stroke:black; shape-rendering:crispEdges" /> + <text x="5" y="20" style="fill:black; stroke:none;" font-size="13" font-weight="bold">Given Pair Relationship</text> + <rect x="120" y="35" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="55" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="75" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="95" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="115" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="135" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="155" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <text x="15" y="195" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore gt 15</text> + <circle cx="125" cy="192" r="6" style="stroke:red; fill:gold; fill-opacity:1.0; stroke-width:1;"/> + <text x="15" y="215" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore 4 to 15</text> + <circle cx="125" cy="212" r="3" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/> + <text x="15" y="235" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore lt 4</text> + <circle cx="125" cy="232" r="2" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/> + <g id="checkboxes"> + </g> + </g> + + + <g style='fill:black; stroke:none' font-size="17" font-family="Arial"> +  + <text x="480" y="660">Mean Alleles Shared</text> + <text x="0" y="630" >1.0</text> + <text x="277" y="630" >1.25</text> + <text x="564" y="630" >1.5</text> + <text x="842" y="630" >1.75</text> + 
<text x="1140" y="630" >2.0</text> + </g> + + <g transform="rotate(270)" style="fill:black; stroke:none" font-size="17" font-family="Arial"> +  + <text x="-350" y="-40">SD Alleles Shared</text> + <text x="-20" y="-10" >1.0</text> + <text x="-165" y="-10" >0.75</text> + <text x="-310" y="-10" >0.5</text> + <text x="-455" y="-10" >0.25</text> + <text x="-600" y="-10" >0.0</text> + </g> + + +<g style="fill:black; stroke:none" font-size="18" font-family="Arial"> + <text x="425" y="-30">%s</text> +</g> + + +''' + +SVG_FOOTER = ''' + +</g> +<g id="btTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial"> + <rect width="250" height="110" style="fill:silver" rx="2" ry="2"/> + <rect id="btHead" width="250" height="20" rx="2" ry="2" /> + <text id="btRel" y="14" x="85">unrelated</text> + <text id="btMean" y="40" x="4">mean=1.5 +/- 0.04</text> + <text id="btSdev" y="60" x="4">sdev=0.7 +/- 0.03</text> + <text id="btPair" y="80" x="4">npairs=1152</text> + <text id="btGeno" y="100" x="4">ngenos=4783 +/- 24 (min=1000, max=5000)</text> +</g> + +<g id="otTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial"> + <rect width="150" height="180" style="fill:silver" rx="2" ry="2"/> + <rect id="otHead" width="150" height="20" rx="2" ry="2" /> + <text id="otRel" y="14" x="40">sibpairs</text> + <text id="otS1" y="40" x="4">s1=fid1,iid1</text> + <text id="otS2" y="60" x="4">s2=fid2,iid2</text> + <text id="otMean" y="80" x="4">mean=1.82</text> + <text id="otSdev" y="100" x="4">sdev=0.7</text> + <text id="otGeno" y="120" x="4">ngeno=4487</text> + <text id="otRmean" y="140" x="4">relmean=1.85</text> + <text id="otRsdev" y="160" x="4">relsdev=0.65</text> +</g> +</svg> +''' + +OUTLIERS_HEADER = 'Mean\tSdev\tZ(mean)\tZ(sdev)\tFID1\tIID1\tFID2\tIID2\tMean(Mean)\tSdev(Mean)\tMean(Sdev)\tSdev(Sdev)\n' + +DEFAULT_MAX_SAMPLE_SIZE = 5000 + +REF_COUNT_HOM1 = 3 +REF_COUNT_HET = 2 +REF_COUNT_HOM2 
= 1 +MISSING = 0 + +MARKER_PAIRS_PER_SECOND_SLOW = 15000000 +MARKER_PAIRS_PER_SECOND_FAST = 70000000 + +POLYGONS = { + REL_UNRELATED: ((1.360, 0.655), (1.385, 0.730), (1.620, 0.575), (1.610, 0.505)), + REL_HALFSIBS: ((1.630, 0.500), (1.630, 0.550), (1.648, 0.540), (1.648, 0.490)), + REL_SIBS: ((1.660, 0.510), (1.665, 0.560), (1.820, 0.410), (1.820, 0.390)), + REL_PARENTCHILD: ((1.650, 0.470), (1.650, 0.490), (1.750, 0.440), (1.750, 0.420)), + REL_DUPE: ((1.970, 0.000), (1.970, 0.150), (2.000, 0.150), (2.000, 0.000)), + } + +def distance(point1, point2): + """ Calculate the distance between two points + """ + (x1,y1) = [float(d) for d in point1] + (x2,y2) = [float(d) for d in point2] + dx = abs(x1 - x2) + dy = abs(y1 - y2) + return math.sqrt(dx**2 + dy**2) + +def point_inside_polygon(x, y, poly): + """ Determine if a point (x,y) is inside a given polygon or not + poly is a list of (x,y) pairs. + + Taken from: http://www.ariel.com.au/a/python-point-int-poly.html + """ + + n = len(poly) + inside = False + + p1x,p1y = poly[0] + for i in range(n+1): + p2x,p2y = poly[i % n] + if y > min(p1y,p2y): + if y <= max(p1y,p2y): + if x <= max(p1x,p2x): + if p1y != p2y: + xinters = (y-p1y)*(p2x-p1x)/(p2y-p1y)+p1x + if p1x == p2x or x <= xinters: + inside = not inside + p1x,p1y = p2x,p2y + return inside + +def readMap(pedfile): + """ + """ + mapfile = pedfile.replace('.ped', '.map') + marker_list = [] + if os.path.exists(mapfile): + print 'readMap: %s' % (mapfile) + fh = file(mapfile, 'r') + for line in fh: + marker_list.append(line.strip().split()) + fh.close() + print 'readMap: %s markers' % (len(marker_list)) + return marker_list + +def calcMeanSD(useme): + """ + A numerically stable algorithm is given below. It also computes the mean. 
+ This algorithm is due to Knuth,[1] who cites Welford.[2] + n = 0 + mean = 0 + M2 = 0 + + foreach x in data: + n = n + 1 + delta = x - mean + mean = mean + delta/n + M2 = M2 + delta*(x - mean) // This expression uses the new value of mean + end for + + variance_n = M2/n + variance = M2/(n - 1) + """ + mean = 0.0 + M2 = 0.0 + sd = 0.0 + n = len(useme) + if n > 1: + for i,x in enumerate(useme): + delta = x - mean + mean = mean + delta/(i+1) # knuth uses n+=1 at start + M2 = M2 + delta*(x - mean) # This expression uses the new value of mean + variance = M2/(n-1) # assume is sample so lose 1 DOF + sd = pow(variance,0.5) + return mean,sd + + +def doIBSpy(ped=None,basename='',outdir=None,logf=None, + nrsSamples=10000,title='title',pdftoo=0,Zcutoff=2.0): + #def doIBS(pedName, title, nrsSamples=None, pdftoo=False): + """ started with snpmatrix but GRR uses actual IBS counts and sd's + """ + repOut = [] # text strings to add to the html display + refallele = {} + tblf = '%s_table.xls' % (title) + tbl = file(os.path.join(outdir,tblf), 'w') + tbl.write(TABLE_HEADER) + svgf = '%s.svg' % (title) + svg = file(os.path.join(outdir,svgf), 'w') + + nMarkers = len(ped._markers) + if nMarkers < 5: + print sys.stderr, '### ERROR - %d is too few markers for reliable estimation in %s - terminating' % (nMarkers,PROGNAME) + sys.exit(1) + nSubjects = len(ped._subjects) + nrsSamples = min(nMarkers, nrsSamples) + if opts and opts.use_mito: + markers = range(nMarkers) + nrsSamples = min(len(markers), nrsSamples) + sampleIndexes = sorted(random.sample(markers, nrsSamples)) + else: + autosomals = ped.autosomal_indices() + nrsSamples = min(len(autosomals), nrsSamples) + sampleIndexes = sorted(random.sample(autosomals, nrsSamples)) + + print '' + print 'Getting random.sample of %s from %s total' % (nrsSamples, nMarkers) + npairs = (nSubjects*(nSubjects-1))/2 # total rows in table + newfiles=[svgf,tblf] + explanations = ['rgGRR Plot (requires SVG)','Mean by SD alleles shared - %d rows' % npairs] + 
# these go with the output file links in the html file + s = 'Reading genotypes for %s subjects and %s markers\n' % (nSubjects, nrsSamples) + logf.write(s) + minUsegenos = nrsSamples/2 # must have half? + nGenotypes = nSubjects*nrsSamples + stime = time.time() + emptyRows = set() + genos = numpy.zeros((nSubjects, nrsSamples), dtype=int) + for s in xrange(nSubjects): + nValid = 0 + #getGenotypesByIndices(self, s, mlist, format) + genos[s] = ped.getGenotypesByIndices(s, sampleIndexes, format='ref') + nValid = sum([1 for g in genos[s] if g]) + if not nValid: + emptyRows.add(s) + sub = ped.getSubject(s) + print 'All missing for row %d (%s)' % (s, sub) + logf.write('All missing for row %d (%s)\n' % (s, sub)) + rtime = time.time() - stime + if verbose: + print '@@Read %s genotypes in %s seconds' % (nGenotypes, rtime) + + + ### Now the expensive part. For each pair of subjects, we get the mean number + ### and standard deviation of shared alleles over all of the markers where both + ### subjects have a known genotype. Identical subjects should have mean shared + ### alleles very close to 2.0 with a standard deviation very close to 0.0. 
+ tot = nSubjects*(nSubjects-1)/2 + nprog = tot/10 + nMarkerpairs = tot * nrsSamples + estimatedTimeSlow = nMarkerpairs/MARKER_PAIRS_PER_SECOND_SLOW + estimatedTimeFast = nMarkerpairs/MARKER_PAIRS_PER_SECOND_FAST + + pairs = [] + pair_data = {} + means = [] ## Mean IBS for each pair + ngenoL = [] ## Count of comparable genotypes for each pair + sdevs = [] ## Standard dev for each pair + rels = [] ## A relationship code for each pair + zmeans = [0.0 for x in xrange(tot)] ## zmean score for each pair for the relgroup + zstds = [0.0 for x in xrange(tot)] ## zstd score for each pair for the relgrp + skip = set() + ndone = 0 ## How many have been done so far + + logf.write('Calculating %d pairs, updating every %d pairs...\n' % (tot, nprog)) + logf.write('Estimated time is %2.2f to %2.2f seconds ...\n' % (estimatedTimeFast, estimatedTimeSlow)) + + t1sum = 0 + t2sum = 0 + t3sum = 0 + now = time.time() + scache = {}

1 0

[hg] galaxy 3634: Added pound to trimmer list
by Greg Von Kuster 15 Apr '10

15 Apr '10

details: http://www.bx.psu.edu/hg/galaxy/rev/a6e3f4cae4ce changeset: 3634:a6e3f4cae4ce user: Anton Nekrutenko <anton(a)bx.psu.edu> date: Tue Apr 13 11:17:30 2010 -0400 description: Added pound to trimmer list diffstat: tools/filters/trimmer.xml | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diffs (11 lines): diff -r 0dc1fc63c945 -r a6e3f4cae4ce tools/filters/trimmer.xml --- a/tools/filters/trimmer.xml Tue Apr 13 10:22:27 2010 -0400 +++ b/tools/filters/trimmer.xml Tue Apr 13 11:17:30 2010 -0400 @@ -28,6 +28,7 @@ <option value="38">&amp;</option> <option value="37">%</option> <option value="94">^</option> + <option value="35">#</option> </param> </inputs> <outputs>

1 0

[hg] galaxy 3633: Add the raw uncompressed .js files needed for ...
by Greg Von Kuster 15 Apr '10

15 Apr '10

details: http://www.bx.psu.edu/hg/galaxy/rev/0b682d3dd01b changeset: 3633:0b682d3dd01b user: fubar: ross Lazarus at gmail period com date: Tue Apr 13 11:15:35 2010 -0400 description: Add the raw uncompressed .js files needed for rgGRR svg display - the packed ones were already there Fix rgManQQ so the manhattan plot is not called for data without chromosome and offset Use LD reduced data for IBD/GRR for faster performance and better resolution of close relationships Add an LD pruning and thinning step to rgGRR - the target composite pbed or lped has the thinned files permanently added for reuse. This is skipped for small numbers of markers since the plink --thin 0.1 option applied to tinywga.pbed will return an empty file which is not really ideal. diffstat: static/scripts/checkbox_and_radiobutton.js | 347 ++++ static/scripts/helper_functions.js | 817 ++++++++++ static/scripts/timer.js | 74 + tools/rgenetics/rgGRR.py | 2241 ++++++++++++++------------- tools/rgenetics/rgManQQ.py | 19 +- 5 files changed, 2396 insertions(+), 1102 deletions(-) diffs (truncated from 3540 to 3000 lines): diff -r 0dc1fc63c945 -r 0b682d3dd01b static/scripts/checkbox_and_radiobutton.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/static/scripts/checkbox_and_radiobutton.js Tue Apr 13 11:15:35 2010 -0400 @@ -0,0 +1,347 @@ +/* +Scripts to create interactive checkboxes and radio buttons in SVG using ECMA script +Copyright (C) <2007> <Andreas Neumann> +Version 1.1.3, 2007-08-09 +neumann(a)karto.baug.ethz.ch +http://www.carto.net/ +http://www.carto.net/neumann/ + +Credits: +* Guy Morton for providing a fix to let users toggle checkboxes by clicking on text labels +* Bruce Rindahl for providing the bugfix described in version 1.1.2 +* Simon Shutter for providing a fix for the ASV in IE crash when reloading the SVG file after calling the .remove() method on a checkbox + +---- + +Documentation: http://www.carto.net/papers/svg/gui/checkbox_and_radiobutton/ + +---- + +current version: 1.1.3 + 
+version history: +1.0 (2006-03-13) +initial version + +1.1 (2006-07-11) +text labels are now clickable (thanks to Guy Morton) +added method .moveTo() to move checkbox to a different location +introduced new constructor parameter labelYOffset to allow more flexible placement of the text label + +1.1.1 (2007-02-06) +added cursor pointer to the text label and use element representing the checkBox + +1.1.2 (2007-04-19) +bug fix: this.selectedIndex was not correctly initialized in method addCheckBox of the radioButtonGroup object + +1.1.3 (2007-08-09) +bug fix: the method .remove() was slightly modified (using removeEventListener) for avoiding a crash related to the method after reloading the SVG file + +------- + + +This ECMA script library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library (lesser_gpl.txt); if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +---- + +original document site: http://www.carto.net/papers/svg/gui/checkbox_and_radiobutton/ +Please contact the author in case you want to use code or ideas commercially. 
+If you use this code, please include this copyright header, the included full +LGPL 2.1 text and read the terms provided in the LGPL 2.1 license +(http://www.gnu.org/copyleft/lesser.txt) + +------------------------------- + +Please report bugs and send improvements to neumann(a)karto.baug.ethz.ch +If you use this control, please link to the original (http://www.carto.net/papers/svg/gui/checkbox_and_radiobutton/) +somewhere in the source-code-comment or the "about" of your project and give credits, thanks! + +*/ + +function checkBox(id,parentNode,x,y,checkboxId,checkcrossId,checkedStatus,labelText,textStyles,labelDistance,labelYOffset,radioButtonGroup,functionToCall) { + var nrArguments = 13; + var createCheckbox= true; + if (arguments.length == nrArguments) { + this.id = id; //an internal id, this id is not used in the SVG Dom tree + this.parentNode = parentNode; //the parentNode, string or nodeReference + this.x = x; //the center of the checkBox + this.y = y; //the center of the checkBox + this.checkboxId = checkboxId; //the id of the checkbox symbol (background) + this.checkcrossId = checkcrossId; //the id of the checkbox symbol (foreground), pointer-events should be set to "none" + this.checkedStatus = checkedStatus; //a status variable (true|false), indicates if checkbox is on or off + this.labelText = labelText; //the text of the checkbox label to be displayed, use undefined or empty string if you don't need a label text + this.textStyles = textStyles; //an array of literals containing the text settings + if (!this.textStyles["font-size"]) { + this.textStyles["font-size"] = 12; + } + this.labelDistance = labelDistance; //a distance defined from the center of the checkbox to the left of the text of the label + this.labelYOffset = labelYOffset; //a y offset value for the text label in relation to the checkbox symbol center + this.radioButtonGroup = radioButtonGroup; //a reference to a radio button group, if this is a standalone checkBox, just use the parameter 
undefined + this.functionToCall = functionToCall; //the function to call after triggering checkBox + this.exists = true; //status that indicates if checkbox exists or not, is set to false after method .remove() was called + this.label = undefined; //later a reference to the label text node + } + else { + createCheckbox = false; + alert("Error in checkbox ("+id+"): wrong nr of arguments! You have to pass over "+nrArguments+" parameters."); + } + if (createCheckbox) { + //timer stuff + this.timer = new Timer(this); //a Timer instance for calling the functionToCall + if (this.radioButtonGroup) { + this.timerMs = 0; + } + else { + this.timerMs = 200; //a constant of this object that is used in conjunction with the timer - functionToCall is called after 200 ms + } + //create checkbox + this.createCheckBox(); + } + else { + alert("Could not create checkbox with id '"+id+"' due to errors in the constructor parameters"); + } +} + +//this method creates all necessary checkbox geometry +checkBox.prototype.createCheckBox = function() { + if (typeof(this.parentNode) == "string") { + this.parentNode = document.getElementById(this.parentNode); + } + //create checkbox + this.checkBox = document.createElementNS(svgNS,"use"); + this.checkBox.setAttributeNS(null,"x",this.x); + this.checkBox.setAttributeNS(null,"y",this.y); + this.checkBox.setAttributeNS(xlinkNS,"href","#"+this.checkboxId); + this.checkBox.addEventListener("click",this,false); + this.checkBox.setAttributeNS(null,"cursor","pointer"); + this.parentNode.appendChild(this.checkBox); + //create checkcross + this.checkCross = document.createElementNS(svgNS,"use"); + this.checkCross.setAttributeNS(null,"x",this.x); + this.checkCross.setAttributeNS(null,"y",this.y); + this.checkCross.setAttributeNS(xlinkNS,"href","#"+this.checkcrossId); + this.parentNode.appendChild(this.checkCross); + if (this.checkedStatus == false) { + this.checkCross.setAttributeNS(null,"display","none"); + } + //create label, if any + if (this.labelText) 
{ + if (this.labelText.length > 0) { + this.label = document.createElementNS(svgNS,"text"); + for (var attrib in this.textStyles) { + var value = this.textStyles[attrib]; + if (attrib == "font-size") { + value += "px"; + } + this.label.setAttributeNS(null,attrib,value); + } + this.label.setAttributeNS(null,"x",(this.x + this.labelDistance)); + this.label.setAttributeNS(null,"y",(this.y + this.labelYOffset)); + this.label.setAttributeNS(null,"cursor","pointer"); + var labelTextNode = document.createTextNode(this.labelText); + this.label.appendChild(labelTextNode); + this.label.setAttributeNS(null,"pointer-events","all"); + this.label.addEventListener("click",this,false); + this.parentNode.appendChild(this.label); + } + } + if (this.radioButtonGroup) { + this.radioButtonGroup.addCheckBox(this); + } +} + +checkBox.prototype.handleEvent = function(evt) { + if (evt.type == "click") { + if (this.checkedStatus == true) { + this.checkCross.setAttributeNS(null,"display","none"); + this.checkedStatus = false; + } + else { + this.checkCross.setAttributeNS(null,"display","inline"); + this.checkedStatus = true; + } + } + this.timer.setTimeout("fireFunction",this.timerMs); +} + +checkBox.prototype.fireFunction = function() { + if (this.radioButtonGroup) { + this.radioButtonGroup.selectById(this.id,true); + } + else { + if (typeof(this.functionToCall) == "function") { + this.functionToCall(this.id,this.checkedStatus,this.labelText); + } + if (typeof(this.functionToCall) == "object") { + this.functionToCall.checkBoxChanged(this.id,this.checkedStatus,this.labelText); + } + if (typeof(this.functionToCall) == undefined) { + return; + } + } +} + +checkBox.prototype.check = function(FireFunction) { + this.checkCross.setAttributeNS(null,"display","inherit"); + this.checkedStatus = true; + if (FireFunction) { + this.timer.setTimeout("fireFunction",this.timerMs); + } +} + +checkBox.prototype.uncheck = function(FireFunction) { + this.checkCross.setAttributeNS(null,"display","none"); + 
this.checkedStatus = false; + if (FireFunction) { + this.timer.setTimeout("fireFunction",this.timerMs); + } +} + +//move checkbox to a different position +checkBox.prototype.moveTo = function(moveX,moveY) { + this.x = moveX; + this.y = moveY; + //move checkbox + this.checkBox.setAttributeNS(null,"x",this.x); + this.checkBox.setAttributeNS(null,"y",this.y); + //move checkcross + this.checkCross.setAttributeNS(null,"x",this.x); + this.checkCross.setAttributeNS(null,"y",this.y); + //move text label + if (this.labelText) { + this.label.setAttributeNS(null,"x",(this.x + this.labelDistance)); + this.label.setAttributeNS(null,"y",(this.y + this.labelYOffset)); + } +} + +checkBox.prototype.remove = function(FireFunction) { + this.checkBox.removeEventListener("click",this,false); + this.parentNode.removeChild(this.checkBox); + this.parentNode.removeChild(this.checkCross); + if (this.label) { + this.parentNode.removeChild(this.label); + } + this.exists = false; +} + +checkBox.prototype.setLabelText = function(labelText) { + this.labelText = labelText + if (this.label) { + this.label.firstChild.nodeValue = labelText; + } + else { + if (this.labelText.length > 0) { + this.label = document.createElementNS(svgNS,"text"); + for (var attrib in this.textStyles) { + value = this.textStyles[attrib]; + if (attrib == "font-size") { + value += "px"; + } + this.label.setAttributeNS(null,attrib,value); + } + this.label.setAttributeNS(null,"x",(this.x + this.labelDistance)); + this.label.setAttributeNS(null,"y",(this.y + this.textStyles["font-size"] * 0.3)); + var labelTextNode = document.createTextNode(this.labelText); + this.label.appendChild(labelTextNode); + this.parentNode.appendChild(this.label); + } + } +} + +/* start of the radioButtonGroup object */ + +function radioButtonGroup(id,functionToCall) { + var nrArguments = 2; + if (arguments.length == nrArguments) { + this.id = id; + if (typeof(functionToCall) == "function" || typeof(functionToCall) == "object" || 
typeof(functionToCall) == undefined) { + this.functionToCall = functionToCall; + } + else { + alert("Error in radiobutton with ("+id+"): argument functionToCall is not of type 'function', 'object' or undefined!"); + } + this.checkBoxes = new Array(); //this array will hold checkbox objects + this.selectedId = undefined; //holds the id of the active radio button + this.selectedIndex = undefined; //holds the index of the active radio button + //timer stuff + this.timer = new Timer(this); //a Timer instance for calling the functionToCall + this.timerMs = 200; //a constant of this object that is used in conjunction with the timer - functionToCall is called after 200 ms + } + else { + alert("Error in radiobutton with ("+id+"): wrong nr of arguments! You have to pass over "+nrArguments+" parameters."); + } +} + +radioButtonGroup.prototype.addCheckBox = function(checkBoxObj) { + this.checkBoxes.push(checkBoxObj); + if (checkBoxObj.checkedStatus) { + this.selectedId = checkBoxObj.id; + this.selectedIndex = this.checkBoxes.length - 1; + } +} + +//change radio button selection by id +radioButtonGroup.prototype.selectById = function(cbId,fireFunction) { + var found = false; + for (var i=0;i<this.checkBoxes.length;i++) { + if (this.checkBoxes[i].id == cbId) { + this.selectedId = cbId; + this.selectedIndex = i; + if (this.checkBoxes[i].checkedStatus == false) { + this.checkBoxes[i].check(false); + } + found = true; + } + else { + this.checkBoxes[i].uncheck(false); + } + } + if (found) { + if (fireFunction) { + this.timer.setTimeout("fireFunction",this.timerMs); + } + } + else { + alert("Error in radiobutton with ("+this.id+"): could not find checkbox with id '"+cbId+"'"); + } +} + +//change radio button selection by label name +radioButtonGroup.prototype.selectByLabelname = function(labelName,fireFunction) { + var id = -1; + for (var i=0;i<this.checkBoxes.length;i++) { + if (this.checkBoxes[i].labelText == labelName) { + id = this.checkBoxes[i].id; + } + } + if (id == -1) { + 
alert("Error in radiobutton with ("+this.id+"): could not find checkbox with label '"+labelName+"'"); + } + else { + this.selectById(id,fireFunction); + } +} + +radioButtonGroup.prototype.fireFunction = function() { + if (typeof(this.functionToCall) == "function") { + this.functionToCall(this.id,this.selectedId,this.checkBoxes[this.selectedIndex].labelText); + } + if (typeof(this.functionToCall) == "object") { + this.functionToCall.radioButtonChanged(this.id,this.selectedId,this.checkBoxes[this.selectedIndex].labelText); + } + if (typeof(this.functionToCall) == undefined) { + return; + } +} \ No newline at end of file diff -r 0dc1fc63c945 -r 0b682d3dd01b static/scripts/helper_functions.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/static/scripts/helper_functions.js Tue Apr 13 11:15:35 2010 -0400 @@ -0,0 +1,817 @@ +/** + * @fileoverview + * + * ECMAScript <a href="http://www.carto.net/papers/svg/resources/helper_functions.html">helper functions</a>, main purpose is to serve in SVG mapping or other SVG based web applications + * + * This ECMA script library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library (http://www.carto.net/papers/svg/resources/lesser_gpl.txt) if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Please report bugs and send improvements to neumann(a)karto.baug.ethz.ch + * If you use these scripts, please link to the original (http://www.carto.net/papers/svg/resources/helper_functions.html) + * somewhere in the source-code-comment or the "about" of your project and give credits, thanks! + * + * See <a href="js_docs_out/overview-summary-helper_functions.js.html">documentation</a>. + * + * @author Andreas Neumann a.neumann(a)carto.net + * @copyright LGPL 2.1 <a href="http://www.gnu.org/copyleft/lesser.txt">Gnu LGPL 2.1</a> + * @credits Bruce Rindahl, numerous people on svgdevelopers(a)yahoogroups.com + */ + +//global variables necessary to create elements in these namespaces, do not delete them!!!! + +/** + * This variable is a shortcut to the full URL of the SVG namespace + * @final + * @type String + */ +var svgNS = "http://www.w3.org/2000/svg"; + +/** + * This variable is a shortcut to the full URL of the XLink namespace + * @final + * @type String + */ +var xlinkNS = "http://www.w3.org/1999/xlink"; + +/** + * This variable is a shortcut to the full URL of the attrib namespace + * @final + * @type String + */ +var cartoNS = "http://www.carto.net/attrib"; + +/** + * This variable is a alias to the full URL of the attrib namespace + * @final + * @type String + */ +var attribNS = "http://www.carto.net/attrib"; + +/** + * This variable is a alias to the full URL of the Batik extension namespace + * @final + * @type String + */ +var batikNS = "http://xml.apache.org/batik/ext"; + +/** + * Returns the polar direction from a given vector + * @param {Number} xdiff the x-part of the vector + * @param {Number} ydiff the y-part of the vector + * @return direction the direction in radians 
+ * @type Number + * @version 1.0 (2007-04-30) + * @see #toPolarDist + * @see #toRectX + * @see #toRectY + */ +function toPolarDir(xdiff,ydiff) { + var direction = (Math.atan2(ydiff,xdiff)); + return(direction); +} + +/** + * Returns the polar distance from a given vector + * @param {Number} xdiff the x-part of the vector + * @param {Number} ydiff the y-part of the vector + * @return distance the distance + * @type Number + * @version 1.0 (2007-04-30) + * @see #toPolarDir + * @see #toRectX + * @see #toRectY + */ +function toPolarDist(xdiff,ydiff) { + var distance = Math.sqrt(xdiff * xdiff + ydiff * ydiff); + return(distance); +} + +/** + * Returns the x-part of a vector from a given direction and distance + * @param {Number} direction the direction (in radians) + * @param {Number} distance the distance + * @return x the x-part of the vector + * @type Number + * @version 1.0 (2007-04-30) + * @see #toPolarDist + * @see #toPolarDir + * @see #toRectY + */ +function toRectX(direction,distance) { + var x = distance * Math.cos(direction); + return(x); +} + +/** + * Returns the y-part of the vector from a given direction and distance + * @param {Number} direction the direction (in radians) + * @param {Number} distance the distance + * @return y the y-part of the vector + * @type Number + * @version 1.0 (2007-04-30) + * @see #toPolarDist + * @see #toPolarDir + * @see #toRectX + */ +function toRectY(direction,distance) { + y = distance * Math.sin(direction); + return(y); +} + +/** + * Converts degrees to radians + * @param {Number} deg the degree value + * @return rad the radians value + * @type Number + * @version 1.0 (2007-04-30) + * @see #RadToDeg + */ +function DegToRad(deg) { + return (deg / 180.0 * Math.PI); +} + +/** + * Converts radians to degrees + * @param {Number} rad the radians value + * @return deg the degree value + * @type Number + * @version 1.0 (2007-04-30) + * @see #DegToRad + */ +function RadToDeg(rad) { + return (rad / Math.PI * 180.0); +} + +/** + * 
Converts decimal degrees to degrees, minutes, seconds + * @param {Number} dd the decimal degree value + * @return degrees the degree values in the following notation: {deg:degrees,min:minutes,sec:seconds} + * @type literal + * @version 1.0 (2007-04-30) + * @see #dms2dd + */ +function dd2dms(dd) { + var minutes = (Math.abs(dd) - Math.floor(Math.abs(dd))) * 60; + var seconds = (minutes - Math.floor(minutes)) * 60; + var minutes = Math.floor(minutes); + if (dd >= 0) { + var degrees = Math.floor(dd); + } + else { + var degrees = Math.ceil(dd); + } + return {deg:degrees,min:minutes,sec:seconds}; +} + +/** + * Converts degrees, minutes and seconds to decimal degrees + * @param {Number} deg the degree value + * @param {Number} min the minute value + * @param {Number} sec the second value + * @return deg the decimal degree values + * @type Number + * @version 1.0 (2007-04-30) + * @see #dd2dms + */ +function dms2dd(deg,min,sec) { + if (deg < 0) { + return deg - (min / 60) - (sec / 3600); + } + else { + return deg + (min / 60) + (sec / 3600); + } +} + +/** + * log function, missing in the standard Math object + * @param {Number} x the value where the log function should be applied to + * @param {Number} b the base value for the log function + * @return logResult the result of the log function + * @type Number + * @version 1.0 (2007-04-30) + */ +function log(x,b) { + if(b==null) b=Math.E; + return Math.log(x)/Math.log(b); +} + +/** + * interpolates a value (e.g. 
elevation) bilinearly based on the position within a cell with 4 corner values + * @param {Number} za the value at the upper left corner of the cell + * @param {Number} zb the value at the upper right corner of the cell + * @param {Number} zc the value at the lower right corner of the cell + * @param {Number} zd the value at the lower left corner of the cell + * @param {Number} xpos the x position of the point where a new value should be interpolated + * @param {Number} ypos the y position of the point where a new value should be interpolated + * @param {Number} ax the x position of the lower left corner of the cell + * @param {Number} ay the y position of the lower left corner of the cell + * @param {Number} cellsize the size of the cell + * @return interpol_value the result of the bilinear interpolation function + * @type Number + * @version 1.0 (2007-04-30) + */ +function intBilinear(za,zb,zc,zd,xpos,ypos,ax,ay,cellsize) { //bilinear interpolation function + var e = (xpos - ax) / cellsize; + var f = (ypos - ay) / cellsize; + + //calculation of weights + var wa = (1 - e) * (1 - f); + var wb = e * (1 - f); + var wc = e * f; + var wd = f * (1 - e); + + var interpol_value = wa * zc + wb * zd + wc * za + wd * zb; + return interpol_value; +} + +/** + * tests if a given point is left or right of a given line + * @param {Number} pointx the x position of the given point + * @param {Number} pointy the y position of the given point + * @param {Number} linex1 the x position of line's start point + * @param {Number} liney1 the y position of line's start point + * @param {Number} linex2 the x position of line's end point + * @param {Number} liney2 the y position of line's end point + * @return leftof the result of the leftOfTest, 1 means leftOf, 0 means rightOf + * @type Number (integer, 0|1) + * @version 1.0 (2007-04-30) + */ +function leftOfTest(pointx,pointy,linex1,liney1,linex2,liney2) { + var result = (liney1 - pointy) * (linex2 - linex1) - (linex1 - pointx) * (liney2 - 
liney1); + if (result < 0) { + var leftof = 1; //case left of + } + else { + var leftof = 0; //case left of + } + return leftof; +} + +/** + * calculates the distance between a given point and a given line + * @param {Number} pointx the x position of the given point + * @param {Number} pointy the y position of the given point + * @param {Number} linex1 the x position of line's start point + * @param {Number} liney1 the y position of line's start point + * @param {Number} linex2 the x position of line's end point + * @param {Number} liney2 the y position of line's end point + * @return distance the result of the leftOfTest, 1 means leftOf, 0 means rightOf + * @type Number + * @version 1.0 (2007-04-30) + */ +function distFromLine(xpoint,ypoint,linex1,liney1,linex2,liney2) { + var dx = linex2 - linex1; + var dy = liney2 - liney1; + var distance = (dy * (xpoint - linex1) - dx * (ypoint - liney1)) / Math.sqrt(Math.pow(dx,2) + Math.pow(dy,2)); + return distance; +} + +/** + * calculates the angle between two vectors (lines) + * @param {Number} ax the x part of vector a + * @param {Number} ay the y part of vector a + * @param {Number} bx the x part of vector b + * @param {Number} by the y part of vector b + * @return angle the angle in radians + * @type Number + * @version 1.0 (2007-04-30) + * @credits <a href="http://www.mathe-online.at/mathint/vect2/i.html#Winkel">Mathe Online (Winkel)</a> + */ +function angleBetwTwoLines(ax,ay,bx,by) { + var angle = Math.acos((ax * bx + ay * by) / (Math.sqrt(Math.pow(ax,2) + Math.pow(ay,2)) * Math.sqrt(Math.pow(bx,2) + Math.pow(by,2)))); + return angle; +} + +/** + * calculates the bisector vector for two given vectors + * @param {Number} ax the x part of vector a + * @param {Number} ay the y part of vector a + * @param {Number} bx the x part of vector b + * @param {Number} by the y part of vector b + * @return c the resulting vector as an Array, c[0] is the x part of the vector, c[1] is the y part + * @type Array + * @version 1.0 
(2007-04-30) + * @credits <a href="http://www.mathe-online.at/mathint/vect1/i.html#Winkelsymmetrale">Mathe Online (Winkelsymmetrale)</a> + * see #calcBisectorAngle + * */ +function calcBisectorVector(ax,ay,bx,by) { + var betraga = Math.sqrt(Math.pow(ax,2) + Math.pow(ay,2)); + var betragb = Math.sqrt(Math.pow(bx,2) + Math.pow(by,2)); + var c = new Array(); + c[0] = ax / betraga + bx / betragb; + c[1] = ay / betraga + by / betragb; + return c; +} + +/** + * calculates the bisector angle for two given vectors + * @param {Number} ax the x part of vector a + * @param {Number} ay the y part of vector a + * @param {Number} bx the x part of vector b + * @param {Number} by the y part of vector b + * @return angle the bisector angle in radians + * @type Number + * @version 1.0 (2007-04-30) + * @credits <a href="http://www.mathe-online.at/mathint/vect1/i.html#Winkelsymmetrale">Mathe Online (Winkelsymmetrale)</a> + * see #calcBisectorVector + * */ +function calcBisectorAngle(ax,ay,bx,by) { + var betraga = Math.sqrt(Math.pow(ax,2) + Math.pow(ay,2)); + var betragb = Math.sqrt(Math.pow(bx,2) + Math.pow(by,2)); + var c1 = ax / betraga + bx / betragb; + var c2 = ay / betraga + by / betragb; + var angle = toPolarDir(c1,c2); + return angle; +} + +/** + * calculates the intersection point of two given lines + * @param {Number} line1x1 the x the start point of line 1 + * @param {Number} line1y1 the y the start point of line 1 + * @param {Number} line1x2 the x the end point of line 1 + * @param {Number} line1y2 the y the end point of line 1 + * @return interSectPoint the intersection point, interSectPoint.x contains x-part, interSectPoint.y the y-part of the resulting coordinate + * @type Object + * @version 1.0 (2007-04-30) + * @credits <a href="http://astronomy.swin.edu.au/~pbourke/geometry/lineline2d/">P. 
Bourke</a> + */ +function intersect2lines(line1x1,line1y1,line1x2,line1y2,line2x1,line2y1,line2x2,line2y2) { + var interSectPoint = new Object(); + var denominator = (line2y2 - line2y1)*(line1x2 - line1x1) - (line2x2 - line2x1)*(line1y2 - line1y1); + if (denominator == 0) { + alert("lines are parallel"); + } + else { + var ua = ((line2x2 - line2x1)*(line1y1 - line2y1) - (line2y2 - line2y1)*(line1x1 - line2x1)) / denominator; + var ub = ((line1x2 - line1x1)*(line1y1 - line2y1) - (line1y2 - line1y1)*(line1x1 - line2x1)) / denominator; + } + interSectPoint["x"] = line1x1 + ua * (line1x2 - line1x1); + interSectPoint["y"] = line1y1 + ua * (line1y2 - line1y1); + return interSectPoint; +} + +/** + * reformats a given number to a string by adding separators at every third digit + * @param {String|Number} inputNumber the input number, can be of type number or string + * @param {String} separator the separator, e.g. ' or , + * @return newString the intersection point, interSectPoint.x contains x-part, interSectPoint.y the y-part of the resulting coordinate + * @type String + * @version 1.0 (2007-04-30) + */ +function formatNumberString(inputNumber,separator) { + //check if of type string, if number, convert it to string + if (typeof(inputNumber) == "Number") { + var myTempString = inputNumber.toString(); + } + else { + var myTempString = inputNumber; + } + var newString=""; + //if it contains a comma, it will be split + var splitResults = myTempString.split("."); + var myCounter = splitResults[0].length; + if (myCounter > 3) { + while(myCounter > 0) { + if (myCounter > 3) { + newString = separator + splitResults[0].substr(myCounter - 3,3) + newString; + } + else { + newString = splitResults[0].substr(0,myCounter) + newString; + } + myCounter -= 3; + } + } + else { + newString = splitResults[0]; + } + //concatenate if it contains a comma + if (splitResults[1]) { + newString = newString + "." 
+ splitResults[1]; + } + return newString; +} + +/** + * writes a status text message out to a SVG text element's first child + * @param {String} statusText the text message to be displayed + * @version 1.0 (2007-04-30) + */ + function statusChange(statusText) { + document.getElementById("statusText").firstChild.nodeValue = "Statusbar: " + statusText; +} + +/** + * scales an SVG element, requires that the element has an x and y attribute (e.g. circle, ellipse, use element, etc.) + * @param {dom::Event} evt the evt object that triggered the scaling + * @param {Number} factor the scaling factor + * @version 1.0 (2007-04-30) + */ +function scaleObject(evt,factor) { + //reference to the currently selected object + var element = evt.currentTarget; + var myX = element.getAttributeNS(null,"x"); + var myY = element.getAttributeNS(null,"y"); + var newtransform = "scale(" + factor + ") translate(" + (myX * 1 / factor - myX) + " " + (myY * 1 / factor - myY) +")"; + element.setAttributeNS(null,'transform', newtransform); +} + +/** + * returns the transformation matrix (ctm) for the given node up to the root element + * the basic use case is to provide a wrapper function for the missing SVGLocatable.getTransformToElement method (missing in ASV3) + * @param {svg::SVGTransformable} node the node reference for the SVGElement the ctm is queried + * @return CTM the current transformation matrix from the given node to the root element + * @type svg::SVGMatrix + * @version 1.0 (2007-05-01) + * @credits <a href="http://www.kevlindev.com/tutorials/basics/transformations/toUserSpace/index…">Kevin Lindsey (toUserSpace)</a> + * @see #getTransformToElement + */ +function getTransformToRootElement(node) { + try { + //this part is for fully conformant players (like Opera, Batik, Firefox, Safari ...) 
+ var CTM = node.getTransformToElement(document.documentElement); + } + catch (ex) { + //this part is for ASV3 or other non-conformant players + // Initialize our CTM the node's Current Transformation Matrix + var CTM = node.getCTM(); + // Work our way through the ancestor nodes stopping at the SVG Document + while ( ( node = node.parentNode ) != document ) { + // Multiply the new CTM to the one with what we have accumulated so far + CTM = node.getCTM().multiply(CTM); + } + } + return CTM; +} + +/** + * returns the transformation matrix (ctm) for the given dom::Node up to a different dom::Node + * the basic use case is to provide a wrapper function for the missing SVGLocatable.getTransformToElement method (missing in ASV3) + * @param {svg::SVGTransformable} node the node reference for the element the where the ctm should be calculated from + * @param {svg::SVGTransformable} targetNode the target node reference for the element the ctm should be calculated to + * @return CTM the current transformation matrix from the given node to the target element + * @type svg::SVGMatrix + * @version 1.0 (2007-05-01) + * @credits <a href="http://www.kevlindev.com/tutorials/basics/transformations/toUserSpace/index…">Kevin Lindsey (toUserSpace)</a> + * @see #getTransformToRootElement + */ +function getTransformToElement(node,targetNode) { + try { + //this part is for fully conformant players + var CTM = node.getTransformToElement(targetNode); + } + catch (ex) { + //this part is for ASV3 or other non-conformant players + // Initialize our CTM the node's Current Transformation Matrix + var CTM = node.getCTM(); + // Work our way through the ancestor nodes stopping at the SVG Document + while ( ( node = node.parentNode ) != targetNode ) { + // Multiply the new CTM to the one with what we have accumulated so far + CTM = node.getCTM().multiply(CTM); + } + } + return CTM; +} + +/** + * converts HSV to RGB values + * @param {Number} hue the hue value (between 0 and 360) + * @param {Number} 
sat the saturation value (between 0 and 1) + * @param {Number} val the value value (between 0 and 1) + * @return rgbArr the rgb values (associative array or object, the keys are: red,green,blue), all values are scaled between 0 and 255 + * @type Object + * @version 1.0 (2007-05-01) + * @see #rgb2hsv + */ +function hsv2rgb(hue,sat,val) { + var rgbArr = new Object(); + if ( sat == 0) { + rgbArr["red"] = Math.round(val * 255); + rgbArr["green"] = Math.round(val * 255); + rgbArr["blue"] = Math.round(val * 255); + } + else { + var h = hue / 60; + var i = Math.floor(h); + var f = h - i; + if (i % 2 == 0) { + f = 1 - f; + } + var m = val * (1 - sat); + var n = val * (1 - sat * f); + switch(i) { + case 0: + rgbArr["red"] = val; + rgbArr["green"] = n; + rgbArr["blue"] = m; + break; + case 1: + rgbArr["red"] = n; + rgbArr["green"] = val; + rgbArr["blue"] = m; + break; + case 2: + rgbArr["red"] = m; + rgbArr["green"] = val; + rgbArr["blue"] = n; + break; + case 3: + rgbArr["red"] = m; + rgbArr["green"] = n; + rgbArr["blue"] = val; + break; + case 4: + rgbArr["red"] = n; + rgbArr["green"] = m; + rgbArr["blue"] = val; + break; + case 5: + rgbArr["red"] = val; + rgbArr["green"] = m; + rgbArr["blue"] = n; + break; + case 6: + rgbArr["red"] = val; + rgbArr["green"] = n; + rgbArr["blue"] = m; + break; + } + rgbArr["red"] = Math.round(rgbArr["red"] * 255); + rgbArr["green"] = Math.round(rgbArr["green"] * 255); + rgbArr["blue"] = Math.round(rgbArr["blue"] * 255); + } + return rgbArr; +} + +/** + * converts RGB to HSV values + * @param {Number} red the hue value (between 0 and 255) + * @param {Number} green the saturation value (between 0 and 255) + * @param {Number} blue the value value (between 0 and 255) + * @return hsvArr the hsv values (associative array or object, the keys are: hue (0-360),sat (0-1),val (0-1)) + * @type Object + * @version 1.0 (2007-05-01) + * @see #hsv2rgb + */ +function rgb2hsv(red,green,blue) { + var hsvArr = new Object(); + red = red / 255; + green = green / 
255; + blue = blue / 255; + myMax = Math.max(red, Math.max(green,blue)); + myMin = Math.min(red, Math.min(green,blue)); + v = myMax; + if (myMax > 0) { + s = (myMax - myMin) / myMax; + } + else { + s = 0; + } + if (s > 0) { + myDiff = myMax - myMin; + rc = (myMax - red) / myDiff; + gc = (myMax - green) / myDiff; + bc = (myMax - blue) / myDiff; + if (red == myMax) { + h = (bc - gc) / 6; + } + if (green == myMax) { + h = (2 + rc - bc) / 6; + } + if (blue == myMax) { + h = (4 + gc - rc) / 6; + } + } + else { + h = 0; + } + if (h < 0) { + h += 1; + } + hsvArr["hue"] = Math.round(h * 360); + hsvArr["sat"] = s; + hsvArr["val"] = v; + return hsvArr; +} + +/** + * populates an array such that it can be addressed by both a key or an index nr, + * note that both Arrays need to be of the same length + * @param {Array} arrayKeys the array containing the keys + * @param {Array} arrayValues the array containing the values + * @return returnArray the resulting array containing both associative values and also a regular indexed array + * @type Array + * @version 1.0 (2007-05-01) + */ +function arrayPopulate(arrayKeys,arrayValues) { + var returnArray = new Array(); + if (arrayKeys.length != arrayValues.length) { + alert("error: arrays do not have the same length!"); + } + else { + for (i=0;i<arrayKeys.length;i++) { + returnArray[arrayKeys[i]] = arrayValues[i]; + } + } + return returnArray; +} + +/** + * Wrapper object for network requests, uses getURL or XMLHttpRequest depending on availability + * The callBackFunction receives a XML or text node representing the rootElement + * of the fragment received or the return text, depending on the returnFormat. + * See also the following <a href="http://www.carto.net/papers/svg/network_requests/">documentation</a>. 
+ * @class this is a wrapper object to provide network request functionality (get|post) + * @param {String} url the URL/IRI of the network resource to be called + * @param {Function|Object} callBackFunction the callBack function or object that is called after the data was received, in case of an object, the method 'receiveData' is called; both the function and the object's 'receiveData' method get 2 return parameters: 'node.firstChild'|text (the root element of the XML or text resource), this.additionalParams (if defined) + * @param {String} returnFormat the return format, either 'xml' or 'json' (or text) + * @param {String} method the method of the network request, either 'get' or 'post' + * @param {String|Undefined} postText the String containing the post text (optional) or Undefined (if not a 'post' request) + * @param {Object|Array|String|Number|Undefined} additionalParams additional parameters that will be passed to the callBackFunction or object (optional) or Undefined + * @return a new getData instance + * @type getData + * @constructor + * @version 1.0 (2007-02-23) + */ +function getData(url,callBackFunction,returnFormat,method,postText,additionalParams) { + this.url = url; + this.callBackFunction = callBackFunction; + this.returnFormat = returnFormat; + this.method = method; + this.additionalParams = additionalParams; + if (method != "get" && method != "post") { + alert("Error in network request: parameter 'method' must be 'get' or 'post'"); + } + this.postText = postText; + this.xmlRequest = null; //@private reference to the XMLHttpRequest object +} + +/** + * triggers the network request defined in the constructor + */ +getData.prototype.getData = function() { + //call getURL() if available + if (window.getURL) { + if (this.method == "get") { + getURL(this.url,this); + } + if (this.method == "post") { + postURL(this.url,this.postText,this); + } + } + //or call XMLHttpRequest() if available + else if (window.XMLHttpRequest) { + var _this = this; + 
this.xmlRequest = new XMLHttpRequest(); + if (this.method == "get") { + if (this.returnFormat == "xml") { + this.xmlRequest.overrideMimeType("text/xml"); + } + this.xmlRequest.open("GET",this.url,true); + } + if (this.method == "post") { + this.xmlRequest.open("POST",this.url,true); + } + this.xmlRequest.onreadystatechange = function() {_this.handleEvent()}; + if (this.method == "get") { + this.xmlRequest.send(null); + } + if (this.method == "post") { + //test if postText exists and is of type string + var reallyPost = true; + if (!this.postText) { + reallyPost = false; + alert("Error in network post request: missing parameter 'postText'!"); + } + if (typeof(this.postText) != "string") { + reallyPost = false; + alert("Error in network post request: parameter 'postText' has to be of type 'string')"); + } + if (reallyPost) { + this.xmlRequest.send(this.postText); + } + } + } + //write an error message if neither method is available + else { + alert("your browser/svg viewer neither supports window.getURL nor window.XMLHttpRequest!"); + } +} + +/** + * this is the callback method for the getURL() or postURL() case + * @private + */ +getData.prototype.operationComplete = function(data) { + //check if data has a success property + if (data.success) { + //parse content of the XML format to the variable "node" + if (this.returnFormat == "xml") { + //convert the text information to an XML node and get the first child + var node = parseXML(data.content,document); + //distinguish between a callback function and an object + if (typeof(this.callBackFunction) == "function") { + this.callBackFunction(node.firstChild,this.additionalParams); + } + if (typeof(this.callBackFunction) == "object") { + this.callBackFunction.receiveData(node.firstChild,this.additionalParams); + } + } + if (this.returnFormat == "json") { + if (typeof(this.callBackFunction) == "function") { + this.callBackFunction(data.content,this.additionalParams); + } + if (typeof(this.callBackFunction) == "object") { + 
this.callBackFunction.receiveData(data.content,this.additionalParams); + } + } + } + else { + alert("something went wrong with dynamic loading of geometry!"); + } +} + +/** + * this is the callback method for the XMLHttpRequest case + * @private + */ +getData.prototype.handleEvent = function() { + if (this.xmlRequest.readyState == 4) { + if (this.returnFormat == "xml") { + //we need to import the XML node first + var importedNode = document.importNode(this.xmlRequest.responseXML.documentElement,true); + if (typeof(this.callBackFunction) == "function") { + this.callBackFunction(importedNode,this.additionalParams); + } + if (typeof(this.callBackFunction) == "object") { + this.callBackFunction.receiveData(importedNode,this.additionalParams); + } + } + if (this.returnFormat == "json") { + if (typeof(this.callBackFunction) == "function") { + this.callBackFunction(this.xmlRequest.responseText,this.additionalParams); + } + if (typeof(this.callBackFunction) == "object") { + this.callBackFunction.receiveData(this.xmlRequest.responseText,this.additionalParams); + } + } + } +} + +/** + * Serializes an XML node and returns a string representation. Wrapper function to hide implementation differences. + * This can be used for debugging purposes or to post data to a server or network resource. 
+ * @param {dom::Node} node the DOM node reference + * @return textRepresentation the String representation of the XML node + * @type String + * @version 1.0 (2007-05-01) + * @see getData + */ +function serializeNode(node) { + if (typeof XMLSerializer != 'undefined') { + return new XMLSerializer().serializeToString(node); + } + else if (typeof node.xml != 'undefined') { + return node.xml; + } + else if (typeof printNode != 'undefined') { + return printNode(node); + } + else if (typeof Packages != 'undefined') { + try { + var stringWriter = new java.io.StringWriter(); + Packages.org.apache.batik.dom.util.DOMUtilities.writeNode(node,stringWriter); + return stringWriter.toString(); + } + catch (e) { + alert("Sorry, your SVG viewer does not support the printNode/serialize function."); + return ''; + } + } + else { + alert("Sorry, your SVG viewer does not support the printNode/serialize function."); + return ''; + } +} + +/** + * Starts a SMIL animation element with the given id by triggering the '.beginElement()' method. + * This is a convenience (shortcut) function. + * @param {String} id a valid id of a valid SMIL animation element + * @version 1.0 (2007-05-01) + */ +//starts an animtion with the given id +//this function is useful in combination with window.setTimeout() +function startAnimation(id) { + document.getElementById(id).beginElement(); +} diff -r 0dc1fc63c945 -r 0b682d3dd01b static/scripts/timer.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/static/scripts/timer.js Tue Apr 13 11:15:35 2010 -0400 @@ -0,0 +1,74 @@ +// source/credits: "Algorithm": http://www.codingforums.com/showthread.php?s=&threadid=10531 +// The constructor should be called with +// the parent object (optional, defaults to window). 
+ +function Timer(){ + this.obj = (arguments.length)?arguments[0]:window; + return this; +} + +// The set functions should be called with: +// - The name of the object method (as a string) (required) +// - The millisecond delay (required) +// - Any number of extra arguments, which will all be +// passed to the method when it is evaluated. + +Timer.prototype.setInterval = function(func, msec){ + var i = Timer.getNew(); + var t = Timer.buildCall(this.obj, i, arguments); + Timer.set[i].timer = window.setInterval(t,msec); + return i; +} +Timer.prototype.setTimeout = function(func, msec){ + var i = Timer.getNew(); + Timer.buildCall(this.obj, i, arguments); + Timer.set[i].timer = window.setTimeout("Timer.callOnce("+i+");",msec); + return i; +} + +// The clear functions should be called with +// the return value from the equivalent set function. + +Timer.prototype.clearInterval = function(i){ + if(!Timer.set[i]) return; + window.clearInterval(Timer.set[i].timer); + Timer.set[i] = null; +} +Timer.prototype.clearTimeout = function(i){ + if(!Timer.set[i]) return; + window.clearTimeout(Timer.set[i].timer); + Timer.set[i] = null; +} + +// Private data + +Timer.set = new Array(); +Timer.buildCall = function(obj, i, args){ + var t = ""; + Timer.set[i] = new Array(); + if(obj != window){ + Timer.set[i].obj = obj; + t = "Timer.set["+i+"].obj."; + } + t += args[0]+"("; + if(args.length > 2){ + Timer.set[i][0] = args[2]; + t += "Timer.set["+i+"][0]"; + for(var j=1; (j+2)<args.length; j++){ + Timer.set[i][j] = args[j+2]; + t += ", Timer.set["+i+"]["+j+"]"; + }} + t += ");"; + Timer.set[i].call = t; + return t; +} +Timer.callOnce = function(i){ + if(!Timer.set[i]) return; + eval(Timer.set[i].call); + Timer.set[i] = null; +} +Timer.getNew = function(){ + var i = 0; + while(Timer.set[i]) i++; + return i; +} \ No newline at end of file diff -r 0dc1fc63c945 -r 0b682d3dd01b tools/rgenetics/rgGRR.py --- a/tools/rgenetics/rgGRR.py Tue Apr 13 10:22:27 2010 -0400 +++ b/tools/rgenetics/rgGRR.py 
Tue Apr 13 11:15:35 2010 -0400 @@ -1,1096 +1,1145 @@ -""" -# july 2009: Need to see outliers so need to draw them last? -# could use clustering on the zscores to guess real relationships for unrelateds -# but definitely need to draw last -# added MAX_SHOW_ROWS to limit the length of the main report page -# Changes for Galaxy integration -# added more robust knuth method for one pass mean and sd -# no difference really - let's use scipy.mean() and scipy.std() instead... -# fixed labels and changed to .xls for outlier reports so can open in excel -# interesting - with a few hundred subjects, 5k gives good resolution -# and 100k gives better but not by much -# TODO remove non autosomal markers -# TODO it would be best if label had the zmean and zsd as these are what matter for -# outliers rather than the group mean/sd -# mods to rgGRR.py from channing CVS which John Ziniti has rewritten to produce SVG plots -# to make a Galaxy tool - we need the table of mean and SD for interesting pairs, the SVG and the log -# so the result should be an HTML file - -# rgIBS.py -# use a random subset of markers for a quick ibs -# to identify sample dups and closely related subjects -# try snpMatrix and plink and see which one works best for us? -# abecasis grr plots mean*sd for every subject to show clusters -# mods june 23 rml to avoid non-autosomal markers -# we seem to be distinguishing parent-child by gender - 2 clouds! - - -snpMatrix from David Clayton has: -ibs.stats function to calculate the identity-by-state stats of a group of samples -Description -Given a snp.matrix-class or a X.snp.matrix-class object with N samples, calculates some statistics -about the relatedness of every pair of samples within. - -Usage -ibs.stats(x) -8 ibs.stats -Arguments -x a snp.matrix-class or a X.snp.matrix-class object containing N samples -Details -No-calls are excluded from consideration here. 
-Value -A data.frame containing N(N - 1)/2 rows, where the row names are the sample name pairs separated -by a comma, and the columns are: -Count count of identical calls, exclusing no-calls -Fraction fraction of identical calls comparied to actual calls being made in both samples -Warning -In some applications, it may be preferable to subset a (random) selection of SNPs first - the -calculation -time increases as N(N - 1)M/2 . Typically for N = 800 samples and M = 3000 SNPs, the -calculation time is about 1 minute. A full GWA scan could take hours, and quite unnecessary for -simple applications such as checking for duplicate or related samples. -Note -This is mostly written to find mislabelled and/or duplicate samples. -Illumina indexes their SNPs in alphabetical order so the mitochondria SNPs comes first - for most -purpose it is undesirable to use these SNPs for IBS purposes. -TODO: Worst-case S4 subsetting seems to make 2 copies of a large object, so one might want to -subset before rbind(), etc; a future version of this routine may contain a built-in subsetting facility -""" -import sys,os,time,random,string,copy,optparse - -try: - set -except NameError: - from Sets import Set as set - -from rgutils import timenow -import plinkbinJZ - - -opts = None -verbose = False - -showPolygons = False - -class NullDevice: - def write(self, s): - pass - -tempstderr = sys.stderr # save -sys.stderr = NullDevice() -# need to avoid blather about deprecation and other strange stuff from scipy -# the current galaxy job runner assumes that -# the job is in error if anything appears on sys.stderr -# grrrrr. James wants to keep it that way instead of using the -# status flag for some strange reason. 
Presumably he doesn't use R or (in this case, scipy) -import numpy -import scipy -from scipy import weave - - -sys.stderr=tempstderr - - -PROGNAME = os.path.split(sys.argv[0])[-1] -X_AXIS_LABEL = 'Mean Alleles Shared' -Y_AXIS_LABEL = 'SD Alleles Shared' -LEGEND_ALIGN = 'topleft' -LEGEND_TITLE = 'Relationship' -DEFAULT_SYMBOL_SIZE = 1.0 # default symbol size -DEFAULT_SYMBOL_SIZE = 0.5 # default symbol size - -### Some colors for R/rpy -R_BLACK = 1 -R_RED = 2 -R_GREEN = 3 -R_BLUE = 4 -R_CYAN = 5 -R_PURPLE = 6 -R_YELLOW = 7 -R_GRAY = 8 - -### ... and some point-styles - -### -PLOT_HEIGHT = 600 -PLOT_WIDTH = 1150 - - -#SVG_COLORS = ('black', 'darkblue', 'blue', 'deepskyblue', 'firebrick','maroon','crimson') -#SVG_COLORS = ('cyan','dodgerblue','mediumpurple', 'fuchsia', 'red','gold','gray') -SVG_COLORS = ('cyan','dodgerblue','mediumpurple','forestgreen', 'lightgreen','gold','gray') -# dupe,parentchild,sibpair,halfsib,parents,unrel,unkn -#('orange', 'red', 'green', 'chartreuse', 'blue', 'purple', 'gray') - -OUTLIERS_HEADER = 'Mean\tSdev\tZ(mean)\tZ(sdev)\tFID1\tIID1\tFID2\tIID2\tMean(Rel_Mean)\tSdev(Rel_Mean)\tMean(Rel_Sdev)\tSdev(Rel_Sdev)\n' -OUTLIERS_HEADER_list = ['Mean','Sdev','ZMean','ZSdev','FID1','IID1','FID2','IID2', -'RGMean_M','RGMean_SD','RGSD_M','RGSD_SD'] -TABLE_HEADER='fid1 iid1\tfid2 iid2\tmean\tsdev\tzmean\tzsdev\tgeno\trelcode\n' - - -### Relationship codes, text, and lookups/mappings -N_RELATIONSHIP_TYPES = 7 -REL_DUPE, REL_PARENTCHILD, REL_SIBS, REL_HALFSIBS, REL_RELATED, REL_UNRELATED, REL_UNKNOWN = range(N_RELATIONSHIP_TYPES) -REL_LOOKUP = { - REL_DUPE: ('dupe', R_BLUE, 1), - REL_PARENTCHILD: ('parentchild', R_YELLOW, 1), - REL_SIBS: ('sibpairs', R_RED, 1), - REL_HALFSIBS: ('halfsibs', R_GREEN, 1), - REL_RELATED: ('parents', R_PURPLE, 1), - REL_UNRELATED: ('unrelated', R_CYAN, 1), - REL_UNKNOWN: ('unknown', R_GRAY, 1), - } -OUTLIER_STDEVS = { - REL_DUPE: 2, - REL_PARENTCHILD: 2, - REL_SIBS: 2, - REL_HALFSIBS: 2, - REL_RELATED: 2, - REL_UNRELATED: 3, 
- REL_UNKNOWN: 2, - } -# note now Z can be passed in - -REL_STATES = [REL_LOOKUP[r][0] for r in range(N_RELATIONSHIP_TYPES)] -REL_COLORS = SVG_COLORS -REL_POINTS = [REL_LOOKUP[r][2] for r in range(N_RELATIONSHIP_TYPES)] - -DEFAULT_MAX_SAMPLE_SIZE = 10000 - -REF_COUNT_HOM1 = 3 -REF_COUNT_HET = 2 -REF_COUNT_HOM2 = 1 -MISSING = 0 -MAX_SHOW_ROWS = 100 # framingham has millions - delays showing output page - so truncate and explain -MARKER_PAIRS_PER_SECOND_SLOW = 15000000.0 -MARKER_PAIRS_PER_SECOND_FAST = 70000000.0 - - -galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> -<head> -<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> -<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> -<title></title> -<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> -</head> -<body> -<div class="document"> -""" - - -SVG_HEADER = '''<?xml version="1.0" standalone="no"?> -<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.2//EN" "http://www.w3.org/Graphics/SVG/1.2/DTD/svg12.dtd"> - -<svg width="1280" height="800" - xmlns="http://www.w3.org/2000/svg" version="1.2" - xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 1280 800" onload="init()"> - - <script type="text/ecmascript" xlink:href="/static/scripts/tools/rgenetics/checkbox_and_radiobutton.js"/> - <script type="text/ecmascript" xlink:href="/static/scripts/tools/rgenetics/helper_functions.js"/> - <script type="text/ecmascript" xlink:href="/static/scripts/tools/rgenetics/timer.js"/> - <script type="text/ecmascript"> - <![CDATA[ - var checkBoxes = new Array(); - var radioGroupBandwidth; - var colours = ['%s','%s','%s','%s','%s','%s','%s']; - function init() { - var style = {"font-family":"Arial,Helvetica", "fill":"black", "font-size":12}; - var dist = 12; - var yOffset 
= 4; - - //A checkBox for each relationship type dupe,parentchild,sibpair,halfsib,parents,unrel,unkn - checkBoxes["dupe"] = new checkBox("dupe","checkboxes",20,40,"cbRect","cbCross",true,"Duplicate",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["parentchild"] = new checkBox("parentchild","checkboxes",20,60,"cbRect","cbCross",true,"Parent-Child",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["sibpairs"] = new checkBox("sibpairs","checkboxes",20,80,"cbRect","cbCross",true,"Sib-pairs",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["halfsibs"] = new checkBox("halfsibs","checkboxes",20,100,"cbRect","cbCross",true,"Half-sibs",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["parents"] = new checkBox("parents","checkboxes",20,120,"cbRect","cbCross",true,"Parents",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["unrelated"] = new checkBox("unrelated","checkboxes",20,140,"cbRect","cbCross",true,"Unrelated",style,dist,yOffset,undefined,hideShowLayer); - checkBoxes["unknown"] = new checkBox("unknown","checkboxes",20,160,"cbRect","cbCross",true,"Unknown",style,dist,yOffset,undefined,hideShowLayer); - - } - - function hideShowLayer(id, status, label) { - var vis = "hidden"; - if (status) { - vis = "visible"; - } - document.getElementById(id).setAttributeNS(null, 'visibility', vis); - } - - function showBTT(evt, rel, mm, dm, md, dd, n, mg, dg, lg, hg) { - var x = parseInt(evt.pageX)-250; - var y = parseInt(evt.pageY)-110; - switch(rel) { - case 0: - fill = colours[rel]; - relt = "dupe"; - break; - case 1: - fill = colours[rel]; - relt = "parentchild"; - break; - case 2: - fill = colours[rel]; - relt = "sibpairs"; - break; - case 3: - fill = colours[rel]; - relt = "halfsibs"; - break; - case 4: - fill = colours[rel]; - relt = "parents"; - break; - case 5: - fill = colours[rel]; - relt = "unrelated"; - break; - case 6: - fill = colours[rel]; - relt = "unknown"; - break; - default: - fill = "cyan"; - relt = "ERROR_CODE: "+rel; - 
} - - document.getElementById("btRel").textContent = "GROUP: "+relt; - document.getElementById("btMean").textContent = "mean="+mm+" +/- "+dm; - document.getElementById("btSdev").textContent = "sdev="+dm+" +/- "+dd; - document.getElementById("btPair").textContent = "npairs="+n; - document.getElementById("btGeno").textContent = "ngenos="+mg+" +/- "+dg+" (min="+lg+", max="+hg+")"; - document.getElementById("btHead").setAttribute('fill', fill); - - var tt = document.getElementById("btTip"); - tt.setAttribute("transform", "translate("+x+","+y+")"); - tt.setAttribute('visibility', 'visible'); - } - - function showOTT(evt, rel, s1, s2, mean, sdev, ngeno, rmean, rsdev) { - var x = parseInt(evt.pageX)-150; - var y = parseInt(evt.pageY)-180; - - switch(rel) { - case 0: - fill = colours[rel]; - relt = "dupe"; - break; - case 1: - fill = colours[rel]; - relt = "parentchild"; - break; - case 2: - fill = colours[rel]; - relt = "sibpairs"; - break; - case 3: - fill = colours[rel]; - relt = "halfsibs"; - break; - case 4: - fill = colours[rel]; - relt = "parents"; - break; - case 5: - fill = colours[rel]; - relt = "unrelated"; - break; - case 6: - fill = colours[rel]; - relt = "unknown"; - break; - default: - fill = "cyan"; - relt = "ERROR_CODE: "+rel; - } - - document.getElementById("otRel").textContent = "PAIR: "+relt; - document.getElementById("otS1").textContent = "s1="+s1; - document.getElementById("otS2").textContent = "s2="+s2; - document.getElementById("otMean").textContent = "mean="+mean; - document.getElementById("otSdev").textContent = "sdev="+sdev; - document.getElementById("otGeno").textContent = "ngenos="+ngeno; - document.getElementById("otRmean").textContent = "relmean="+rmean; - document.getElementById("otRsdev").textContent = "relsdev="+rsdev; - document.getElementById("otHead").setAttribute('fill', fill); - - var tt = document.getElementById("otTip"); - tt.setAttribute("transform", "translate("+x+","+y+")"); - tt.setAttribute('visibility', 'visible'); - } - - 
function hideBTT(evt) { - document.getElementById("btTip").setAttributeNS(null, 'visibility', 'hidden'); - } - - function hideOTT(evt) { - document.getElementById("otTip").setAttributeNS(null, 'visibility', 'hidden'); - } - - ]]> - </script> - <defs> -  - <symbol id="cbRect" overflow="visible"> - <rect x="-5" y="-5" width="10" height="10" fill="white" stroke="dimgray" stroke-width="1" cursor="pointer"/> - </symbol> - <symbol id="cbCross" overflow="visible"> - <g pointer-events="none" stroke="black" stroke-width="1"> - <line x1="-3" y1="-3" x2="3" y2="3"/> - <line x1="3" y1="-3" x2="-3" y2="3"/> - </g> - </symbol> - </defs> - -<desc>Developer Works Dynamic Scatter Graph Scaling Example</desc> - - -<g style="stroke-width:1.0; stroke:black; shape-rendering:crispEdges"> -  - <path d="M 100 100 L 1250 100 Z"/> - <path d="M 100 700 L 1250 700 Z"/> - -  - <path d="M 100 100 L 100 700 Z"/> - <path d="M 1250 100 L 1250 700 Z"/> -</g> - -<g transform="translate(100,100)"> - -  - <g style="fill:none; stroke:#dddddd; stroke-width:1; stroke-dasharray:2,2; text-anchor:end; shape-rendering:crispEdges"> - -  - <line x1="125" y1="0" x2="115" y2="600" /> - <line x1="230" y1="0" x2="230" y2="600" /> - <line x1="345" y1="0" x2="345" y2="600" /> - <line x1="460" y1="0" x2="460" y2="600" /> - <line x1="575" y1="0" x2="575" y2="600" style="stroke-dasharray:none;" /> - <line x1="690" y1="0" x2="690" y2="600" /> - <line x1="805" y1="0" x2="805" y2="600" /> - <line x1="920" y1="0" x2="920" y2="600" /> - <line x1="1035" y1="0" x2="1035" y2="600" /> - -  - <line x1="0" y1="60" x2="1150" y2="60" /> - <line x1="0" y1="120" x2="1150" y2="120" /> - <line x1="0" y1="180" x2="1150" y2="180" /> - <line x1="0" y1="240" x2="1150" y2="240" /> - <line x1="0" y1="300" x2="1150" y2="300" style="stroke-dasharray:none;" /> - <line x1="0" y1="360" x2="1150" y2="360" /> - <line x1="0" y1="420" x2="1150" y2="420" /> - <line x1="0" y1="480" x2="1150" y2="480" /> - <line x1="0" y1="540" x2="1150" y2="540" /> - 
</g> - -  - <g style="fill:black; stroke:none" font-size="12" font-family="Arial" transform="translate(25,25)"> - <rect width="160" height="270" style="fill:none; stroke:black; shape-rendering:crispEdges" /> - <text x="5" y="20" style="fill:black; stroke:none;" font-size="13" font-weight="bold">Given Pair Relationship</text> - <rect x="120" y="35" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="55" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="75" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="95" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="115" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="135" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <rect x="120" y="155" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> - <text x="15" y="195" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore gt 15</text> - <circle cx="125" cy="192" r="6" style="stroke:red; fill:gold; fill-opacity:1.0; stroke-width:1;"/> - <text x="15" y="215" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore 4 to 15</text> - <circle cx="125" cy="212" r="3" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/> - <text x="15" y="235" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore lt 4</text> - <circle cx="125" cy="232" r="2" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/> - <g id="checkboxes"> - </g> - </g> - - - <g style='fill:black; stroke:none' font-size="17" font-family="Arial"> -  - <text x="480" y="660">Mean Alleles Shared</text> - <text x="0" y="630" >1.0</text> - <text x="277" y="630" >1.25</text> - <text x="564" y="630" >1.5</text> - <text x="842" y="630" 
>1.75</text> - <text x="1140" y="630" >2.0</text> - </g> - - <g transform="rotate(270)" style="fill:black; stroke:none" font-size="17" font-family="Arial"> -  - <text x="-350" y="-40">SD Alleles Shared</text> - <text x="-20" y="-10" >1.0</text> - <text x="-165" y="-10" >0.75</text> - <text x="-310" y="-10" >0.5</text> - <text x="-455" y="-10" >0.25</text> - <text x="-600" y="-10" >0.0</text> - </g> - - -<g style="fill:black; stroke:none" font-size="18" font-family="Arial"> - <text x="425" y="-30">%s</text> -</g> - - -''' - -SVG_FOOTER = ''' - -</g> -<g id="btTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial"> - <rect width="250" height="110" style="fill:silver" rx="2" ry="2"/> - <rect id="btHead" width="250" height="20" rx="2" ry="2" /> - <text id="btRel" y="14" x="85">unrelated</text> - <text id="btMean" y="40" x="4">mean=1.5 +/- 0.04</text> - <text id="btSdev" y="60" x="4">sdev=0.7 +/- 0.03</text> - <text id="btPair" y="80" x="4">npairs=1152</text> - <text id="btGeno" y="100" x="4">ngenos=4783 +/- 24 (min=1000, max=5000)</text> -</g> - -<g id="otTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial"> - <rect width="150" height="180" style="fill:silver" rx="2" ry="2"/> - <rect id="otHead" width="150" height="20" rx="2" ry="2" /> - <text id="otRel" y="14" x="40">sibpairs</text> - <text id="otS1" y="40" x="4">s1=fid1,iid1</text> - <text id="otS2" y="60" x="4">s2=fid2,iid2</text> - <text id="otMean" y="80" x="4">mean=1.82</text> - <text id="otSdev" y="100" x="4">sdev=0.7</text> - <text id="otGeno" y="120" x="4">ngeno=4487</text> - <text id="otRmean" y="140" x="4">relmean=1.85</text> - <text id="otRsdev" y="160" x="4">relsdev=0.65</text> -</g> -</svg> -''' - -OUTLIERS_HEADER = 'Mean\tSdev\tZ(mean)\tZ(sdev)\tFID1\tIID1\tFID2\tIID2\tMean(Mean)\tSdev(Mean)\tMean(Sdev)\tSdev(Sdev)\n' - -DEFAULT_MAX_SAMPLE_SIZE = 5000 - -REF_COUNT_HOM1 = 3 -REF_COUNT_HET = 2 
-REF_COUNT_HOM2 = 1 -MISSING = 0 - -MARKER_PAIRS_PER_SECOND_SLOW = 15000000 -MARKER_PAIRS_PER_SECOND_FAST = 70000000 - -POLYGONS = { - REL_UNRELATED: ((1.360, 0.655), (1.385, 0.730), (1.620, 0.575), (1.610, 0.505)), - REL_HALFSIBS: ((1.630, 0.500), (1.630, 0.550), (1.648, 0.540), (1.648, 0.490)), - REL_SIBS: ((1.660, 0.510), (1.665, 0.560), (1.820, 0.410), (1.820, 0.390)), - REL_PARENTCHILD: ((1.650, 0.470), (1.650, 0.490), (1.750, 0.440), (1.750, 0.420)), - REL_DUPE: ((1.970, 0.000), (1.970, 0.150), (2.000, 0.150), (2.000, 0.000)), - } - -def distance(point1, point2): - """ Calculate the distance between two points - """ - (x1,y1) = [float(d) for d in point1] - (x2,y2) = [float(d) for d in point2] - dx = abs(x1 - x2) - dy = abs(y1 - y2) - return math.sqrt(dx**2 + dy**2) - -def point_inside_polygon(x, y, poly): - """ Determine if a point (x,y) is inside a given polygon or not - poly is a list of (x,y) pairs. - - Taken from: http://www.ariel.com.au/a/python-point-int-poly.html - """ - - n = len(poly) - inside = False - - p1x,p1y = poly[0] - for i in range(n+1): - p2x,p2y = poly[i % n] - if y > min(p1y,p2y): - if y <= max(p1y,p2y): - if x <= max(p1x,p2x): - if p1y != p2y: - xinters = (y-p1y)*(p2x-p1x)/(p2y-p1y)+p1x - if p1x == p2x or x <= xinters: - inside = not inside - p1x,p1y = p2x,p2y - return inside - -def readMap(pedfile): - """ - """ - mapfile = pedfile.replace('.ped', '.map') - marker_list = [] - if os.path.exists(mapfile): - print 'readMap: %s' % (mapfile) - fh = file(mapfile, 'r') - for line in fh: - marker_list.append(line.strip().split()) - fh.close() - print 'readMap: %s markers' % (len(marker_list)) - return marker_list - -def calcMeanSD(useme): - """ - A numerically stable algorithm is given below. It also computes the mean. 
- This algorithm is due to Knuth,[1] who cites Welford.[2] - n = 0 - mean = 0 - M2 = 0 - - foreach x in data: - n = n + 1 - delta = x - mean - mean = mean + delta/n - M2 = M2 + delta*(x - mean) // This expression uses the new value of mean - end for - - variance_n = M2/n - variance = M2/(n - 1) - """ - mean = 0.0 - M2 = 0.0 - sd = 0.0 - n = len(useme) - if n > 1: - for i,x in enumerate(useme): - delta = x - mean - mean = mean + delta/(i+1) # knuth uses n+=1 at start - M2 = M2 + delta*(x - mean) # This expression uses the new value of mean - variance = M2/(n-1) # assume is sample so lose 1 DOF - sd = pow(variance,0.5) - return mean,sd - - -def doIBSpy(inpath='',basename='',outdir=None,logf=None, - nrsSamples=10000,title='title',pdftoo=0,Zcutoff=2.0): - #def doIBS(pedName, title, nrsSamples=None, pdftoo=False): - """ started with snpmatrix but GRR uses actual IBS counts and sd's - """ - repOut = [] # text strings to add to the html display - refallele = {} - tblf = '%s_table.xls' % (title) - tbl = file(os.path.join(outdir,tblf), 'w') - tbl.write(TABLE_HEADER) - svgf = '%s.svg' % (title) - svg = file(os.path.join(outdir,svgf), 'w') - - bedname = '%s.bed' % (inpath) - pedname = '%s.ped' % (inpath) - print 'pedname',pedname - if os.path.exists(bedname): - ped = plinkbinJZ.BPed(inpath) - ped.parse(quick=True) - elif os.path.exists(pedname): - ped = plinkbinJZ.LPed(inpath) - ped.parse() - else: - print >> sys.stdout, '## doIBSpy problem - cannot open %s or %s - cannot run' % (bedname,pedname) - nMarkers = len(ped._markers) - if nMarkers < 5: - print sys.stderr, '### ERROR - %d is too few markers for reliable estimation in %s - terminating' % (nMarkers,PROGNAME) - sys.exit(1) - nSubjects = len(ped._subjects) - nrsSamples = min(nMarkers, nrsSamples) - if opts and opts.use_mito: - markers = range(nMarkers) - nrsSamples = min(len(markers), nrsSamples) - sampleIndexes = sorted(random.sample(markers, nrsSamples)) - else: - autosomals = ped.autosomal_indices() - nrsSamples = 
min(len(autosomals), nrsSamples) - sampleIndexes = sorted(random.sample(autosomals, nrsSamples)) - - print '' - print 'Getting random.sample of %s from %s total' % (nrsSamples, nMarkers) - npairs = (nSubjects*(nSubjects-1))/2 # total rows in table - newfiles=[svgf,tblf] - explanations = ['rgGRR Plot (requires SVG)','Mean by SD alleles shared - %d rows' % npairs] - # these go with the output file links in the html file - s = 'Reading genotypes for %s subjects and %s markers\n' % (nSubjects, nrsSamples) - logf.write(s) - minUsegenos = nrsSamples/2 # must have half? - nGenotypes = nSubjects*nrsSamples - stime = time.time() - emptyRows = set() - genos = numpy.zeros((nSubjects, nrsSamples), dtype=int) - for s in xrange(nSubjects): - nValid = 0 - #getGenotypesByIndices(self, s, mlist, format) - genos[s] = ped.getGenotypesByIndices(s, sampleIndexes, format='ref') - nValid = sum([1 for g in genos[s] if g]) - if not nValid: - emptyRows.add(s) - sub = ped.getSubject(s) - print 'All missing for row %d (%s)' % (s, sub) - logf.write('All missing for row %d (%s)\n' % (s, sub)) - rtime = time.time() - stime - if verbose: - print '@@Read %s genotypes in %s seconds' % (nGenotypes, rtime) - - - ### Now the expensive part. For each pair of subjects, we get the mean number - ### and standard deviation of shared alleles over all of the markers where both - ### subjects have a known genotype. Identical subjects should have mean shared - ### alleles very close to 2.0 with a standard deviation very close to 0.0. 
- tot = nSubjects*(nSubjects-1)/2 - nprog = tot/10 - nMarkerpairs = tot * nrsSamples - estimatedTimeSlow = nMarkerpairs/MARKER_PAIRS_PER_SECOND_SLOW - estimatedTimeFast = nMarkerpairs/MARKER_PAIRS_PER_SECOND_FAST - - pairs = [] - pair_data = {} - means = [] ## Mean IBS for each pair - ngenoL = [] ## Count of comparable genotypes for each pair - sdevs = [] ## Standard dev for each pair - rels = [] ## A relationship code for each pair - zmeans = [0.0 for x in xrange(tot)] ## zmean score for each pair for the relgroup - zstds = [0.0 for x in xrange(tot)] ## zstd score for each pair for the relgrp - skip = set() - ndone = 0 ## How many have been done so far - - logf.write('Calculating %d pairs, updating every %d pairs...\n' % (tot, nprog)) - logf.write('Estimated time is %2.2f to %2.2f seconds ...\n' % (estimatedTimeFast, estimatedTimeSlow)) - - t1sum = 0 - t2sum = 0 - t3sum = 0 - now = time.time() - scache = {} - _founder_cache = {} - C_CODE = """ - #include "math.h" - int i; - int sumibs = 0; - int ssqibs = 0; - int ngeno = 0; - float mean = 0; - float M2 = 0; - float delta = 0; - float sdev=0; - float variance=0; - for (i=0; i<nrsSamples; i++) { - int a1 = g1[i]; - int a2 = g2[i]; - if (a1 != 0 && a2 != 0) { - ngeno += 1; - int shared = 2-abs(a1-a2); - delta = shared - mean; - mean = mean + delta/ngeno; - M2 += delta*(shared-mean); - // yes that second time, the updated mean is used see calcmeansd above; - //printf("%d %d %d %d %d %d\\n", i, a1, a2, ngeno, shared, squared); - } - } - if (ngeno > 1) { - variance = M2/(ngeno-1); - sdev = sqrt(variance); - //printf("OK: %d %3.2f %3.2f\\n", ngeno, mean, sdev); - } - //printf("%d %d %d %1.2f %1.2f\\n", ngeno, sumibs, ssqibs, mean, sdev); - result[0] = ngeno; - result[1] = mean; - result[2] = sdev; - return_val = ngeno; - """ - started = time.time() - for s1 in xrange(nSubjects): - if s1 in emptyRows: - continue - (fid1,iid1,did1,mid1,sex1,phe1,iid1,d_sid1,m_sid1) = scache.setdefault(s1, ped.getSubject(s1)) - - isFounder1 
= _founder_cache.setdefault(s1, (did1==mid1)) - g1 = genos[s1] - - for s2 in xrange(s1+1, nSubjects): - if s2 in emptyRows: - continue - if nprog and ndone % nprog == 0 and ndone > 1: - dur = time.time() - started - pct = float(ndone)/tot*100.0 - logf.write('%f sec at pair %d of %d (%3.2f%%): %f marker*pairs/sec\n' % (dur, ndone, tot, pct, ndone/dur*nrsSamples)) - t1s = time.time() - - (fid2,iid2,did2,mid2,sex2,phe2,iid2,d_sid2,m_sid2) = scache.setdefault(s2, ped.getSubject(s2)) - - g2 = genos[s2] - isFounder2 = _founder_cache.setdefault(s2, (did2==mid2)) - - # Determine the relationship for this pair - relcode = REL_UNKNOWN - if (fid2 == fid1): - if iid1 == iid2: - relcode = REL_DUPE - elif (did2 == did1) and (mid2 == mid1) and did1 != mid1: - relcode = REL_SIBS - elif (iid1 == mid2) or (iid1 == did2) or (iid2 == mid1) or (iid2 == did1): - relcode = REL_PARENTCHILD - elif (str(did1) != '0' and (did2 == did1)) or (str(mid1) != '0' and (mid2 == mid1)): - relcode = REL_HALFSIBS - else: - # People in the same family should be marked as some other - # form of related. In general, these people will have a - # pretty random spread of similarity. This distinction is - # probably not very useful most of the time - relcode = REL_RELATED - else: - ### Different families - relcode = REL_UNRELATED - - t1e = time.time() - t1sum += t1e-t1s - - - ### Calculate sum(2-abs(a1-a2)) and sum((2-abs(a1-a2))**2) and count - ### the number of contributing genotypes. These values are not actually - ### calculated here, but instead are looked up in a table for speed. - ### FIXME: This is still too slow ... 
- result = [0.0, 0.0, 0.0] - ngeno = weave.inline(C_CODE, ['g1', 'g2', 'nrsSamples', 'result']) - if ngeno >= minUsegenos: - _, mean, sdev = result - means.append(mean) - sdevs.append(sdev) - ngenoL.append(ngeno) - pairs.append((s1, s2)) - rels.append(relcode) - else: - skip.add(ndone) # signal no comparable genotypes for this pair - ndone += 1 - t2e = time.time() - t2sum += t2e-t1e - t3e = time.time() - t3sum += t3e-t2e - - logme = [ 'T1: %s' % (t1sum), 'T2: %s' % (t2sum), 'T3: %s' % (t3sum),'TOT: %s' % (t3e-now), - '%s pairs with no (or not enough) comparable genotypes (%3.1f%%)' % (len(skip), - float(len(skip))/float(tot)*100)] - logf.write('%s\n' % '\t'.join(logme)) - ### Calculate mean and standard deviation of scores on a per relationship - ### type basis, allowing us to flag outliers for each particular relationship - ### type - relstats = {} - relCounts = {} - outlierFiles = {} - for relCode, relInfo in REL_LOOKUP.items(): - relName, relColor, relStyle = relInfo - useme = [means[x] for x in xrange(len(means)) if rels[x] == relCode] - relCounts[relCode] = len(useme) - mm = scipy.mean(useme) - ms = scipy.std(useme) - useme = [sdevs[x] for x in xrange(len(sdevs)) if rels[x] == relCode] - sm = scipy.mean(useme) - ss = scipy.std(useme) - relstats[relCode] = {'sd':(sm,ss), 'mean':(mm,ms)} - logf.write('Relstate %s: mean(mean)=%3.2f sdev(mean)=%3.2f, mean(sdev)=%3.2f sdev(sdev)=%3.2f\n' % (relName, mm, ms, sm, ss)) - - ### now fake z scores for each subject like abecasis recommends max(|zmu|,|zsd|) - ### within each group, for each pair, z=(groupmean-pairmean)/groupsd - available = len(means) - logf.write('%d pairs are available of %d\n' % (available, tot)) - ### s = '\nOutliers:\nrelationship\tzmean\tzsd\tped1\tped2\tmean\tsd\trmeanmean\trmeansd\trsdmean\trsdsd\n' - ### logf.write(s) - pairnum = 0 - offset = 0 - nOutliers = 0 - cexs = [] - outlierRecords = dict([(r, []) for r in range(N_RELATIONSHIP_TYPES)]) - zsdmax = 0 - for s1 in range(nSubjects): - if s1 in 
emptyRows: - continue - (fid1,iid1,did1,mid1,sex1,aff1,ok1,d_sid1,m_sid1) = scache[s1] - for s2 in range(s1+1, nSubjects): - if s2 in emptyRows: - continue - if pairnum not in skip: - ### Get group stats for this relationship - (fid2,iid2,did2,mid2,sex2,aff2,ok2,d_sid2,m_sid2) = scache[s2] - try: - r = rels[offset] - except IndexError: - logf.write('###OOPS offset %d available %d pairnum %d len(rels) %d', offset, available, pairnum, len(rels)) - rmm,rmd = relstats[r]['mean'] # group mean, group meansd alleles shared - rdm,rdd = relstats[r]['sd'] # group sdmean, group sdsd alleles shared - - try: - zsd = (sdevs[offset] - rdm)/rdd # distance from group mean in group sd units - except: - zsd = 1 - if abs(zsd) > zsdmax: - zsdmax = zsd # keep for sort scaling - try: - zmean = (means[offset] - rmm)/rmd # distance from group mean - except: - zmean = 1 - zmeans[offset] = zmean - zstds[offset] = zsd - pid=(s1,s2) - zrad = max(zsd,zmean) - if zrad < 4: - zrad = 2 - elif 4 < zrad < 15: - zrad = 3 # to 9 - else: # > 15 6=24+ - zrad=zrad/4 - zrad = min(zrad,6) # scale limit - zrad = max(2,max(zsd,zmean)) # as > 2, z grows - pair_data[pid] = (zmean,zsd,r,zrad) - if max(zsd,zmean) > Zcutoff: # is potentially interesting - mean = means[offset] - sdev = sdevs[offset] - outlierRecords[r].append((mean, sdev, zmean, zsd, fid1, iid1, fid2, iid2, rmm, rmd, rdm, rdd)) - nOutliers += 1 - tbl.write('%s_%s\t%s_%s\t%f\t%f\t%f\t%f\t%d\t%s\n' % \ - (fid1, iid1, fid2, iid2, mean, sdev, zmean,zsd, ngeno, relcode)) - offset += 1 - pairnum += 1 - logf.write( 'Outliers: %s\n' % (nOutliers)) - - ### Write outlier files for each relationship type - repOut.append('<h2>Outliers in tab delimited files linked above are also listed below</h2>') - lzsd = round(numpy.log10(zsdmax)) + 1 - scalefactor = 10**lzsd - for relCode, relInfo in REL_LOOKUP.items(): - relName, _, _ = relInfo - outliers = outlierRecords[relCode] - if not outliers: - continue - outliers = [(scalefactor*int(abs(x[3]))+ int(abs(x[2])),x) 
for x in outliers] # decorate - outliers.sort() - logf.write('### outliers after decorated sort=%s' % outliers) - outliers.reverse() # largest deviation first - logf.write('### outliers after decorated sort=%s' % outliers) - outliers = [x[1] for x in outliers] # undecorate - nrows = len(outliers) - truncated = 0 - if nrows > MAX_SHOW_ROWS: - s = '<h3>%s outlying pairs (top %d of %d) from %s</h3><table border="0" cellpadding="3">' % (relName, - MAX_SHOW_ROWS,nrows,title) - truncated = nrows - MAX_SHOW_ROWS - else: - s = '<h3>%s outlying pairs (n=%d) from %s</h3><table border="0" cellpadding="3">' % (relName,nrows,title) - repOut.append(s) - fhname = '%s_rgGRR_%s_outliers.xls' % (title, relName) - fhpath = os.path.join(outdir,fhname) - fh = open(fhpath, 'w') - newfiles.append(fhname) - explanations.append('%s Outlier Pairs %s, N=%d, Cutoff SD=%f' % (relName,title,len(outliers),Zcutoff)) - fh.write(OUTLIERS_HEADER) - s = ''.join(['<th>%s</th>' % x for x in OUTLIERS_HEADER_list]) - repOut.append('<tr align="center">%s</tr>' % s) - for n,rec in enumerate(outliers): - #(mean, sdev, zmean, zsd, fid1, iid1, fid2, iid2, rmm, rmd, rdm, rdd) = rec - fh.write('%f\t%f\t%f\t%f\t%s\t%s\t%s\t%s\t%f\t%f\t%f\t%f\n' % tuple(rec)) - # (mean, sdev, zmean, zsd, fid1, iid1, fid2, iid2, rmm, rmd, rdm, rdd)) - s = '''<td>%f</td><td>%f</td><td>%f</td><td>%f</td><td>%s</td><td>%s</td> - <td>%s</td><td>%s</td><td>%f</td><td>%f</td><td>%f</td><td>%f</td>''' % tuple(rec) - if n < MAX_SHOW_ROWS: - repOut.append('<tr align="center">%s</tr>' % s) - if truncated > 0: - repOut.append('<H2>WARNING: %d rows truncated - see outlier file for all %d rows</H2>' % (truncated, - nrows)) - fh.close() - repOut.append('</table><p>') - - ### Now, draw the plot in jpeg and svg formats, and optionally in the PDF format - ### if requested - logf.write('Plotting ...') - pointColors = [REL_COLORS[rel] for rel in rels] - pointStyles = [REL_POINTS[rel] for rel in rels] - - mainTitle = '%s (%s subjects, %d snp)' % 
(title, nSubjects, nrsSamples) - svg.write(SVG_HEADER % (SVG_COLORS[0],SVG_COLORS[1],SVG_COLORS[2],SVG_COLORS[3],SVG_COLORS[4], - SVG_COLORS[5],SVG_COLORS[6],SVG_COLORS[0],SVG_COLORS[0],SVG_COLORS[1],SVG_COLORS[1], - SVG_COLORS[2],SVG_COLORS[2],SVG_COLORS[3],SVG_COLORS[3],SVG_COLORS[4],SVG_COLORS[4], - SVG_COLORS[5],SVG_COLORS[5],SVG_COLORS[6],SVG_COLORS[6],mainTitle)) - #rpy.r.jpeg(filename='%s.jpg' % (title), width=1600, height=1200, pointsize=12, quality=100, bg='white') - #rpy.r.par(mai=(1,1,1,0.5)) - #rpy.r('par(xaxs="i",yaxs="i")') - #rpy.r.plot(means, sdevs, main=mainTitle, ylab=Y_AXIS_LABEL, xlab=X_AXIS_LABEL, cex=cexs, col=pointColors, pch=pointStyles, xlim=(0,2), ylim=(0,2)) - #rpy.r.legend(LEGEND_ALIGN, legend=REL_STATES, pch=REL_POINTS, col=REL_COLORS, title=LEGEND_TITLE) - #rpy.r.grid(nx=10, ny=10, col='lightgray', lty='dotted') - #rpy.r.dev_off() - - ### We will now go through each relationship type to partition plot points - ### into "bulk" and "outlier" groups. Bulk points will represent common - ### mean/sdev pairs and will cover the majority of the points in the plot -- - ### they will use generic tooltip informtion about all of the pairs - ### represented by that point. "Outlier" points will be uncommon pairs, - ### with very specific information in their tooltips. It would be nice to - ### keep hte total number of plotted points in the SVG representation to - ### ~10000 (certainly less than 100000?) - pointMap = {} - orderedRels = [y[1] for y in reversed(sorted([(relCounts.get(x, 0),x) for x in REL_LOOKUP.keys()]))] - # do we really want this? 
I want out of zone points last and big - for relCode in orderedRels: - svgColor = SVG_COLORS[relCode] - relName, relColor, relStyle = REL_LOOKUP[relCode] - svg.write('<g id="%s" style="stroke:%s; fill:%s; fill-opacity:1.0; stroke-width:1;" cursor="pointer">\n' % (relName, svgColor, svgColor)) - pMap = pointMap.setdefault(relCode, {}) - nPoints = 0 - rpairs=[] - rgenos=[] - rmeans=[] - rsdevs=[] - rz = [] - for x,rel in enumerate(rels): # all pairs - if rel == relCode: - s1,s2 = pairs[x] - pid=(s1,s2) - zmean,zsd,r,zrad = pair_data[pid][:4] - rpairs.append(pairs[x]) - rgenos.append(ngenoL[x]) - rmeans.append(means[x]) - rsdevs.append(sdevs[x]) - rz.append(zrad) - ### Now add the svg point group for this relationship to the svg file - for x in range(len(rmeans)): - svgX = '%d' % ((rmeans[x] - 1.0) * PLOT_WIDTH) # changed so mean scale is 1-2 - svgY = '%d' % (PLOT_HEIGHT - (rsdevs[x] * PLOT_HEIGHT)) # changed so sd scale is 0-1 - s1, s2 = rpairs[x] - (fid1,uid1,did1,mid1,sex1,phe1,iid1,d_sid1,m_sid1) = scache[s1] - (fid2,uid2,did2,mid2,sex2,phe2,iid2,d_sid2,m_sid2) = scache[s2] - ngenos = rgenos[x] - nPoints += 1 - point = pMap.setdefault((svgX, svgY), []) - point.append((rmeans[x], rsdevs[x], fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, ngenos,rz[x])) - for (svgX, svgY) in pMap: - points = pMap[(svgX, svgY)] - svgX = int(svgX) - svgY = int(svgY) - if len(points) > 1: - mmean,dmean = calcMeanSD([p[0] for p in points]) - msdev,dsdev = calcMeanSD([p[1] for p in points]) - mgeno,dgeno = calcMeanSD([p[-1] for p in points]) - mingeno = min([p[-1] for p in points]) - maxgeno = max([p[-1] for p in points]) - svg.write("""<circle cx="%d" cy="%d" r="2" - onmouseover="showBTT(evt, %d, %1.2f, %1.2f, %1.2f, %1.2f, %d, %d, %d, %d, %d)" - onmouseout="hideBTT(evt)" />\n""" % (svgX, svgY, relCode, mmean, dmean, msdev, dsdev, len(points), mgeno, dgeno, mingeno, maxgeno)) - else: - mean, sdev, fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, ngenos, zrad = points[0][:12] - rmean = 
float(relstats[relCode]['mean'][0]) - rsdev = float(relstats[relCode]['sd'][0]) - if zrad < 4: - zrad = 2 - elif 4 < zrad < 9: - zrad = 3 # to 9 - else: # > 9 5=15+ - zrad=zrad/3 - zrad = min(zrad,5) # scale limit - if zrad <= 3: - svg.write('<circle cx="%d" cy="%d" r="%s" onmouseover="showOTT(evt, %d, \'%s,%s,%s,%s\', \'%s,%s,%s,%s\', %1.2f, %1.2f, %s, %1.2f, %1.2f)" onmouseout="hideOTT(evt)" />\n' % (svgX, svgY, zrad, relCode, fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, mean, sdev, ngenos, rmean, rsdev)) - else: # highlight pairs a long way from expectation by outlining circle in red - svg.write("""<circle cx="%d" cy="%d" r="%s" style="stroke:red; fill:%s; fill-opacity:1.0; stroke-width:1;" - onmouseover="showOTT(evt, %d, \'%s,%s,%s,%s\', \'%s,%s,%s,%s\', %1.2f, %1.2f, %s, %1.2f, %1.2f)" - onmouseout="hideOTT(evt)" />\n""" % \ - (svgX, svgY, zrad, svgColor, relCode, fid1, iid1, did1, mid1, fid2, iid2, did2, mid2, mean, sdev, ngenos, rmean, rsdev)) - svg.write('</g>\n') - - ### Create a pdf as well if indicated on the command line - ### WARNING! for framingham share, with about 50M pairs, this is a 5.5GB pdf! 
-## if pdftoo: -## pdfname = '%s.pdf' % (title) -## rpy.r.pdf(pdfname, 6, 6) -## rpy.r.par(mai=(1,1,1,0.5)) -## rpy.r('par(xaxs="i",yaxs="i")') -## rpy.r.plot(means, sdevs, main='%s, %d snp' % (title, nSamples), ylab=Y_AXIS_LABEL, xlab=X_AXIS_LABEL, cex=cexs, col=pointColors, pch=pointStyles, xlim=(0,2), ylim=(0,2)) -## rpy.r.legend(LEGEND_ALIGN, legend=REL_STATES, pch=REL_POINTS, col=REL_COLORS, title=LEGEND_TITLE) -## rpy.r.grid(nx=10, ny=10, col='lightgray', lty='dotted') -## rpy.r.dev_off() - - ### Draw polygons - if showPolygons: - svg.write('<g id="polygons" cursor="pointer">\n') - for rel, poly in POLYGONS.items(): - points = ' '.join(['%s,%s' % ((p[0]-1.0)*float(PLOT_WIDTH), (PLOT_HEIGHT - p[1]*PLOT_HEIGHT)) for p in poly]) - svg.write('<polygon points="%s" fill="transparent" style="stroke:%s; stroke-width:1"/>\n' % (points, SVG_COLORS[rel])) - svg.write('</g>\n') - - - svg.write(SVG_FOOTER) - svg.close() - return newfiles,explanations,repOut - -def doIBS(n=100): - """parse parameters from galaxy - expect 'input pbed path' 'basename' 'outpath' 'title' 'logpath' 'n' - <command interpreter="python"> - rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name" - '$out_file1' '$out_file1.files_path' "$title" '$n' '$Z' - </command> - - """ - u="""<command interpreter="python"> - rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name" - '$out_file1' '$out_file1.files_path' "$title" '$n' '$Z' - </command>""" - - if len(sys.argv) < 8: - print >> sys.stdout, 'Need pbed inpath, basename, out_htmlname, outpath, title, logpath, nSNP, Zcutoff on command line please' - print >> sys.stdout, u - sys.exit(1) - ts = '%s%s' % (string.punctuation,string.whitespace) - ptran = string.maketrans(ts,'_'*len(ts)) - inpath = sys.argv[1] - basename = sys.argv[2] - outhtml = sys.argv[3] - newfilepath = sys.argv[4] - try: - os.makedirs(newfilepath) - except: - pass - title = sys.argv[5].translate(ptran) - logfname = 'Log_%s.txt' % title - logpath = 
os.path.join(newfilepath,logfname) # log was a child - make part of html extra_files_path zoo - n = int(sys.argv[6]) - try: - Zcutoff = float(sys.argv[7]) - except: - Zcutoff = 2.0 - try: - os.makedirs(newfilepath) - except: - pass - logf = file(logpath,'w') - newfiles,explanations,repOut = doIBSpy(inpath=inpath,basename=basename,outdir=newfilepath, - logf=logf,nrsSamples=n,title=title,pdftoo=0,Zcutoff=Zcutoff) - logf.close() - logfs = file(logpath,'r').readlines() - lf = file(outhtml,'w') - lf.write(galhtmlprefix % PROGNAME) - # this is a mess. todo clean up - should each datatype have it's own directory? Yes - # probably. Then titles are universal - but userId libraries are separate. - s = '<div>Output from %s run at %s<br>\n' % (PROGNAME,timenow()) - lf.write('<h4>%s</h4>\n' % s) - fixed = ["'%s'" % x for x in sys.argv] # add quotes just in case - s = 'If you need to rerun this analysis, the command line was\n<pre>%s</pre>\n</div>' % (' '.join(fixed)) - lf.write(s) - #s = """<object data="%s" type="image/svg+xml" width="%d" height="%d"> - # <embed src="%s" type="image/svg+xml" width="%d" height="%d" /> - # </object>""" % (newfiles[0],PLOT_WIDTH,PLOT_HEIGHT,newfiles[0],PLOT_WIDTH,PLOT_HEIGHT) - s = """ <embed src="%s" type="image/svg+xml" width="%d" height="%d" />""" % (newfiles[0],PLOT_WIDTH,PLOT_HEIGHT) - #s = """ <iframe src="%s" type="image/svg+xml" width="%d" height="%d" />""" % (newfiles[0],PLOT_WIDTH,PLOT_HEIGHT) - lf.write(s) - lf.write('<div><h4>Click the links below to save output files and plots</h4><br><ol>\n') - for i in range(len(newfiles)): - if i == 0: - lf.write('<li><a href="%s" type="image/svg+xml" >%s</a></li>\n' % (newfiles[i],explanations[i])) - else: - lf.write('<li><a href="%s">%s</a></li>\n' % (newfiles[i],explanations[i])) - flist = os.listdir(newfilepath) - for fname in flist: - if not fname in newfiles: - lf.write('<li><a href="%s">%s</a></li>\n' % (fname,fname)) - lf.write('</ol></div>') - lf.write('<div>%s</div>' % 
('\n'.join(repOut))) # repOut is a list of tables - lf.write('<div><hr><h3>Log from this job (also stored in %s)</h3><pre>%s</pre><hr></div>' % (logfname,'\n'.join(logfs))) - lf.write('</body></html>\n') - lf.close() - logf.close() - -if __name__ == '__main__': - doIBS() - - +""" +# july 2009: Need to see outliers so need to draw them last? +# could use clustering on the zscores to guess real relationships for unrelateds +# but definitely need to draw last +# added MAX_SHOW_ROWS to limit the length of the main report page +# Changes for Galaxy integration +# added more robust knuth method for one pass mean and sd +# no difference really - let's use scipy.mean() and scipy.std() instead... +# fixed labels and changed to .xls for outlier reports so can open in excel +# interesting - with a few hundred subjects, 5k gives good resolution +# and 100k gives better but not by much +# TODO remove non autosomal markers +# TODO it would be best if label had the zmean and zsd as these are what matter for +# outliers rather than the group mean/sd +# mods to rgGRR.py from channing CVS which John Ziniti has rewritten to produce SVG plots +# to make a Galaxy tool - we need the table of mean and SD for interesting pairs, the SVG and the log +# so the result should be an HTML file + +# rgIBS.py +# use a random subset of markers for a quick ibs +# to identify sample dups and closely related subjects +# try snpMatrix and plink and see which one works best for us? +# abecasis grr plots mean*sd for every subject to show clusters +# mods june 23 rml to avoid non-autosomal markers +# we seem to be distinguishing parent-child by gender - 2 clouds! + + +snpMatrix from David Clayton has: +ibs.stats function to calculate the identity-by-state stats of a group of samples +Description +Given a snp.matrix-class or a X.snp.matrix-class object with N samples, calculates some statistics +about the relatedness of every pair of samples within. 
+ +Usage +ibs.stats(x) +8 ibs.stats +Arguments +x a snp.matrix-class or a X.snp.matrix-class object containing N samples +Details +No-calls are excluded from consideration here. +Value +A data.frame containing N(N - 1)/2 rows, where the row names are the sample name pairs separated +by a comma, and the columns are: +Count count of identical calls, exclusing no-calls +Fraction fraction of identical calls comparied to actual calls being made in both samples +Warning +In some applications, it may be preferable to subset a (random) selection of SNPs first - the +calculation +time increases as N(N - 1)M/2 . Typically for N = 800 samples and M = 3000 SNPs, the +calculation time is about 1 minute. A full GWA scan could take hours, and quite unnecessary for +simple applications such as checking for duplicate or related samples. +Note +This is mostly written to find mislabelled and/or duplicate samples. +Illumina indexes their SNPs in alphabetical order so the mitochondria SNPs comes first - for most +purpose it is undesirable to use these SNPs for IBS purposes. +TODO: Worst-case S4 subsetting seems to make 2 copies of a large object, so one might want to +subset before rbind(), etc; a future version of this routine may contain a built-in subsetting facility +""" +import sys,os,time,random,string,copy,optparse + +try: + set +except NameError: + from Sets import Set as set + +from rgutils import timenow,pruneLD,plinke +import plinkbinJZ + + +opts = None +verbose = False + +showPolygons = False + +class NullDevice: + def write(self, s): + pass + +tempstderr = sys.stderr # save +sys.stderr = NullDevice() +# need to avoid blather about deprecation and other strange stuff from scipy +# the current galaxy job runner assumes that +# the job is in error if anything appears on sys.stderr +# grrrrr. James wants to keep it that way instead of using the +# status flag for some strange reason. 
Presumably he doesn't use R or (in this case, scipy) +import numpy +import scipy +from scipy import weave + + +sys.stderr=tempstderr + + +PROGNAME = os.path.split(sys.argv[0])[-1] +X_AXIS_LABEL = 'Mean Alleles Shared' +Y_AXIS_LABEL = 'SD Alleles Shared' +LEGEND_ALIGN = 'topleft' +LEGEND_TITLE = 'Relationship' +DEFAULT_SYMBOL_SIZE = 1.0 # default symbol size +DEFAULT_SYMBOL_SIZE = 0.5 # default symbol size + +### Some colors for R/rpy +R_BLACK = 1 +R_RED = 2 +R_GREEN = 3 +R_BLUE = 4 +R_CYAN = 5 +R_PURPLE = 6 +R_YELLOW = 7 +R_GRAY = 8 + +### ... and some point-styles + +### +PLOT_HEIGHT = 600 +PLOT_WIDTH = 1150 + + +#SVG_COLORS = ('black', 'darkblue', 'blue', 'deepskyblue', 'firebrick','maroon','crimson') +#SVG_COLORS = ('cyan','dodgerblue','mediumpurple', 'fuchsia', 'red','gold','gray') +SVG_COLORS = ('cyan','dodgerblue','mediumpurple','forestgreen', 'lightgreen','gold','gray') +# dupe,parentchild,sibpair,halfsib,parents,unrel,unkn +#('orange', 'red', 'green', 'chartreuse', 'blue', 'purple', 'gray') + +OUTLIERS_HEADER = 'Mean\tSdev\tZ(mean)\tZ(sdev)\tFID1\tIID1\tFID2\tIID2\tMean(Rel_Mean)\tSdev(Rel_Mean)\tMean(Rel_Sdev)\tSdev(Rel_Sdev)\n' +OUTLIERS_HEADER_list = ['Mean','Sdev','ZMean','ZSdev','FID1','IID1','FID2','IID2', +'RGMean_M','RGMean_SD','RGSD_M','RGSD_SD'] +TABLE_HEADER='fid1 iid1\tfid2 iid2\tmean\tsdev\tzmean\tzsdev\tgeno\trelcode\n' + + +### Relationship codes, text, and lookups/mappings +N_RELATIONSHIP_TYPES = 7 +REL_DUPE, REL_PARENTCHILD, REL_SIBS, REL_HALFSIBS, REL_RELATED, REL_UNRELATED, REL_UNKNOWN = range(N_RELATIONSHIP_TYPES) +REL_LOOKUP = { + REL_DUPE: ('dupe', R_BLUE, 1), + REL_PARENTCHILD: ('parentchild', R_YELLOW, 1), + REL_SIBS: ('sibpairs', R_RED, 1), + REL_HALFSIBS: ('halfsibs', R_GREEN, 1), + REL_RELATED: ('parents', R_PURPLE, 1), + REL_UNRELATED: ('unrelated', R_CYAN, 1), + REL_UNKNOWN: ('unknown', R_GRAY, 1), + } +OUTLIER_STDEVS = { + REL_DUPE: 2, + REL_PARENTCHILD: 2, + REL_SIBS: 2, + REL_HALFSIBS: 2, + REL_RELATED: 2, + REL_UNRELATED: 3, 
+ REL_UNKNOWN: 2, + } +# note now Z can be passed in + +REL_STATES = [REL_LOOKUP[r][0] for r in range(N_RELATIONSHIP_TYPES)] +REL_COLORS = SVG_COLORS +REL_POINTS = [REL_LOOKUP[r][2] for r in range(N_RELATIONSHIP_TYPES)] + +DEFAULT_MAX_SAMPLE_SIZE = 10000 + +REF_COUNT_HOM1 = 3 +REF_COUNT_HET = 2 +REF_COUNT_HOM2 = 1 +MISSING = 0 +MAX_SHOW_ROWS = 100 # framingham has millions - delays showing output page - so truncate and explain +MARKER_PAIRS_PER_SECOND_SLOW = 15000000.0 +MARKER_PAIRS_PER_SECOND_FAST = 70000000.0 + + +galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +""" + + +SVG_HEADER = '''<?xml version="1.0" standalone="no"?> +<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.2//EN" "http://www.w3.org/Graphics/SVG/1.2/DTD/svg12.dtd"> + +<svg width="1280" height="800" + xmlns="http://www.w3.org/2000/svg" version="1.2" + xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 1280 800" onload="init()"> + + <script type="text/ecmascript" xlink:href="/static/scripts/checkbox_and_radiobutton.js"/> + <script type="text/ecmascript" xlink:href="/static/scripts/helper_functions.js"/> + <script type="text/ecmascript" xlink:href="/static/scripts/timer.js"/> + <script type="text/ecmascript"> + <![CDATA[ + var checkBoxes = new Array(); + var radioGroupBandwidth; + var colours = ['%s','%s','%s','%s','%s','%s','%s']; + function init() { + var style = {"font-family":"Arial,Helvetica", "fill":"black", "font-size":12}; + var dist = 12; + var yOffset = 4; + + //A checkBox for each relationship type 
dupe,parentchild,sibpair,halfsib,parents,unrel,unkn + checkBoxes["dupe"] = new checkBox("dupe","checkboxes",20,40,"cbRect","cbCross",true,"Duplicate",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["parentchild"] = new checkBox("parentchild","checkboxes",20,60,"cbRect","cbCross",true,"Parent-Child",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["sibpairs"] = new checkBox("sibpairs","checkboxes",20,80,"cbRect","cbCross",true,"Sib-pairs",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["halfsibs"] = new checkBox("halfsibs","checkboxes",20,100,"cbRect","cbCross",true,"Half-sibs",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["parents"] = new checkBox("parents","checkboxes",20,120,"cbRect","cbCross",true,"Parents",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["unrelated"] = new checkBox("unrelated","checkboxes",20,140,"cbRect","cbCross",true,"Unrelated",style,dist,yOffset,undefined,hideShowLayer); + checkBoxes["unknown"] = new checkBox("unknown","checkboxes",20,160,"cbRect","cbCross",true,"Unknown",style,dist,yOffset,undefined,hideShowLayer); + + } + + function hideShowLayer(id, status, label) { + var vis = "hidden"; + if (status) { + vis = "visible"; + } + document.getElementById(id).setAttributeNS(null, 'visibility', vis); + } + + function showBTT(evt, rel, mm, dm, md, dd, n, mg, dg, lg, hg) { + var x = parseInt(evt.pageX)-250; + var y = parseInt(evt.pageY)-110; + switch(rel) { + case 0: + fill = colours[rel]; + relt = "dupe"; + break; + case 1: + fill = colours[rel]; + relt = "parentchild"; + break; + case 2: + fill = colours[rel]; + relt = "sibpairs"; + break; + case 3: + fill = colours[rel]; + relt = "halfsibs"; + break; + case 4: + fill = colours[rel]; + relt = "parents"; + break; + case 5: + fill = colours[rel]; + relt = "unrelated"; + break; + case 6: + fill = colours[rel]; + relt = "unknown"; + break; + default: + fill = "cyan"; + relt = "ERROR_CODE: "+rel; + } + + 
document.getElementById("btRel").textContent = "GROUP: "+relt; + document.getElementById("btMean").textContent = "mean="+mm+" +/- "+dm; + document.getElementById("btSdev").textContent = "sdev="+dm+" +/- "+dd; + document.getElementById("btPair").textContent = "npairs="+n; + document.getElementById("btGeno").textContent = "ngenos="+mg+" +/- "+dg+" (min="+lg+", max="+hg+")"; + document.getElementById("btHead").setAttribute('fill', fill); + + var tt = document.getElementById("btTip"); + tt.setAttribute("transform", "translate("+x+","+y+")"); + tt.setAttribute('visibility', 'visible'); + } + + function showOTT(evt, rel, s1, s2, mean, sdev, ngeno, rmean, rsdev) { + var x = parseInt(evt.pageX)-150; + var y = parseInt(evt.pageY)-180; + + switch(rel) { + case 0: + fill = colours[rel]; + relt = "dupe"; + break; + case 1: + fill = colours[rel]; + relt = "parentchild"; + break; + case 2: + fill = colours[rel]; + relt = "sibpairs"; + break; + case 3: + fill = colours[rel]; + relt = "halfsibs"; + break; + case 4: + fill = colours[rel]; + relt = "parents"; + break; + case 5: + fill = colours[rel]; + relt = "unrelated"; + break; + case 6: + fill = colours[rel]; + relt = "unknown"; + break; + default: + fill = "cyan"; + relt = "ERROR_CODE: "+rel; + } + + document.getElementById("otRel").textContent = "PAIR: "+relt; + document.getElementById("otS1").textContent = "s1="+s1; + document.getElementById("otS2").textContent = "s2="+s2; + document.getElementById("otMean").textContent = "mean="+mean; + document.getElementById("otSdev").textContent = "sdev="+sdev; + document.getElementById("otGeno").textContent = "ngenos="+ngeno; + document.getElementById("otRmean").textContent = "relmean="+rmean; + document.getElementById("otRsdev").textContent = "relsdev="+rsdev; + document.getElementById("otHead").setAttribute('fill', fill); + + var tt = document.getElementById("otTip"); + tt.setAttribute("transform", "translate("+x+","+y+")"); + tt.setAttribute('visibility', 'visible'); + } + + function 
hideBTT(evt) { + document.getElementById("btTip").setAttributeNS(null, 'visibility', 'hidden'); + } + + function hideOTT(evt) { + document.getElementById("otTip").setAttributeNS(null, 'visibility', 'hidden'); + } + + ]]> + </script> + <defs> +  + <symbol id="cbRect" overflow="visible"> + <rect x="-5" y="-5" width="10" height="10" fill="white" stroke="dimgray" stroke-width="1" cursor="pointer"/> + </symbol> + <symbol id="cbCross" overflow="visible"> + <g pointer-events="none" stroke="black" stroke-width="1"> + <line x1="-3" y1="-3" x2="3" y2="3"/> + <line x1="3" y1="-3" x2="-3" y2="3"/> + </g> + </symbol> + </defs> + +<desc>Developer Works Dynamic Scatter Graph Scaling Example</desc> + + +<g style="stroke-width:1.0; stroke:black; shape-rendering:crispEdges"> +  + <path d="M 100 100 L 1250 100 Z"/> + <path d="M 100 700 L 1250 700 Z"/> + +  + <path d="M 100 100 L 100 700 Z"/> + <path d="M 1250 100 L 1250 700 Z"/> +</g> + +<g transform="translate(100,100)"> + +  + <g style="fill:none; stroke:#dddddd; stroke-width:1; stroke-dasharray:2,2; text-anchor:end; shape-rendering:crispEdges"> + +  + <line x1="125" y1="0" x2="115" y2="600" /> + <line x1="230" y1="0" x2="230" y2="600" /> + <line x1="345" y1="0" x2="345" y2="600" /> + <line x1="460" y1="0" x2="460" y2="600" /> + <line x1="575" y1="0" x2="575" y2="600" style="stroke-dasharray:none;" /> + <line x1="690" y1="0" x2="690" y2="600" /> + <line x1="805" y1="0" x2="805" y2="600" /> + <line x1="920" y1="0" x2="920" y2="600" /> + <line x1="1035" y1="0" x2="1035" y2="600" /> + +  + <line x1="0" y1="60" x2="1150" y2="60" /> + <line x1="0" y1="120" x2="1150" y2="120" /> + <line x1="0" y1="180" x2="1150" y2="180" /> + <line x1="0" y1="240" x2="1150" y2="240" /> + <line x1="0" y1="300" x2="1150" y2="300" style="stroke-dasharray:none;" /> + <line x1="0" y1="360" x2="1150" y2="360" /> + <line x1="0" y1="420" x2="1150" y2="420" /> + <line x1="0" y1="480" x2="1150" y2="480" /> + <line x1="0" y1="540" x2="1150" y2="540" /> + </g> + +  
+ <g style="fill:black; stroke:none" font-size="12" font-family="Arial" transform="translate(25,25)"> + <rect width="160" height="270" style="fill:none; stroke:black; shape-rendering:crispEdges" /> + <text x="5" y="20" style="fill:black; stroke:none;" font-size="13" font-weight="bold">Given Pair Relationship</text> + <rect x="120" y="35" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="55" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="75" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="95" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="115" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="135" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <rect x="120" y="155" width="10" height="10" fill="%s" stroke="%s" stroke-width="1" cursor="pointer"/> + <text x="15" y="195" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore gt 15</text> + <circle cx="125" cy="192" r="6" style="stroke:red; fill:gold; fill-opacity:1.0; stroke-width:1;"/> + <text x="15" y="215" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore 4 to 15</text> + <circle cx="125" cy="212" r="3" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/> + <text x="15" y="235" style="fill:black; stroke:none" font-size="12" font-family="Arial" >Zscore lt 4</text> + <circle cx="125" cy="232" r="2" style="stroke:gold; fill:gold; fill-opacity:1.0; stroke-width:1;"/> + <g id="checkboxes"> + </g> + </g> + + + <g style='fill:black; stroke:none' font-size="17" font-family="Arial"> +  + <text x="480" y="660">Mean Alleles Shared</text> + <text x="0" y="630" >1.0</text> + <text x="277" y="630" >1.25</text> + <text x="564" y="630" >1.5</text> + <text x="842" y="630" >1.75</text> + 
<text x="1140" y="630" >2.0</text> + </g> + + <g transform="rotate(270)" style="fill:black; stroke:none" font-size="17" font-family="Arial"> +  + <text x="-350" y="-40">SD Alleles Shared</text> + <text x="-20" y="-10" >1.0</text> + <text x="-165" y="-10" >0.75</text> + <text x="-310" y="-10" >0.5</text> + <text x="-455" y="-10" >0.25</text> + <text x="-600" y="-10" >0.0</text> + </g> + + +<g style="fill:black; stroke:none" font-size="18" font-family="Arial"> + <text x="425" y="-30">%s</text> +</g> + + +''' + +SVG_FOOTER = ''' + +</g> +<g id="btTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial"> + <rect width="250" height="110" style="fill:silver" rx="2" ry="2"/> + <rect id="btHead" width="250" height="20" rx="2" ry="2" /> + <text id="btRel" y="14" x="85">unrelated</text> + <text id="btMean" y="40" x="4">mean=1.5 +/- 0.04</text> + <text id="btSdev" y="60" x="4">sdev=0.7 +/- 0.03</text> + <text id="btPair" y="80" x="4">npairs=1152</text> + <text id="btGeno" y="100" x="4">ngenos=4783 +/- 24 (min=1000, max=5000)</text> +</g> + +<g id="otTip" visibility="hidden" style="stroke-width:1.0; fill:black; stroke:none;" font-size="10" font-family="Arial"> + <rect width="150" height="180" style="fill:silver" rx="2" ry="2"/> + <rect id="otHead" width="150" height="20" rx="2" ry="2" /> + <text id="otRel" y="14" x="40">sibpairs</text> + <text id="otS1" y="40" x="4">s1=fid1,iid1</text> + <text id="otS2" y="60" x="4">s2=fid2,iid2</text> + <text id="otMean" y="80" x="4">mean=1.82</text> + <text id="otSdev" y="100" x="4">sdev=0.7</text> + <text id="otGeno" y="120" x="4">ngeno=4487</text> + <text id="otRmean" y="140" x="4">relmean=1.85</text> + <text id="otRsdev" y="160" x="4">relsdev=0.65</text> +</g> +</svg> +''' + +OUTLIERS_HEADER = 'Mean\tSdev\tZ(mean)\tZ(sdev)\tFID1\tIID1\tFID2\tIID2\tMean(Mean)\tSdev(Mean)\tMean(Sdev)\tSdev(Sdev)\n' + +DEFAULT_MAX_SAMPLE_SIZE = 5000 + +REF_COUNT_HOM1 = 3 +REF_COUNT_HET = 2 +REF_COUNT_HOM2 
= 1 +MISSING = 0 + +MARKER_PAIRS_PER_SECOND_SLOW = 15000000 +MARKER_PAIRS_PER_SECOND_FAST = 70000000 + +POLYGONS = { + REL_UNRELATED: ((1.360, 0.655), (1.385, 0.730), (1.620, 0.575), (1.610, 0.505)), + REL_HALFSIBS: ((1.630, 0.500), (1.630, 0.550), (1.648, 0.540), (1.648, 0.490)), + REL_SIBS: ((1.660, 0.510), (1.665, 0.560), (1.820, 0.410), (1.820, 0.390)), + REL_PARENTCHILD: ((1.650, 0.470), (1.650, 0.490), (1.750, 0.440), (1.750, 0.420)), + REL_DUPE: ((1.970, 0.000), (1.970, 0.150), (2.000, 0.150), (2.000, 0.000)), + } + +def distance(point1, point2): + """ Calculate the distance between two points + """ + (x1,y1) = [float(d) for d in point1] + (x2,y2) = [float(d) for d in point2] + dx = abs(x1 - x2) + dy = abs(y1 - y2) + return math.sqrt(dx**2 + dy**2) + +def point_inside_polygon(x, y, poly): + """ Determine if a point (x,y) is inside a given polygon or not + poly is a list of (x,y) pairs. + + Taken from: http://www.ariel.com.au/a/python-point-int-poly.html + """ + + n = len(poly) + inside = False + + p1x,p1y = poly[0] + for i in range(n+1): + p2x,p2y = poly[i % n] + if y > min(p1y,p2y): + if y <= max(p1y,p2y): + if x <= max(p1x,p2x): + if p1y != p2y: + xinters = (y-p1y)*(p2x-p1x)/(p2y-p1y)+p1x + if p1x == p2x or x <= xinters: + inside = not inside + p1x,p1y = p2x,p2y + return inside + +def readMap(pedfile): + """ + """ + mapfile = pedfile.replace('.ped', '.map') + marker_list = [] + if os.path.exists(mapfile): + print 'readMap: %s' % (mapfile) + fh = file(mapfile, 'r') + for line in fh: + marker_list.append(line.strip().split()) + fh.close() + print 'readMap: %s markers' % (len(marker_list)) + return marker_list + +def calcMeanSD(useme): + """ + A numerically stable algorithm is given below. It also computes the mean. 
+ This algorithm is due to Knuth,[1] who cites Welford.[2] + n = 0 + mean = 0 + M2 = 0 + + foreach x in data: + n = n + 1 + delta = x - mean + mean = mean + delta/n + M2 = M2 + delta*(x - mean) // This expression uses the new value of mean + end for + + variance_n = M2/n + variance = M2/(n - 1) + """ + mean = 0.0 + M2 = 0.0 + sd = 0.0 + n = len(useme) + if n > 1: + for i,x in enumerate(useme): + delta = x - mean + mean = mean + delta/(i+1) # knuth uses n+=1 at start + M2 = M2 + delta*(x - mean) # This expression uses the new value of mean + variance = M2/(n-1) # assume is sample so lose 1 DOF + sd = pow(variance,0.5) + return mean,sd + + +def doIBSpy(ped=None,basename='',outdir=None,logf=None, + nrsSamples=10000,title='title',pdftoo=0,Zcutoff=2.0): + #def doIBS(pedName, title, nrsSamples=None, pdftoo=False): + """ started with snpmatrix but GRR uses actual IBS counts and sd's + """ + repOut = [] # text strings to add to the html display + refallele = {} + tblf = '%s_table.xls' % (title) + tbl = file(os.path.join(outdir,tblf), 'w') + tbl.write(TABLE_HEADER) + svgf = '%s.svg' % (title) + svg = file(os.path.join(outdir,svgf), 'w') + + nMarkers = len(ped._markers) + if nMarkers < 5: + print sys.stderr, '### ERROR - %d is too few markers for reliable estimation in %s - terminating' % (nMarkers,PROGNAME) + sys.exit(1) + nSubjects = len(ped._subjects) + nrsSamples = min(nMarkers, nrsSamples) + if opts and opts.use_mito: + markers = range(nMarkers) + nrsSamples = min(len(markers), nrsSamples) + sampleIndexes = sorted(random.sample(markers, nrsSamples)) + else: + autosomals = ped.autosomal_indices() + nrsSamples = min(len(autosomals), nrsSamples) + sampleIndexes = sorted(random.sample(autosomals, nrsSamples)) + + print '' + print 'Getting random.sample of %s from %s total' % (nrsSamples, nMarkers) + npairs = (nSubjects*(nSubjects-1))/2 # total rows in table + newfiles=[svgf,tblf] + explanations = ['rgGRR Plot (requires SVG)','Mean by SD alleles shared - %d rows' % npairs] + 
# these go with the output file links in the html file + s = 'Reading genotypes for %s subjects and %s markers\n' % (nSubjects, nrsSamples) + logf.write(s) + minUsegenos = nrsSamples/2 # must have half? + nGenotypes = nSubjects*nrsSamples + stime = time.time() + emptyRows = set() + genos = numpy.zeros((nSubjects, nrsSamples), dtype=int) + for s in xrange(nSubjects): + nValid = 0 + #getGenotypesByIndices(self, s, mlist, format) + genos[s] = ped.getGenotypesByIndices(s, sampleIndexes, format='ref') + nValid = sum([1 for g in genos[s] if g]) + if not nValid: + emptyRows.add(s) + sub = ped.getSubject(s) + print 'All missing for row %d (%s)' % (s, sub) + logf.write('All missing for row %d (%s)\n' % (s, sub)) + rtime = time.time() - stime + if verbose: + print '@@Read %s genotypes in %s seconds' % (nGenotypes, rtime) + + + ### Now the expensive part. For each pair of subjects, we get the mean number + ### and standard deviation of shared alleles over all of the markers where both + ### subjects have a known genotype. Identical subjects should have mean shared + ### alleles very close to 2.0 with a standard deviation very close to 0.0. 
+ tot = nSubjects*(nSubjects-1)/2 + nprog = tot/10 + nMarkerpairs = tot * nrsSamples + estimatedTimeSlow = nMarkerpairs/MARKER_PAIRS_PER_SECOND_SLOW + estimatedTimeFast = nMarkerpairs/MARKER_PAIRS_PER_SECOND_FAST + + pairs = [] + pair_data = {} + means = [] ## Mean IBS for each pair + ngenoL = [] ## Count of comparable genotypes for each pair + sdevs = [] ## Standard dev for each pair + rels = [] ## A relationship code for each pair + zmeans = [0.0 for x in xrange(tot)] ## zmean score for each pair for the relgroup + zstds = [0.0 for x in xrange(tot)] ## zstd score for each pair for the relgrp + skip = set() + ndone = 0 ## How many have been done so far + + logf.write('Calculating %d pairs, updating every %d pairs...\n' % (tot, nprog)) + logf.write('Estimated time is %2.2f to %2.2f seconds ...\n' % (estimatedTimeFast, estimatedTimeSlow)) + + t1sum = 0 + t2sum = 0 + t3sum = 0 + now = time.time() + scache = {}

1 0