galaxy-dev
Threads by month
- ----- 2025 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- 1 participant
- 10009 discussions
[hg] galaxy 1523: Adding tools to compute Substitution rates.
by greg@scofield.bx.psu.edu 22 Sep '08
by greg@scofield.bx.psu.edu 22 Sep '08
22 Sep '08
details: http://www.bx.psu.edu/hg/galaxy/rev/dabed25dfbaf
changeset: 1523:dabed25dfbaf
user: guru
date: Sun Sep 21 17:36:28 2008 -0400
description:
Adding tools to compute Substitution rates.
7 file(s) affected in this change:
test-data/subRates1.out
test-data/subs.out
tool_conf.xml.sample
tools/regVariation/substitution_rates.py
tools/regVariation/substitution_rates.xml
tools/regVariation/substitutions.py
tools/regVariation/substitutions.xml
diffs (1734 lines):
diff -r 05974294cbf1 -r dabed25dfbaf test-data/subRates1.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subRates1.out Sun Sep 21 17:36:28 2008 -0400
@@ -0,0 +1,13 @@
+#Seq1 Start1 End1 Seq2 Start2 End2 L N p
+hg17.chrX 3816458 3816983 fr1.chrUn 343715247 343715776 525 188 0.3581
+hg17.chrX 3795168 3795525 fr1.chrUn 343710815 343711179 357 92 0.2577
+hg17.chrX 3787425 3787599 fr1.chrUn 343708230 343708404 174 37 0.2126
+hg17.chrX 3787284 3787384 fr1.chrUn 62078707 62078816 100 33 0.3300
+hg17.chrX 3776942 3777227 fr1.chrUn 343707053 343707336 283 122 0.4311
+hg17.chrX 3760375 3760468 fr1.chrUn 343706399 343706492 93 20 0.2151
+hg17.chrX 3733405 3733881 fr1.chrUn 303515824 303516268 444 186 0.4189
+hg17.chrX 3731355 3731463 fr1.chrUn 303515724 303515815 91 36 0.3956
+hg17.chrX 3730591 3731038 fr1.chrUn 303515378 303515724 346 126 0.3642
+hg17.chrX 3729219 3729457 fr1.chrUn 343703525 343703763 238 57 0.2395
+hg17.chrX 3700391 3700698 fr1.chrUn 241017738 241018068 307 112 0.3648
+hg17.chrX 3639441 3639646 fr1.chrUn 333536350 333536563 205 66 0.3220
diff -r 05974294cbf1 -r dabed25dfbaf test-data/subs.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subs.out Sun Sep 21 17:36:28 2008 -0400
@@ -0,0 +1,1379 @@
+#Chr Start End
+hg17.chrX 3816460 3816460
+fr1.chrUn 343715249 343715249
+hg17.chrX 3816462 3816463
+fr1.chrUn 343715251 343715252
+hg17.chrX 3816466 3816466
+fr1.chrUn 343715255 343715255
+hg17.chrX 3816471 3816471
+fr1.chrUn 343715260 343715260
+hg17.chrX 3816473 3816474
+fr1.chrUn 343715262 343715263
+hg17.chrX 3816478 3816479
+fr1.chrUn 343715267 343715268
+hg17.chrX 3816484 3816485
+fr1.chrUn 343715273 343715274
+hg17.chrX 3816493 3816494
+fr1.chrUn 343715284 343715285
+hg17.chrX 3816496 3816499
+fr1.chrUn 343715287 343715290
+hg17.chrX 3816502 3816502
+fr1.chrUn 343715293 343715293
+hg17.chrX 3816504 3816505
+fr1.chrUn 343715295 343715296
+hg17.chrX 3816507 3816507
+fr1.chrUn 343715298 343715298
+hg17.chrX 3816511 3816511
+fr1.chrUn 343715302 343715302
+hg17.chrX 3816515 3816516
+fr1.chrUn 343715306 343715307
+hg17.chrX 3816518 3816518
+fr1.chrUn 343715312 343715312
+hg17.chrX 3816521 3816522
+fr1.chrUn 343715315 343715316
+hg17.chrX 3816524 3816524
+fr1.chrUn 343715318 343715318
+hg17.chrX 3816531 3816531
+fr1.chrUn 343715324 343715324
+hg17.chrX 3816534 3816542
+fr1.chrUn 343715327 343715335
+hg17.chrX 3816544 3816544
+fr1.chrUn 343715337 343715337
+hg17.chrX 3816547 3816549
+fr1.chrUn 343715340 343715342
+hg17.chrX 3816551 3816554
+fr1.chrUn 343715344 343715347
+hg17.chrX 3816556 3816558
+fr1.chrUn 343715349 343715351
+hg17.chrX 3816561 3816561
+fr1.chrUn 343715354 343715354
+hg17.chrX 3816564 3816564
+fr1.chrUn 343715357 343715357
+hg17.chrX 3816568 3816568
+fr1.chrUn 343715361 343715361
+hg17.chrX 3816570 3816571
+fr1.chrUn 343715363 343715364
+hg17.chrX 3816578 3816579
+fr1.chrUn 343715367 343715368
+hg17.chrX 3816582 3816582
+fr1.chrUn 343715371 343715371
+hg17.chrX 3816586 3816591
+fr1.chrUn 343715375 343715380
+hg17.chrX 3816595 3816597
+fr1.chrUn 343715384 343715386
+hg17.chrX 3816600 3816602
+fr1.chrUn 343715389 343715391
+hg17.chrX 3816604 3816604
+fr1.chrUn 343715393 343715393
+hg17.chrX 3816607 3816607
+fr1.chrUn 343715396 343715396
+hg17.chrX 3816611 3816611
+fr1.chrUn 343715402 343715402
+hg17.chrX 3816614 3816616
+fr1.chrUn 343715405 343715407
+hg17.chrX 3816619 3816621
+fr1.chrUn 343715410 343715412
+hg17.chrX 3816625 3816625
+fr1.chrUn 343715416 343715416
+hg17.chrX 3816627 3816628
+fr1.chrUn 343715418 343715419
+hg17.chrX 3816632 3816635
+fr1.chrUn 343715423 343715426
+hg17.chrX 3816639 3817164
+fr1.chrUn 343715430 343715959
+hg17.chrX 3816645 3816646
+fr1.chrUn 343715441 343715442
+hg17.chrX 3816649 3816650
+fr1.chrUn 343715445 343715446
+hg17.chrX 3816662 3816662
+fr1.chrUn 343715467 343715467
+hg17.chrX 3816665 3816665
+fr1.chrUn 343715470 343715470
+hg17.chrX 3816667 3816668
+fr1.chrUn 343715472 343715473
+hg17.chrX 3816670 3816670
+fr1.chrUn 343715475 343715475
+hg17.chrX 3816672 3816672
+fr1.chrUn 343715477 343715477
+hg17.chrX 3816674 3816678
+fr1.chrUn 343715479 343715483
+hg17.chrX 3816680 3816682
+fr1.chrUn 343715485 343715487
+hg17.chrX 3816684 3816684
+fr1.chrUn 343715489 343715489
+hg17.chrX 3816687 3816687
+fr1.chrUn 343715492 343715492
+hg17.chrX 3816690 3816690
+fr1.chrUn 343715495 343715495
+hg17.chrX 3816693 3816693
+fr1.chrUn 343715498 343715498
+hg17.chrX 3816695 3816695
+fr1.chrUn 343715500 343715500
+hg17.chrX 3816698 3816699
+fr1.chrUn 343715503 343715504
+hg17.chrX 3816714 3816714
+fr1.chrUn 343715519 343715519
+hg17.chrX 3816720 3816720
+fr1.chrUn 343715525 343715525
+hg17.chrX 3816726 3816727
+fr1.chrUn 343715531 343715532
+hg17.chrX 3816736 3816736
+fr1.chrUn 343715541 343715541
+hg17.chrX 3816741 3816741
+fr1.chrUn 343715546 343715546
+hg17.chrX 3816748 3816750
+fr1.chrUn 343715553 343715555
+hg17.chrX 3816752 3816753
+fr1.chrUn 343715557 343715558
+hg17.chrX 3816756 3816757
+fr1.chrUn 343715561 343715562
+hg17.chrX 3816771 3816772
+fr1.chrUn 343715576 343715577
+hg17.chrX 3816777 3816778
+fr1.chrUn 343715582 343715583
+hg17.chrX 3816780 3816781
+fr1.chrUn 343715585 343715586
+hg17.chrX 3816784 3816784
+fr1.chrUn 343715589 343715589
+hg17.chrX 3816786 3816786
+fr1.chrUn 343715591 343715591
+hg17.chrX 3816789 3816790
+fr1.chrUn 343715594 343715595
+hg17.chrX 3816796 3816797
+fr1.chrUn 343715597 343715598
+hg17.chrX 3816800 3816800
+fr1.chrUn 343715601 343715601
+hg17.chrX 3816805 3816808
+fr1.chrUn 343715606 343715609
+hg17.chrX 3816810 3816811
+fr1.chrUn 343715611 343715612
+hg17.chrX 3816814 3816814
+fr1.chrUn 343715615 343715615
+hg17.chrX 3816818 3816819
+fr1.chrUn 343715619 343715620
+hg17.chrX 3816835 3816835
+fr1.chrUn 343715625 343715625
+hg17.chrX 3816837 3816837
+fr1.chrUn 343715627 343715627
+hg17.chrX 3816841 3816842
+fr1.chrUn 343715631 343715632
+hg17.chrX 3816844 3816846
+fr1.chrUn 343715634 343715636
+hg17.chrX 3816849 3816849
+fr1.chrUn 343715639 343715639
+hg17.chrX 3816853 3816853
+fr1.chrUn 343715643 343715643
+hg17.chrX 3816868 3816868
+fr1.chrUn 343715661 343715661
+hg17.chrX 3816870 3816870
+fr1.chrUn 343715663 343715663
+hg17.chrX 3816878 3816879
+fr1.chrUn 343715671 343715672
+hg17.chrX 3816882 3816882
+fr1.chrUn 343715675 343715675
+hg17.chrX 3816891 3816891
+fr1.chrUn 343715684 343715684
+hg17.chrX 3816894 3816894
+fr1.chrUn 343715687 343715687
+hg17.chrX 3816903 3816903
+fr1.chrUn 343715696 343715696
+hg17.chrX 3816906 3816906
+fr1.chrUn 343715699 343715699
+hg17.chrX 3816909 3816909
+fr1.chrUn 343715702 343715702
+hg17.chrX 3816912 3816912
+fr1.chrUn 343715705 343715705
+hg17.chrX 3816915 3816915
+fr1.chrUn 343715708 343715708
+hg17.chrX 3816918 3816920
+fr1.chrUn 343715711 343715713
+hg17.chrX 3816924 3816924
+fr1.chrUn 343715717 343715717
+hg17.chrX 3816930 3816931
+fr1.chrUn 343715723 343715724
+hg17.chrX 3816935 3816935
+fr1.chrUn 343715728 343715728
+hg17.chrX 3816939 3816939
+fr1.chrUn 343715732 343715732
+hg17.chrX 3816952 3816952
+fr1.chrUn 343715745 343715745
+hg17.chrX 3816958 3816958
+fr1.chrUn 343715751 343715751
+hg17.chrX 3816961 3816961
+fr1.chrUn 343715754 343715754
+hg17.chrX 3816964 3816964
+fr1.chrUn 343715757 343715757
+hg17.chrX 3816966 3816968
+fr1.chrUn 343715759 343715761
+hg17.chrX 3816972 3816972
+fr1.chrUn 343715765 343715765
+hg17.chrX 3816974 3816974
+fr1.chrUn 343715767 343715767
+hg17.chrX 3816976 3816977
+fr1.chrUn 343715769 343715770
+hg17.chrX 3816979 3816980
+fr1.chrUn 343715772 343715773
+hg17.chrX 3795168 3795168
+fr1.chrUn 343710815 343710815
+hg17.chrX 3795170 3795170
+fr1.chrUn 343710817 343710817
+hg17.chrX 3795175 3795175
+fr1.chrUn 343710822 343710822
+hg17.chrX 3795188 3795188
+fr1.chrUn 343710827 343710827
+hg17.chrX 3795192 3795194
+fr1.chrUn 343710831 343710833
+hg17.chrX 3795196 3795198
+fr1.chrUn 343710835 343710837
+hg17.chrX 3795207 3795208
+fr1.chrUn 343710846 343710847
+hg17.chrX 3795210 3795211
+fr1.chrUn 343710849 343710850
+hg17.chrX 3795218 3795222
+fr1.chrUn 343710861 343710865
+hg17.chrX 3795225 3795226
+fr1.chrUn 343710868 343710869
+hg17.chrX 3795229 3795230
+fr1.chrUn 343710874 343710875
+hg17.chrX 3795235 3795235
+fr1.chrUn 343710887 343710887
+hg17.chrX 3795239 3795239
+fr1.chrUn 343710891 343710891
+hg17.chrX 3795241 3795242
+fr1.chrUn 343710893 343710894
+hg17.chrX 3795245 3795251
+fr1.chrUn 343710897 343710903
+hg17.chrX 3795254 3795259
+fr1.chrUn 343710906 343710911
+hg17.chrX 3795265 3795265
+fr1.chrUn 343710917 343710917
+hg17.chrX 3795268 3795268
+fr1.chrUn 343710920 343710920
+hg17.chrX 3795272 3795272
+fr1.chrUn 343710924 343710924
+hg17.chrX 3795274 3795275
+fr1.chrUn 343710926 343710927
+hg17.chrX 3795284 3795284
+fr1.chrUn 343710940 343710940
+hg17.chrX 3795312 3795312
+fr1.chrUn 343710968 343710968
+hg17.chrX 3795317 3795317
+fr1.chrUn 343710973 343710973
+hg17.chrX 3795326 3795326
+fr1.chrUn 343710982 343710982
+hg17.chrX 3795332 3795332
+fr1.chrUn 343710988 343710988
+hg17.chrX 3795336 3795336
+fr1.chrUn 343710992 343710992
+hg17.chrX 3795338 3795338
+fr1.chrUn 343710994 343710994
+hg17.chrX 3795344 3795344
+fr1.chrUn 343711000 343711000
+hg17.chrX 3795350 3795350
+fr1.chrUn 343711006 343711006
+hg17.chrX 3795353 3795353
+fr1.chrUn 343711009 343711009
+hg17.chrX 3795356 3795356
+fr1.chrUn 343711012 343711012
+hg17.chrX 3795359 3795359
+fr1.chrUn 343711015 343711015
+hg17.chrX 3795377 3795377
+fr1.chrUn 343711033 343711033
+hg17.chrX 3795380 3795380
+fr1.chrUn 343711036 343711036
+hg17.chrX 3795383 3795383
+fr1.chrUn 343711039 343711039
+hg17.chrX 3795386 3795386
+fr1.chrUn 343711042 343711042
+hg17.chrX 3795389 3795389
+fr1.chrUn 343711045 343711045
+hg17.chrX 3795398 3795398
+fr1.chrUn 343711054 343711054
+hg17.chrX 3795401 3795401
+fr1.chrUn 343711057 343711057
+hg17.chrX 3795407 3795408
+fr1.chrUn 343711063 343711064
+hg17.chrX 3795416 3795416
+fr1.chrUn 343711072 343711072
+hg17.chrX 3795422 3795422
+fr1.chrUn 343711078 343711078
+hg17.chrX 3795425 3795425
+fr1.chrUn 343711081 343711081
+hg17.chrX 3795434 3795434
+fr1.chrUn 343711090 343711090
+hg17.chrX 3795443 3795443
+fr1.chrUn 343711099 343711099
+hg17.chrX 3795446 3795446
+fr1.chrUn 343711102 343711102
+hg17.chrX 3795449 3795449
+fr1.chrUn 343711105 343711105
+hg17.chrX 3795455 3795455
+fr1.chrUn 343711111 343711111
+hg17.chrX 3795461 3795461
+fr1.chrUn 343711117 343711117
+hg17.chrX 3795464 3795464
+fr1.chrUn 343711120 343711120
+hg17.chrX 3795467 3795467
+fr1.chrUn 343711123 343711123
+hg17.chrX 3795481 3795481
+fr1.chrUn 343711131 343711131
+hg17.chrX 3795483 3795483
+fr1.chrUn 343711133 343711133
+hg17.chrX 3795488 3795488
+fr1.chrUn 343711138 343711138
+hg17.chrX 3795491 3795491
+fr1.chrUn 343711141 343711141
+hg17.chrX 3795493 3795493
+fr1.chrUn 343711143 343711143
+hg17.chrX 3795500 3795501
+fr1.chrUn 343711150 343711151
+hg17.chrX 3795505 3795507
+fr1.chrUn 343711159 343711161
+hg17.chrX 3795511 3795511
+fr1.chrUn 343711165 343711165
+hg17.chrX 3795513 3795513
+fr1.chrUn 343711167 343711167
+hg17.chrX 3795515 3795515
+fr1.chrUn 343711169 343711169
+hg17.chrX 3795521 3795521
+fr1.chrUn 343711175 343711175
+hg17.chrX 3795523 3795523
+fr1.chrUn 343711177 343711177
+hg17.chrX 3787426 3787426
+fr1.chrUn 343708231 343708231
+hg17.chrX 3787430 3787430
+fr1.chrUn 343708235 343708235
+hg17.chrX 3787432 3787432
+fr1.chrUn 343708237 343708237
+hg17.chrX 3787435 3787436
+fr1.chrUn 343708240 343708241
+hg17.chrX 3787440 3787440
+fr1.chrUn 343708245 343708245
+hg17.chrX 3787449 3787449
+fr1.chrUn 343708254 343708254
+hg17.chrX 3787452 3787452
+fr1.chrUn 343708257 343708257
+hg17.chrX 3787461 3787462
+fr1.chrUn 343708266 343708267
+hg17.chrX 3787464 3787464
+fr1.chrUn 343708269 343708269
+hg17.chrX 3787471 3787471
+fr1.chrUn 343708276 343708276
+hg17.chrX 3787473 3787473
+fr1.chrUn 343708278 343708278
+hg17.chrX 3787476 3787477
+fr1.chrUn 343708281 343708282
+hg17.chrX 3787479 3787479
+fr1.chrUn 343708284 343708284
+hg17.chrX 3787491 3787491
+fr1.chrUn 343708296 343708296
+hg17.chrX 3787494 3787494
+fr1.chrUn 343708299 343708299
+hg17.chrX 3787500 3787500
+fr1.chrUn 343708305 343708305
+hg17.chrX 3787503 3787503
+fr1.chrUn 343708308 343708308
+hg17.chrX 3787510 3787510
+fr1.chrUn 343708315 343708315
+hg17.chrX 3787512 3787512
+fr1.chrUn 343708317 343708317
+hg17.chrX 3787515 3787515
+fr1.chrUn 343708320 343708320
+hg17.chrX 3787518 3787518
+fr1.chrUn 343708323 343708323
+hg17.chrX 3787539 3787539
+fr1.chrUn 343708344 343708344
+hg17.chrX 3787545 3787545
+fr1.chrUn 343708350 343708350
+hg17.chrX 3787548 3787548
+fr1.chrUn 343708353 343708353
+hg17.chrX 3787557 3787557
+fr1.chrUn 343708362 343708362
+hg17.chrX 3787561 3787561
+fr1.chrUn 343708366 343708366
+hg17.chrX 3787566 3787566
+fr1.chrUn 343708371 343708371
+hg17.chrX 3787569 3787569
+fr1.chrUn 343708374 343708374
+hg17.chrX 3787572 3787572
+fr1.chrUn 343708377 343708377
+hg17.chrX 3787578 3787578
+fr1.chrUn 343708383 343708383
+hg17.chrX 3787581 3787581
+fr1.chrUn 343708386 343708386
+hg17.chrX 3787584 3787584
+fr1.chrUn 343708389 343708389
+hg17.chrX 3787587 3787587
+fr1.chrUn 343708392 343708392
+hg17.chrX 3787590 3787590
+fr1.chrUn 343708395 343708395
+hg17.chrX 3787285 3787285
+fr1.chrUn 62078708 62078708
+hg17.chrX 3787293 3787296
+fr1.chrUn 62078716 62078719
+hg17.chrX 3787301 3787301
+fr1.chrUn 62078724 62078724
+hg17.chrX 3787303 3787303
+fr1.chrUn 62078726 62078726
+hg17.chrX 3787305 3787307
+fr1.chrUn 62078728 62078730
+hg17.chrX 3787323 3787423
+fr1.chrUn 62078739 62078848
+hg17.chrX 3787326 3787326
+fr1.chrUn 62078741 62078741
+hg17.chrX 3787328 3787328
+fr1.chrUn 62078743 62078743
+hg17.chrX 3787332 3787333
+fr1.chrUn 62078747 62078748
+hg17.chrX 3787335 3787336
+fr1.chrUn 62078750 62078751
+hg17.chrX 3787339 3787339
+fr1.chrUn 62078754 62078754
+hg17.chrX 3787342 3787343
+fr1.chrUn 62078757 62078758
+hg17.chrX 3787346 3787346
+fr1.chrUn 62078761 62078761
+hg17.chrX 3787348 3787448
+fr1.chrUn 62078763 62078872
+hg17.chrX 3787349 3787349
+fr1.chrUn 62078768 62078768
+hg17.chrX 3787355 3787355
+fr1.chrUn 62078774 62078774
+hg17.chrX 3787357 3787358
+fr1.chrUn 62078776 62078777
+hg17.chrX 3787360 3787360
+fr1.chrUn 62078779 62078779
+hg17.chrX 3787364 3787364
+fr1.chrUn 62078783 62078783
+hg17.chrX 3787369 3787369
+fr1.chrUn 62078796 62078796
+hg17.chrX 3787372 3787372
+fr1.chrUn 62078799 62078799
+hg17.chrX 3787378 3787378
+fr1.chrUn 62078810 62078810
+hg17.chrX 3776943 3776944
+fr1.chrUn 343707054 343707055
+hg17.chrX 3776946 3776946
+fr1.chrUn 343707057 343707057
+hg17.chrX 3776948 3776949
+fr1.chrUn 343707059 343707060
+hg17.chrX 3776951 3776951
+fr1.chrUn 343707062 343707062
+hg17.chrX 3776954 3776954
+fr1.chrUn 343707065 343707065
+hg17.chrX 3776957 3776958
+fr1.chrUn 343707068 343707069
+hg17.chrX 3776960 3776961
+fr1.chrUn 343707071 343707072
+hg17.chrX 3776963 3776963
+fr1.chrUn 343707074 343707074
+hg17.chrX 3776965 3776966
+fr1.chrUn 343707076 343707077
+hg17.chrX 3776968 3776969
+fr1.chrUn 343707079 343707080
+hg17.chrX 3776974 3776976
+fr1.chrUn 343707085 343707087
+hg17.chrX 3776980 3776980
+fr1.chrUn 343707091 343707091
+hg17.chrX 3776983 3776986
+fr1.chrUn 343707094 343707097
+hg17.chrX 3776995 3776995
+fr1.chrUn 343707102 343707102
+hg17.chrX 3776997 3776997
+fr1.chrUn 343707104 343707104
+hg17.chrX 3776999 3777000
+fr1.chrUn 343707106 343707107
+hg17.chrX 3777002 3777002
+fr1.chrUn 343707109 343707109
+hg17.chrX 3777005 3777007
+fr1.chrUn 343707112 343707114
+hg17.chrX 3777009 3777010
+fr1.chrUn 343707116 343707117
+hg17.chrX 3777012 3777012
+fr1.chrUn 343707119 343707119
+hg17.chrX 3777014 3777015
+fr1.chrUn 343707121 343707122
+hg17.chrX 3777018 3777018
+fr1.chrUn 343707125 343707125
+hg17.chrX 3777022 3777022
+fr1.chrUn 343707129 343707129
+hg17.chrX 3777024 3777026
+fr1.chrUn 343707131 343707133
+hg17.chrX 3777028 3777028
+fr1.chrUn 343707135 343707135
+hg17.chrX 3777030 3777033
+fr1.chrUn 343707137 343707140
+hg17.chrX 3777035 3777039
+fr1.chrUn 343707142 343707146
+hg17.chrX 3777041 3777041
+fr1.chrUn 343707148 343707148
+hg17.chrX 3777044 3777044
+fr1.chrUn 343707151 343707151
+hg17.chrX 3777046 3777046
+fr1.chrUn 343707153 343707153
+hg17.chrX 3777049 3777050
+fr1.chrUn 343707156 343707157
+hg17.chrX 3777053 3777054
+fr1.chrUn 343707160 343707161
+hg17.chrX 3777056 3777057
+fr1.chrUn 343707163 343707164
+hg17.chrX 3777059 3777059
+fr1.chrUn 343707166 343707166
+hg17.chrX 3777062 3777063
+fr1.chrUn 343707169 343707170
+hg17.chrX 3777065 3777066
+fr1.chrUn 343707172 343707173
+hg17.chrX 3777068 3777068
+fr1.chrUn 343707175 343707175
+hg17.chrX 3777071 3777073
+fr1.chrUn 343707178 343707180
+hg17.chrX 3777076 3777076
+fr1.chrUn 343707185 343707185
+hg17.chrX 3777081 3777081
+fr1.chrUn 343707190 343707190
+hg17.chrX 3777084 3777084
+fr1.chrUn 343707193 343707193
+hg17.chrX 3777087 3777087
+fr1.chrUn 343707196 343707196
+hg17.chrX 3777090 3777090
+fr1.chrUn 343707199 343707199
+hg17.chrX 3777092 3777095
+fr1.chrUn 343707201 343707204
+hg17.chrX 3777099 3777099
+fr1.chrUn 343707208 343707208
+hg17.chrX 3777103 3777103
+fr1.chrUn 343707212 343707212
+hg17.chrX 3777108 3777111
+fr1.chrUn 343707217 343707220
+hg17.chrX 3777119 3777120
+fr1.chrUn 343707228 343707229
+hg17.chrX 3777123 3777124
+fr1.chrUn 343707232 343707233
+hg17.chrX 3777126 3777127
+fr1.chrUn 343707235 343707236
+hg17.chrX 3777129 3777129
+fr1.chrUn 343707238 343707238
+hg17.chrX 3777131 3777132
+fr1.chrUn 343707240 343707241
+hg17.chrX 3777135 3777135
+fr1.chrUn 343707244 343707244
+hg17.chrX 3777139 3777141
+fr1.chrUn 343707248 343707250
+hg17.chrX 3777144 3777144
+fr1.chrUn 343707253 343707253
+hg17.chrX 3777148 3777148
+fr1.chrUn 343707257 343707257
+hg17.chrX 3777153 3777153
+fr1.chrUn 343707262 343707262
+hg17.chrX 3777156 3777156
+fr1.chrUn 343707265 343707265
+hg17.chrX 3777159 3777160
+fr1.chrUn 343707268 343707269
+hg17.chrX 3777162 3777163
+fr1.chrUn 343707271 343707272
+hg17.chrX 3777177 3777178
+fr1.chrUn 343707286 343707287
+hg17.chrX 3777180 3777181
+fr1.chrUn 343707289 343707290
+hg17.chrX 3777186 3777186
+fr1.chrUn 343707295 343707295
+hg17.chrX 3777189 3777189
+fr1.chrUn 343707298 343707298
+hg17.chrX 3777193 3777193
+fr1.chrUn 343707302 343707302
+hg17.chrX 3777198 3777198
+fr1.chrUn 343707307 343707307
+hg17.chrX 3777200 3777200
+fr1.chrUn 343707309 343707309
+hg17.chrX 3777204 3777204
+fr1.chrUn 343707313 343707313
+hg17.chrX 3777206 3777207
+fr1.chrUn 343707315 343707316
+hg17.chrX 3777211 3777211
+fr1.chrUn 343707320 343707320
+hg17.chrX 3777213 3777213
+fr1.chrUn 343707322 343707322
+hg17.chrX 3777216 3777216
+fr1.chrUn 343707325 343707325
+hg17.chrX 3777219 3777219
+fr1.chrUn 343707328 343707328
+hg17.chrX 3760376 3760376
+fr1.chrUn 343706400 343706400
+hg17.chrX 3760382 3760382
+fr1.chrUn 343706406 343706406
+hg17.chrX 3760385 3760385
+fr1.chrUn 343706409 343706409
+hg17.chrX 3760388 3760388
+fr1.chrUn 343706412 343706412
+hg17.chrX 3760391 3760391
+fr1.chrUn 343706415 343706415
+hg17.chrX 3760400 3760400
+fr1.chrUn 343706424 343706424
+hg17.chrX 3760409 3760410
+fr1.chrUn 343706433 343706434
+hg17.chrX 3760415 3760415
+fr1.chrUn 343706439 343706439
+hg17.chrX 3760418 3760418
+fr1.chrUn 343706442 343706442
+hg17.chrX 3760421 3760421
+fr1.chrUn 343706445 343706445
+hg17.chrX 3760430 3760432
+fr1.chrUn 343706454 343706456
+hg17.chrX 3760436 3760436
+fr1.chrUn 343706460 343706460
+hg17.chrX 3760442 3760442
+fr1.chrUn 343706466 343706466
+hg17.chrX 3760445 3760445
+fr1.chrUn 343706469 343706469
+hg17.chrX 3760448 3760448
+fr1.chrUn 343706472 343706472
+hg17.chrX 3760460 3760460
+fr1.chrUn 343706484 343706484
+hg17.chrX 3760465 3760465
+fr1.chrUn 343706489 343706489
+hg17.chrX 3733406 3733406
+fr1.chrUn 303515825 303515825
+hg17.chrX 3733409 3733409
+fr1.chrUn 303515828 303515828
+hg17.chrX 3733413 3733414
+fr1.chrUn 303515832 303515833
+hg17.chrX 3733417 3733419
+fr1.chrUn 303515836 303515838
+hg17.chrX 3733426 3733427
+fr1.chrUn 303515845 303515846
+hg17.chrX 3733429 3733429
+fr1.chrUn 303515848 303515848
+hg17.chrX 3733431 3733431
+fr1.chrUn 303515850 303515850
+hg17.chrX 3733433 3733433
+fr1.chrUn 303515852 303515852
+hg17.chrX 3733436 3733436
+fr1.chrUn 303515855 303515855
+hg17.chrX 3733440 3733440
+fr1.chrUn 303515859 303515859
+hg17.chrX 3733445 3733445
+fr1.chrUn 303515864 303515864
+hg17.chrX 3733454 3733454
+fr1.chrUn 303515871 303515871
+hg17.chrX 3733456 3733457
+fr1.chrUn 303515873 303515874
+hg17.chrX 3733479 3733479
+fr1.chrUn 303515877 303515877
+hg17.chrX 3733484 3733488
+fr1.chrUn 303515882 303515886
+hg17.chrX 3733491 3733491
+fr1.chrUn 303515889 303515889
+hg17.chrX 3733493 3733494
+fr1.chrUn 303515891 303515892
+hg17.chrX 3733496 3733499
+fr1.chrUn 303515894 303515897
+hg17.chrX 3733501 3733501
+fr1.chrUn 303515899 303515899
+hg17.chrX 3733503 3733504
+fr1.chrUn 303515901 303515902
+hg17.chrX 3733506 3733506
+fr1.chrUn 303515904 303515904
+hg17.chrX 3733508 3733508
+fr1.chrUn 303515906 303515906
+hg17.chrX 3733510 3733510
+fr1.chrUn 303515908 303515908
+hg17.chrX 3733519 3733519
+fr1.chrUn 303515910 303515910
+hg17.chrX 3733521 3733521
+fr1.chrUn 303515912 303515912
+hg17.chrX 3733523 3733523
+fr1.chrUn 303515914 303515914
+hg17.chrX 3733528 3733529
+fr1.chrUn 303515919 303515920
+hg17.chrX 3733537 3733538
+fr1.chrUn 303515925 303515926
+hg17.chrX 3733541 3733541
+fr1.chrUn 303515929 303515929
+hg17.chrX 3733543 3733543
+fr1.chrUn 303515931 303515931
+hg17.chrX 3733549 3733549
+fr1.chrUn 303515937 303515937
+hg17.chrX 3733551 3733553
+fr1.chrUn 303515939 303515941
+hg17.chrX 3733555 3733559
+fr1.chrUn 303515943 303515947
+hg17.chrX 3733563 3733564
+fr1.chrUn 303515951 303515952
+hg17.chrX 3733567 3733567
+fr1.chrUn 303515955 303515955
+hg17.chrX 3733569 3733569
+fr1.chrUn 303515957 303515957
+hg17.chrX 3733574 3733574
+fr1.chrUn 303515962 303515962
+hg17.chrX 3733579 3733581
+fr1.chrUn 303515967 303515969
+hg17.chrX 3733591 3733592
+fr1.chrUn 303515979 303515980
+hg17.chrX 3733594 3733596
+fr1.chrUn 303515982 303515984
+hg17.chrX 3733600 3733601
+fr1.chrUn 303515988 303515989
+hg17.chrX 3733607 3733608
+fr1.chrUn 303515995 303515996
+hg17.chrX 3733610 3734086
+fr1.chrUn 303515998 303516442
+hg17.chrX 3733612 3733612
+fr1.chrUn 303516003 303516003
+hg17.chrX 3733614 3733614
+fr1.chrUn 303516005 303516005
+hg17.chrX 3733617 3733618
+fr1.chrUn 303516008 303516009
+hg17.chrX 3733620 3733620
+fr1.chrUn 303516011 303516011
+hg17.chrX 3733623 3733625
+fr1.chrUn 303516014 303516016
+hg17.chrX 3733629 3733632
+fr1.chrUn 303516020 303516023
+hg17.chrX 3733634 3733634
+fr1.chrUn 303516025 303516025
+hg17.chrX 3733636 3733636
+fr1.chrUn 303516027 303516027
+hg17.chrX 3733642 3733642
+fr1.chrUn 303516033 303516033
+hg17.chrX 3733644 3733645
+fr1.chrUn 303516035 303516036
+hg17.chrX 3733647 3733648
+fr1.chrUn 303516038 303516039
+hg17.chrX 3733651 3733651
+fr1.chrUn 303516042 303516042
+hg17.chrX 3733653 3734129
+fr1.chrUn 303516044 303516488
+hg17.chrX 3733657 3733657
+fr1.chrUn 303516053 303516053
+hg17.chrX 3733661 3733662
+fr1.chrUn 303516057 303516058
+hg17.chrX 3733666 3733666
+fr1.chrUn 303516062 303516062
+hg17.chrX 3733670 3733671
+fr1.chrUn 303516066 303516067
+hg17.chrX 3733673 3733673
+fr1.chrUn 303516069 303516069
+hg17.chrX 3733677 3733677
+fr1.chrUn 303516073 303516073
+hg17.chrX 3733680 3733685
+fr1.chrUn 303516076 303516081
+hg17.chrX 3733689 3733692
+fr1.chrUn 303516085 303516088
+hg17.chrX 3733694 3733695
+fr1.chrUn 303516090 303516091
+hg17.chrX 3733697 3733698
+fr1.chrUn 303516093 303516094
+hg17.chrX 3733700 3733704
+fr1.chrUn 303516096 303516100
+hg17.chrX 3733709 3733710
+fr1.chrUn 303516105 303516106
+hg17.chrX 3733715 3733716
+fr1.chrUn 303516111 303516112
+hg17.chrX 3733718 3733718
+fr1.chrUn 303516114 303516114
+hg17.chrX 3733720 3733720
+fr1.chrUn 303516116 303516116
+hg17.chrX 3733723 3733723
+fr1.chrUn 303516119 303516119
+hg17.chrX 3733733 3733733
+fr1.chrUn 303516127 303516127
+hg17.chrX 3733735 3733736
+fr1.chrUn 303516129 303516130
+hg17.chrX 3733741 3733741
+fr1.chrUn 303516135 303516135
+hg17.chrX 3733747 3733748
+fr1.chrUn 303516143 303516144
+hg17.chrX 3733750 3733751
+fr1.chrUn 303516146 303516147
+hg17.chrX 3733753 3733753
+fr1.chrUn 303516149 303516149
+hg17.chrX 3733758 3733762
+fr1.chrUn 303516154 303516158
+hg17.chrX 3733765 3733765
+fr1.chrUn 303516161 303516161
+hg17.chrX 3733767 3733767
+fr1.chrUn 303516163 303516163
+hg17.chrX 3733769 3733769
+fr1.chrUn 303516165 303516165
+hg17.chrX 3733771 3733773
+fr1.chrUn 303516167 303516169
+hg17.chrX 3733775 3733775
+fr1.chrUn 303516171 303516171
+hg17.chrX 3733778 3733778
+fr1.chrUn 303516174 303516174
+hg17.chrX 3733781 3733781
+fr1.chrUn 303516177 303516177
+hg17.chrX 3733787 3734263
+fr1.chrUn 303516183 303516627
+hg17.chrX 3733809 3733810
+fr1.chrUn 303516191 303516192
+hg17.chrX 3733814 3733814
+fr1.chrUn 303516196 303516196
+hg17.chrX 3733819 3733819
+fr1.chrUn 303516206 303516206
+hg17.chrX 3733823 3733823
+fr1.chrUn 303516210 303516210
+hg17.chrX 3733825 3733825
+fr1.chrUn 303516212 303516212
+hg17.chrX 3733829 3733830
+fr1.chrUn 303516216 303516217
+hg17.chrX 3733832 3733832
+fr1.chrUn 303516219 303516219
+hg17.chrX 3733834 3733834
+fr1.chrUn 303516221 303516221
+hg17.chrX 3733836 3733837
+fr1.chrUn 303516223 303516224
+hg17.chrX 3733843 3733846
+fr1.chrUn 303516230 303516233
+hg17.chrX 3733850 3733854
+fr1.chrUn 303516237 303516241
+hg17.chrX 3733856 3733858
+fr1.chrUn 303516243 303516245
+hg17.chrX 3733861 3733861
+fr1.chrUn 303516248 303516248
+hg17.chrX 3733863 3733865
+fr1.chrUn 303516250 303516252
+hg17.chrX 3733869 3733869
+fr1.chrUn 303516256 303516256
+hg17.chrX 3733871 3733874
+fr1.chrUn 303516258 303516261
+hg17.chrX 3733879 3733879
+fr1.chrUn 303516266 303516266
+hg17.chrX 3731359 3731359
+fr1.chrUn 303515728 303515728
+hg17.chrX 3731361 3731361
+fr1.chrUn 303515730 303515730
+hg17.chrX 3731363 3731363
+fr1.chrUn 303515732 303515732
+hg17.chrX 3731365 3731366
+fr1.chrUn 303515734 303515735
+hg17.chrX 3731368 3731368
+fr1.chrUn 303515737 303515737
+hg17.chrX 3731376 3731376
+fr1.chrUn 303515739 303515739
+hg17.chrX 3731378 3731378
+fr1.chrUn 303515741 303515741
+hg17.chrX 3731381 3731382
+fr1.chrUn 303515744 303515745
+hg17.chrX 3731385 3731385
+fr1.chrUn 303515748 303515748
+hg17.chrX 3731391 3731391
+fr1.chrUn 303515753 303515753
+hg17.chrX 3731395 3731397
+fr1.chrUn 303515757 303515759
+hg17.chrX 3731400 3731400
+fr1.chrUn 303515762 303515762
+hg17.chrX 3731403 3731407
+fr1.chrUn 303515765 303515769
+hg17.chrX 3731410 3731410
+fr1.chrUn 303515772 303515772
+hg17.chrX 3731412 3731415
+fr1.chrUn 303515774 303515777
+hg17.chrX 3731419 3731419
+fr1.chrUn 303515781 303515781
+hg17.chrX 3731430 3731430
+fr1.chrUn 303515786 303515786
+hg17.chrX 3731433 3731433
+fr1.chrUn 303515789 303515789
+hg17.chrX 3731435 3731435
+fr1.chrUn 303515791 303515791
+hg17.chrX 3731439 3731439
+fr1.chrUn 303515795 303515795
+hg17.chrX 3731441 3731443
+fr1.chrUn 303515797 303515799
+hg17.chrX 3731446 3731446
+fr1.chrUn 303515802 303515802
+hg17.chrX 3731449 3731449
+fr1.chrUn 303515805 303515805
+hg17.chrX 3730593 3730593
+fr1.chrUn 303515380 303515380
+hg17.chrX 3730596 3730597
+fr1.chrUn 303515383 303515384
+hg17.chrX 3730600 3730600
+fr1.chrUn 303515387 303515387
+hg17.chrX 3730602 3730602
+fr1.chrUn 303515389 303515389
+hg17.chrX 3730604 3730608
+fr1.chrUn 303515391 303515395
+hg17.chrX 3730610 3730612
+fr1.chrUn 303515397 303515399
+hg17.chrX 3730618 3730618
+fr1.chrUn 303515405 303515405
+hg17.chrX 3730622 3730623
+fr1.chrUn 303515409 303515410
+hg17.chrX 3730628 3730628
+fr1.chrUn 303515415 303515415
+hg17.chrX 3730630 3730631
+fr1.chrUn 303515417 303515418
+hg17.chrX 3730633 3730633
+fr1.chrUn 303515420 303515420
+hg17.chrX 3730635 3730635
+fr1.chrUn 303515422 303515422
+hg17.chrX 3730639 3730642
+fr1.chrUn 303515426 303515429
+hg17.chrX 3730644 3730644
+fr1.chrUn 303515433 303515433
+hg17.chrX 3730646 3730647
+fr1.chrUn 303515435 303515436
+hg17.chrX 3730651 3730651
+fr1.chrUn 303515440 303515440
+hg17.chrX 3730659 3730659
+fr1.chrUn 303515448 303515448
+hg17.chrX 3730662 3730662
+fr1.chrUn 303515451 303515451
+hg17.chrX 3730664 3730664
+fr1.chrUn 303515453 303515453
+hg17.chrX 3730666 3730666
+fr1.chrUn 303515455 303515455
+hg17.chrX 3730670 3730670
+fr1.chrUn 303515457 303515457
+hg17.chrX 3730672 3730674
+fr1.chrUn 303515459 303515461
+hg17.chrX 3730681 3731128
+fr1.chrUn 303515468 303515814
+hg17.chrX 3730685 3730685
+fr1.chrUn 303515471 303515471
+hg17.chrX 3730688 3730690
+fr1.chrUn 303515474 303515476
+hg17.chrX 3730694 3730694
+fr1.chrUn 303515480 303515480
+hg17.chrX 3730696 3730696
+fr1.chrUn 303515482 303515482
+hg17.chrX 3730700 3730701
+fr1.chrUn 303515486 303515487
+hg17.chrX 3730703 3730705
+fr1.chrUn 303515489 303515491
+hg17.chrX 3730717 3730717
+fr1.chrUn 303515500 303515500
+hg17.chrX 3730721 3730721
+fr1.chrUn 303515504 303515504
+hg17.chrX 3730723 3730723
+fr1.chrUn 303515506 303515506
+hg17.chrX 3730726 3730728
+fr1.chrUn 303515509 303515511
+hg17.chrX 3730730 3730730
+fr1.chrUn 303515513 303515513
+hg17.chrX 3730732 3730733
+fr1.chrUn 303515515 303515516
+hg17.chrX 3730756 3730756
+fr1.chrUn 303515525 303515525
+hg17.chrX 3730758 3730758
+fr1.chrUn 303515527 303515527
+hg17.chrX 3730760 3730760
+fr1.chrUn 303515529 303515529
+hg17.chrX 3730762 3730762
+fr1.chrUn 303515531 303515531
+hg17.chrX 3730765 3730765
+fr1.chrUn 303515534 303515534
+hg17.chrX 3730774 3730774
+fr1.chrUn 303515540 303515540
+hg17.chrX 3730776 3730776
+fr1.chrUn 303515542 303515542
+hg17.chrX 3730778 3730779
+fr1.chrUn 303515544 303515545
+hg17.chrX 3730790 3730791
+fr1.chrUn 303515550 303515551
+hg17.chrX 3730796 3730796
+fr1.chrUn 303515556 303515556
+hg17.chrX 3730798 3730799
+fr1.chrUn 303515558 303515559
+hg17.chrX 3730802 3730802
+fr1.chrUn 303515562 303515562
+hg17.chrX 3730804 3730804
+fr1.chrUn 303515564 303515564
+hg17.chrX 3730807 3730807
+fr1.chrUn 303515567 303515567
+hg17.chrX 3730810 3730810
+fr1.chrUn 303515570 303515570
+hg17.chrX 3730822 3730822
+fr1.chrUn 303515578 303515578
+hg17.chrX 3730824 3730824
+fr1.chrUn 303515580 303515580
+hg17.chrX 3730828 3730831
+fr1.chrUn 303515584 303515587
+hg17.chrX 3730834 3730834
+fr1.chrUn 303515590 303515590
+hg17.chrX 3730837 3730838
+fr1.chrUn 303515593 303515594
+hg17.chrX 3730841 3730841
+fr1.chrUn 303515597 303515597
+hg17.chrX 3730850 3730850
+fr1.chrUn 303515602 303515602
+hg17.chrX 3730854 3730855
+fr1.chrUn 303515606 303515607
+hg17.chrX 3730857 3730857
+fr1.chrUn 303515609 303515609
+hg17.chrX 3730861 3730861
+fr1.chrUn 303515613 303515613
+hg17.chrX 3730863 3730864
+fr1.chrUn 303515615 303515616
+hg17.chrX 3730876 3730876
+fr1.chrUn 303515624 303515624
+hg17.chrX 3730880 3730880
+fr1.chrUn 303515628 303515628
+hg17.chrX 3730882 3730883
+fr1.chrUn 303515630 303515631
+hg17.chrX 3730885 3730885
+fr1.chrUn 303515633 303515633
+hg17.chrX 3730887 3730889
+fr1.chrUn 303515635 303515637
+hg17.chrX 3730892 3730892
+fr1.chrUn 303515640 303515640
+hg17.chrX 3730928 3730928
+fr1.chrUn 303515646 303515646
+hg17.chrX 3730931 3730931
+fr1.chrUn 303515649 303515649
+hg17.chrX 3730933 3730933
+fr1.chrUn 303515651 303515651
+hg17.chrX 3730936 3730936
+fr1.chrUn 303515654 303515654
+hg17.chrX 3730938 3730938
+fr1.chrUn 303515656 303515656
+hg17.chrX 3730950 3730950
+fr1.chrUn 303515664 303515664
+hg17.chrX 3730952 3730952
+fr1.chrUn 303515666 303515666
+hg17.chrX 3730955 3730955
+fr1.chrUn 303515669 303515669
+hg17.chrX 3730957 3730957
+fr1.chrUn 303515671 303515671
+hg17.chrX 3730959 3730959
+fr1.chrUn 303515673 303515673
+hg17.chrX 3730977 3730977
+fr1.chrUn 303515675 303515675
+hg17.chrX 3730981 3730981
+fr1.chrUn 303515679 303515679
+hg17.chrX 3730984 3730984
+fr1.chrUn 303515682 303515682
+hg17.chrX 3730988 3730988
+fr1.chrUn 303515686 303515686
+hg17.chrX 3730992 3731439
+fr1.chrUn 303515690 303516036
+hg17.chrX 3731005 3731005
+fr1.chrUn 303515693 303515693
+hg17.chrX 3731007 3731007
+fr1.chrUn 303515695 303515695
+hg17.chrX 3731019 3731019
+fr1.chrUn 303515705 303515705
+hg17.chrX 3731024 3731024
+fr1.chrUn 303515710 303515710
+hg17.chrX 3731026 3731027
+fr1.chrUn 303515712 303515713
+hg17.chrX 3731031 3731032
+fr1.chrUn 303515717 303515718
+hg17.chrX 3731034 3731034
+fr1.chrUn 303515720 303515720
+hg17.chrX 3729222 3729223
+fr1.chrUn 343703528 343703529
+hg17.chrX 3729234 3729234
+fr1.chrUn 343703540 343703540
+hg17.chrX 3729237 3729237
+fr1.chrUn 343703543 343703543
+hg17.chrX 3729240 3729240
+fr1.chrUn 343703546 343703546
+hg17.chrX 3729243 3729243
+fr1.chrUn 343703549 343703549
+hg17.chrX 3729246 3729246
+fr1.chrUn 343703552 343703552
+hg17.chrX 3729249 3729249
+fr1.chrUn 343703555 343703555
+hg17.chrX 3729252 3729252
+fr1.chrUn 343703558 343703558
+hg17.chrX 3729257 3729258
+fr1.chrUn 343703563 343703564
+hg17.chrX 3729262 3729264
+fr1.chrUn 343703568 343703570
+hg17.chrX 3729267 3729267
+fr1.chrUn 343703573 343703573
+hg17.chrX 3729270 3729270
+fr1.chrUn 343703576 343703576
+hg17.chrX 3729273 3729273
+fr1.chrUn 343703579 343703579
+hg17.chrX 3729276 3729276
+fr1.chrUn 343703582 343703582
+hg17.chrX 3729279 3729279
+fr1.chrUn 343703585 343703585
+hg17.chrX 3729288 3729288
+fr1.chrUn 343703594 343703594
+hg17.chrX 3729291 3729291
+fr1.chrUn 343703597 343703597
+hg17.chrX 3729295 3729295
+fr1.chrUn 343703601 343703601
+hg17.chrX 3729298 3729298
+fr1.chrUn 343703604 343703604
+hg17.chrX 3729300 3729301
+fr1.chrUn 343703606 343703607
+hg17.chrX 3729303 3729303
+fr1.chrUn 343703609 343703609
+hg17.chrX 3729306 3729306
+fr1.chrUn 343703612 343703612
+hg17.chrX 3729315 3729315
+fr1.chrUn 343703621 343703621
+hg17.chrX 3729324 3729324
+fr1.chrUn 343703630 343703630
+hg17.chrX 3729333 3729333
+fr1.chrUn 343703639 343703639
+hg17.chrX 3729339 3729339
+fr1.chrUn 343703645 343703645
+hg17.chrX 3729342 3729342
+fr1.chrUn 343703648 343703648
+hg17.chrX 3729351 3729351
+fr1.chrUn 343703657 343703657
+hg17.chrX 3729360 3729360
+fr1.chrUn 343703666 343703666
+hg17.chrX 3729363 3729363
+fr1.chrUn 343703669 343703669
+hg17.chrX 3729369 3729369
+fr1.chrUn 343703675 343703675
+hg17.chrX 3729372 3729372
+fr1.chrUn 343703678 343703678
+hg17.chrX 3729375 3729375
+fr1.chrUn 343703681 343703681
+hg17.chrX 3729378 3729378
+fr1.chrUn 343703684 343703684
+hg17.chrX 3729381 3729381
+fr1.chrUn 343703687 343703687
+hg17.chrX 3729390 3729390
+fr1.chrUn 343703696 343703696
+hg17.chrX 3729393 3729393
+fr1.chrUn 343703699 343703699
+hg17.chrX 3729396 3729396
+fr1.chrUn 343703702 343703702
+hg17.chrX 3729402 3729402
+fr1.chrUn 343703708 343703708
+hg17.chrX 3729408 3729409
+fr1.chrUn 343703714 343703715
+hg17.chrX 3729411 3729412
+fr1.chrUn 343703717 343703718
+hg17.chrX 3729417 3729417
+fr1.chrUn 343703723 343703723
+hg17.chrX 3729426 3729426
+fr1.chrUn 343703732 343703732
+hg17.chrX 3729429 3729429
+fr1.chrUn 343703735 343703735
+hg17.chrX 3729432 3729432
+fr1.chrUn 343703738 343703738
+hg17.chrX 3729435 3729435
+fr1.chrUn 343703741 343703741
+hg17.chrX 3729449 3729449
+fr1.chrUn 343703755 343703755
+hg17.chrX 3729452 3729454
+fr1.chrUn 343703758 343703760
+hg17.chrX 3700392 3700392
+fr1.chrUn 241017739 241017739
+hg17.chrX 3700394 3700394
+fr1.chrUn 241017741 241017741
+hg17.chrX 3700396 3700396
+fr1.chrUn 241017743 241017743
+hg17.chrX 3700400 3700401
+fr1.chrUn 241017747 241017748
+hg17.chrX 3700406 3700406
+fr1.chrUn 241017753 241017753
+hg17.chrX 3700409 3700410
+fr1.chrUn 241017756 241017757
+hg17.chrX 3700412 3700412
+fr1.chrUn 241017759 241017759
+hg17.chrX 3700418 3700420
+fr1.chrUn 241017766 241017768
+hg17.chrX 3700425 3700425
+fr1.chrUn 241017774 241017774
+hg17.chrX 3700430 3700431
+fr1.chrUn 241017782 241017783
+hg17.chrX 3700433 3700433
+fr1.chrUn 241017785 241017785
+hg17.chrX 3700438 3700438
+fr1.chrUn 241017790 241017790
+hg17.chrX 3700441 3700441
+fr1.chrUn 241017793 241017793
+hg17.chrX 3700448 3700449
+fr1.chrUn 241017800 241017801
+hg17.chrX 3700451 3700451
+fr1.chrUn 241017803 241017803
+hg17.chrX 3700454 3700460
+fr1.chrUn 241017806 241017812
+hg17.chrX 3700462 3700466
+fr1.chrUn 241017814 241017818
+hg17.chrX 3700469 3700469
+fr1.chrUn 241017821 241017821
+hg17.chrX 3700471 3700472
+fr1.chrUn 241017823 241017824
+hg17.chrX 3700474 3700474
+fr1.chrUn 241017826 241017826
+hg17.chrX 3700477 3700477
+fr1.chrUn 241017829 241017829
+hg17.chrX 3700480 3700787
+fr1.chrUn 241017832 241018162
+hg17.chrX 3700485 3700486
+fr1.chrUn 241017834 241017835
+hg17.chrX 3700489 3700489
+fr1.chrUn 241017838 241017838
+hg17.chrX 3700491 3700491
+fr1.chrUn 241017840 241017840
+hg17.chrX 3700493 3700493
+fr1.chrUn 241017842 241017842
+hg17.chrX 3700496 3700496
+fr1.chrUn 241017845 241017845
+hg17.chrX 3700502 3700502
+fr1.chrUn 241017851 241017851
+hg17.chrX 3700505 3700505
+fr1.chrUn 241017854 241017854
+hg17.chrX 3700511 3700511
+fr1.chrUn 241017860 241017860
+hg17.chrX 3700514 3700514
+fr1.chrUn 241017863 241017863
+hg17.chrX 3700517 3700517
+fr1.chrUn 241017866 241017866
+hg17.chrX 3700520 3700520
+fr1.chrUn 241017869 241017869
+hg17.chrX 3700526 3700526
+fr1.chrUn 241017875 241017875
+hg17.chrX 3700535 3700535
+fr1.chrUn 241017884 241017884
+hg17.chrX 3700547 3700549
+fr1.chrUn 241017896 241017898
+hg17.chrX 3700553 3700553
+fr1.chrUn 241017902 241017902
+hg17.chrX 3700563 3700564
+fr1.chrUn 241017921 241017922
+hg17.chrX 3700566 3700569
+fr1.chrUn 241017924 241017927
+hg17.chrX 3700571 3700571
+fr1.chrUn 241017929 241017929
+hg17.chrX 3700573 3700573
+fr1.chrUn 241017931 241017931
+hg17.chrX 3700579 3700579
+fr1.chrUn 241017937 241017937
+hg17.chrX 3700582 3700582
+fr1.chrUn 241017943 241017943
+hg17.chrX 3700584 3700584
+fr1.chrUn 241017945 241017945
+hg17.chrX 3700589 3700591
+fr1.chrUn 241017950 241017952
+hg17.chrX 3700597 3700597
+fr1.chrUn 241017962 241017962
+hg17.chrX 3700601 3700602
+fr1.chrUn 241017966 241017967
+hg17.chrX 3700604 3700604
+fr1.chrUn 241017969 241017969
+hg17.chrX 3700606 3700606
+fr1.chrUn 241017971 241017971
+hg17.chrX 3700609 3700609
+fr1.chrUn 241017974 241017974
+hg17.chrX 3700611 3700613
+fr1.chrUn 241017976 241017978
+hg17.chrX 3700615 3700615
+fr1.chrUn 241017980 241017980
+hg17.chrX 3700619 3700619
+fr1.chrUn 241017984 241017984
+hg17.chrX 3700622 3700626
+fr1.chrUn 241017987 241017991
+hg17.chrX 3700628 3700628
+fr1.chrUn 241017993 241017993
+hg17.chrX 3700630 3700937
+fr1.chrUn 241017995 241018325
+hg17.chrX 3700636 3700637
+fr1.chrUn 241018004 241018005
+hg17.chrX 3700640 3700640
+fr1.chrUn 241018008 241018008
+hg17.chrX 3700643 3700644
+fr1.chrUn 241018011 241018012
+hg17.chrX 3700646 3700649
+fr1.chrUn 241018014 241018017
+hg17.chrX 3700656 3700656
+fr1.chrUn 241018022 241018022
+hg17.chrX 3700658 3700658
+fr1.chrUn 241018024 241018024
+hg17.chrX 3700663 3700665
+fr1.chrUn 241018029 241018031
+hg17.chrX 3700669 3700669
+fr1.chrUn 241018035 241018035
+hg17.chrX 3700677 3700986
+fr1.chrUn 241018045 241018377
+hg17.chrX 3700681 3700681
+fr1.chrUn 241018051 241018051
+hg17.chrX 3700685 3700686
+fr1.chrUn 241018055 241018056
+hg17.chrX 3700691 3700692
+fr1.chrUn 241018061 241018062
+hg17.chrX 3639443 3639443
+fr1.chrUn 333536352 333536352
+hg17.chrX 3639445 3639445
+fr1.chrUn 333536354 333536354
+hg17.chrX 3639449 3639449
+fr1.chrUn 333536358 333536358
+hg17.chrX 3639452 3639452
+fr1.chrUn 333536361 333536361
+hg17.chrX 3639454 3639456
+fr1.chrUn 333536363 333536365
+hg17.chrX 3639458 3639458
+fr1.chrUn 333536367 333536367
+hg17.chrX 3639468 3639469
+fr1.chrUn 333536381 333536382
+hg17.chrX 3639471 3639471
+fr1.chrUn 333536384 333536384
+hg17.chrX 3639474 3639474
+fr1.chrUn 333536387 333536387
+hg17.chrX 3639476 3639477
+fr1.chrUn 333536389 333536390
+hg17.chrX 3639479 3639479
+fr1.chrUn 333536392 333536392
+hg17.chrX 3639487 3639491
+fr1.chrUn 333536400 333536404
+hg17.chrX 3639493 3639495
+fr1.chrUn 333536406 333536408
+hg17.chrX 3639498 3639498
+fr1.chrUn 333536411 333536411
+hg17.chrX 3639509 3639510
+fr1.chrUn 333536425 333536426
+hg17.chrX 3639512 3639512
+fr1.chrUn 333536428 333536428
+hg17.chrX 3639515 3639515
+fr1.chrUn 333536431 333536431
+hg17.chrX 3639517 3639520
+fr1.chrUn 333536433 333536436
+hg17.chrX 3639522 3639522
+fr1.chrUn 333536438 333536438
+hg17.chrX 3639525 3639525
+fr1.chrUn 333536441 333536441
+hg17.chrX 3639527 3639528
+fr1.chrUn 333536443 333536444
+hg17.chrX 3639532 3639533
+fr1.chrUn 333536451 333536452
+hg17.chrX 3639536 3639536
+fr1.chrUn 333536455 333536455
+hg17.chrX 3639539 3639539
+fr1.chrUn 333536458 333536458
+hg17.chrX 3639545 3639550
+fr1.chrUn 333536464 333536469
+hg17.chrX 3639552 3639552
+fr1.chrUn 333536471 333536471
+hg17.chrX 3639554 3639556
+fr1.chrUn 333536473 333536475
+hg17.chrX 3639563 3639564
+fr1.chrUn 333536480 333536481
+hg17.chrX 3639572 3639572
+fr1.chrUn 333536489 333536489
+hg17.chrX 3639576 3639577
+fr1.chrUn 333536493 333536494
+hg17.chrX 3639579 3639579
+fr1.chrUn 333536496 333536496
+hg17.chrX 3639588 3639588
+fr1.chrUn 333536505 333536505
+hg17.chrX 3639592 3639592
+fr1.chrUn 333536509 333536509
+hg17.chrX 3639598 3639598
+fr1.chrUn 333536515 333536515
+hg17.chrX 3639600 3639600
+fr1.chrUn 333536517 333536517
+hg17.chrX 3639603 3639603
+fr1.chrUn 333536520 333536520
+hg17.chrX 3639606 3639606
+fr1.chrUn 333536523 333536523
+hg17.chrX 3639612 3639612
+fr1.chrUn 333536529 333536529
+hg17.chrX 3639615 3639615
+fr1.chrUn 333536532 333536532
+hg17.chrX 3639622 3639622
+fr1.chrUn 333536539 333536539
+hg17.chrX 3639642 3639642
+fr1.chrUn 333536559 333536559
diff -r 05974294cbf1 -r dabed25dfbaf tool_conf.xml.sample
--- a/tool_conf.xml.sample Sat Sep 20 18:14:24 2008 -0400
+++ b/tool_conf.xml.sample Sun Sep 21 17:36:28 2008 -0400
@@ -128,6 +128,8 @@
<tool file="regVariation/getIndels_2way.xml" />
<tool file="regVariation/getIndels_3way.xml" />
<tool file="regVariation/getIndelRates_3way.xml" />
+ <tool file="regVariation/substitutions.xml" />
+ <tool file="regVariation/substitution_rates.xml" />
</section>
<section name="Multiple regression" id="multReg">
<tool file="regVariation/linear_regression.xml" />
diff -r 05974294cbf1 -r dabed25dfbaf tools/regVariation/substitution_rates.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/regVariation/substitution_rates.py Sun Sep 21 17:36:28 2008 -0400
@@ -0,0 +1,118 @@
+#! /usr/bin/python
+#guruprasad Ananda
+"""
+Estimates substitution rates from pairwise alignments using JC69 model.
+"""
+
+from galaxy import eggs
+from galaxy.tools.util.galaxyops import *
+from galaxy.tools.util import maf_utilities
+import bx.align.maf
+import sys, fileinput
+
+def stop_err(msg):
+ sys.stderr.write(msg)
+ sys.exit()
+
+if len(sys.argv) < 3:
+ stop_err("Incorrect number of arguments.")
+
+inp_file = sys.argv[1]
+out_file = sys.argv[2]
+fout = open(out_file, 'w')
+int_file = sys.argv[3]
+if int_file != "None": #The user has specified an interval file
+ dbkey_i = sys.argv[4]
+ chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[5] )
+
+
+def rateEstimator(block):
+ global alignlen, mismatches
+
+ src1 = block.components[0].src
+ sequence1 = block.components[0].text
+ start1 = block.components[0].start
+ end1 = block.components[0].end
+ len1 = int(end1)-int(start1)
+ len1_withgap = len(sequence1)
+ mismatch = 0.0
+
+ for seq in range (1,len(block.components)):
+ src2 = block.components[seq].src
+ sequence2 = block.components[seq].text
+ start2 = block.components[seq].start
+ end2 = block.components[seq].end
+ len2 = int(end2)-int(start2)
+ for nt in range(len1_withgap):
+ if sequence1[nt] not in '-#$^*?' and sequence2[nt] not in '-#$^*?': #Not a gap or masked character
+ if sequence1[nt].upper() != sequence2[nt].upper():
+ mismatch += 1
+
+ if int_file == "None":
+ p = mismatch/min(len1,len2)
+ print >>fout, "%s\t%s\t%s\t%s\t%s\t%s\t%d\t%d\t%.4f" %(src1,start1,end1,src2,start2,end2,min(len1,len2),mismatch,p)
+ else:
+ mismatches += mismatch
+ alignlen += min(len1,len2)
+
+def main():
+ skipped = 0
+ not_pairwise = 0
+
+ if int_file == "None":
+ try:
+ maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
+ except:
+ stop_err("Your MAF file appears to be malformed.")
+ print >>fout, "#Seq1\tStart1\tEnd1\tSeq2\tStart2\tEnd2\tL\tN\tp"
+ for block in maf_reader:
+ if len(block.components) != 2:
+ not_pairwise += 1
+ continue
+ try:
+ rateEstimator(block)
+ except:
+ skipped += 1
+ else:
+ index, index_filename = maf_utilities.build_maf_index( inp_file, species = [dbkey_i] )
+ if index is None:
+ print >> sys.stderr, "Your MAF file appears to be malformed."
+ sys.exit()
+ win = NiceReaderWrapper( fileinput.FileInput( int_file ),
+ chrom_col=chr_col_i,
+ start_col=start_col_i,
+ end_col=end_col_i,
+ strand_col=strand_col_i,
+ fix_strand=True)
+ species=None
+ mincols = 0
+ global alignlen, mismatches
+
+ for interval in win:
+ alignlen = 0
+ mismatches = 0.0
+ src = "%s.%s" % ( dbkey_i, interval.chrom )
+ for block in maf_utilities.get_chopped_blocks_for_region( index, src, interval, species, mincols ):
+ if len(block.components) != 2:
+ not_pairwise += 1
+ continue
+ try:
+ rateEstimator(block)
+ except:
+ skipped += 1
+ if alignlen:
+ p = mismatches/alignlen
+ else:
+ p = 'NA'
+ interval.fields.append(str(alignlen))
+ interval.fields.append(str(mismatches))
+ interval.fields.append(str(p))
+ print >>fout, "\t".join(interval.fields)
+ #num_blocks += 1
+
+ if not_pairwise:
+ print "Skipped %d non-pairwise blocks" %(not_pairwise)
+ if skipped:
+ print "Skipped %d blocks as invalid" %(skipped)
+if __name__ == "__main__":
+ main()
diff -r 05974294cbf1 -r dabed25dfbaf tools/regVariation/substitution_rates.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/regVariation/substitution_rates.xml Sun Sep 21 17:36:28 2008 -0400
@@ -0,0 +1,61 @@
+<tool id="subRate1" name="Estimate substitution rates " version="1.0.0">
+ <description> for non-coding regions</description>
+ <command interpreter="python">
+ substitution_rates.py
+ $input
+ $out_file1
+ #if $region.type == "win":
+ ${region.input2} ${region.input2.dbkey} ${region.input2.metadata.chromCol},$region.input2.metadata.startCol,$region.input2.metadata.endCol,$region.input2.metadata.strandCol
+ #else:
+ "None"
+ #end if
+ </command>
+ <inputs>
+ <param format="maf" name="input" type="data" label="Select pair-wise alignment data"/>
+ <conditional name="region">
+ <param name="type" type="select" label="Estimate rates corresponding to" multiple="false">
+ <option value="align">Alignment block</option>
+ <option value="win">Intervals in your history</option>
+ </param>
+ <when value="win">
+ <param format="interval" name="input2" type="data" label="Choose intervals">
+ <validator type="unspecified_build" />
+ </param>
+ </when>
+ <when value="align" />
+ </conditional>
+ </inputs>
+ <outputs>
+ <data format="tabular" name="out_file1" metadata_source="input"/>
+ </outputs>
+
+ <tests>
+ <test>
+ <param name="input" value="Interval2Maf_pairwise_out.maf"/>
+ <param name="type" value="align"/>
+ <output name="out_file1" file="subRates1.out"/>
+ </test>
+ </tests>
+
+ <help>
+
+.. class:: infomark
+
+**What it does**
+
+This tool takes a pairwise MAF file as input and estimates the substitution rate according to the Jukes-Cantor JC69 model. The 3 new columns appended to the output are explained below:
+
+- L: number of nucleotides compared
+- N: number of different nucleotides
+- p = N/L
+
+-----
+
+.. class:: warningmark
+
+**Note**
+
+Any block/s not containing exactly two sequences, will be omitted.
+
+ </help>
+</tool>
\ No newline at end of file
diff -r 05974294cbf1 -r dabed25dfbaf tools/regVariation/substitutions.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/regVariation/substitutions.py Sun Sep 21 17:36:28 2008 -0400
@@ -0,0 +1,87 @@
+#! /usr/bin/python
+#Guruprasad ANanda
+"""
+Fetches substitutions from pairwise alignments.
+"""
+
+from galaxy import eggs
+
+from galaxy.tools.util import maf_utilities
+
+import bx.align.maf
+import sys
+import os, fileinput
+def stop_err(msg):
+ sys.stderr.write(msg)
+ sys.exit()
+
+if len(sys.argv) < 3:
+ stop_err("Incorrect number of arguments.")
+
+inp_file = sys.argv[1]
+out_file = sys.argv[2]
+fout = open(out_file, 'w')
+
+def fetchSubs(block):
+
+ src1 = block.components[0].src
+ sequence1 = block.components[0].text
+ start1 = block.components[0].start
+ end1 = block.components[0].end
+ len1 = int(end1)-int(start1)
+ len1_withgap = len(sequence1)
+
+ for seq in range (1,len(block.components)):
+ src2 = block.components[seq].src
+ sequence2 = block.components[seq].text
+ start2 = block.components[seq].start
+ end2 = block.components[seq].end
+ len2 = int(end2)-int(start2)
+ sub_begin = None
+ sub_end = None
+ begin = False
+
+ for nt in range(len1_withgap):
+ if sequence1[nt] not in '-#$^*?' and sequence2[nt] not in '-#$^*?': #Not a gap or masked character
+ if sequence1[nt].upper() != sequence2[nt].upper():
+ if not(begin):
+ sub_begin = nt
+ begin = True
+ sub_end = nt
+ else:
+ if begin:
+ print >>fout, "%s\t%s\t%s" %(src1,start1+sub_begin-sequence1[0:sub_begin].count('-'),start1+sub_end-sequence1[0:sub_end].count('-'))
+ print >>fout, "%s\t%s\t%s" %(src2,start2+sub_begin-sequence2[0:sub_begin].count('-'),start2+sub_end-sequence2[0:sub_end].count('-'))
+ begin = False
+
+ else:
+ if begin:
+ print >>fout, "%s\t%s\t%s" %(src1,start1+sub_begin-sequence1[0:sub_begin].count('-'),end1+sub_end-sequence1[0:sub_end].count('-'))
+ print >>fout, "%s\t%s\t%s" %(src2,start2+sub_begin-sequence2[0:sub_begin].count('-'),end2+sub_end-sequence2[0:sub_end].count('-'))
+ begin = False
+ ended = False
+
+
+def main():
+ skipped = 0
+ not_pairwise = 0
+ try:
+ maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
+ except:
+ stop_err("Your MAF file appears to be malformed.")
+ print >>fout, "#Chr\tStart\tEnd"
+ for block in maf_reader:
+ if len(block.components) != 2:
+ not_pairwise += 1
+ continue
+ try:
+ fetchSubs(block)
+ except:
+ skipped += 1
+
+ if not_pairwise:
+ print "Skipped %d non-pairwise blocks" %(not_pairwise)
+ if skipped:
+ print "Skipped %d blocks" %(skipped)
+if __name__ == "__main__":
+ main()
diff -r 05974294cbf1 -r dabed25dfbaf tools/regVariation/substitutions.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/regVariation/substitutions.xml Sun Sep 21 17:36:28 2008 -0400
@@ -0,0 +1,38 @@
+<tool id="substitutions1" name="Fetch substitutions " version="1.0.0">
+ <description> from pairwise alignments</description>
+ <command interpreter="python">
+ substitutions.py
+ $input
+ $out_file1
+ </command>
+ <inputs>
+ <param format="maf" name="input" type="data" label="Select pair-wise alignment data"/>
+ </inputs>
+ <outputs>
+ <data format="tabular" name="out_file1" metadata_source="input"/>
+ </outputs>
+
+ <tests>
+ <test>
+ <param name="input" value="Interval2Maf_pairwise_out.maf"/>
+ <output name="out_file1" file="subs.out"/>
+ </test>
+ </tests>
+ <help>
+
+.. class:: infomark
+
+**What it does**
+
+This tool takes a pairwise MAF file as input and fetches substitutions per alignment block.
+
+-----
+
+.. class:: warningmark
+
+**Note**
+
+Any block/s not containing exactly two sequences, will be omitted.
+
+ </help>
+</tool>
\ No newline at end of file
1
0
[hg] galaxy 1520: Fix a bug in shrimp_wrapper and add a tool for...
by greg＠scofield.bx.psu.edu 22 Sep '08
by greg＠scofield.bx.psu.edu 22 Sep '08
22 Sep '08
details: http://www.bx.psu.edu/hg/galaxy/rev/9ef55e79068b
changeset: 1520:9ef55e79068b
user: wychung
date: Fri Sep 19 12:02:13 2008 -0400
description:
Fix a bug in shrimp_wrapper and add a tool for splitting paired-end reads.
Update datatype/fastqsolexa so the number of sequences is correct.
7 file(s) affected in this change:
lib/galaxy/datatypes/sequence.py
test-data/split_paired_reads_test1.fastq
test-data/split_paired_reads_test1.out1
tool_conf.xml.sample
tools/metag_tools/shrimp_wrapper.py
tools/metag_tools/split_paired_reads.py
tools/metag_tools/split_paired_reads.xml
diffs (216 lines):
diff -r 0f735b21dc12 -r 9ef55e79068b lib/galaxy/datatypes/sequence.py
--- a/lib/galaxy/datatypes/sequence.py Thu Sep 18 16:48:29 2008 -0400
+++ b/lib/galaxy/datatypes/sequence.py Fri Sep 19 12:02:13 2008 -0400
@@ -98,8 +98,8 @@
dataset.peek = data.get_file_peek( dataset.file_name )
count = size = 0
bases_regexp = re.compile("^[NGTAC]*$")
- for line in file( dataset.file_name ):
- if line and line[0] == "@":
+ for i, line in enumerate(file( dataset.file_name )):
+ if line and line[0] == "@" and i % 4 == 0:
count += 1
elif bases_regexp.match(line):
line = line.strip()
diff -r 0f735b21dc12 -r 9ef55e79068b test-data/split_paired_reads_test1.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_paired_reads_test1.fastq Fri Sep 19 12:02:13 2008 -0400
@@ -0,0 +1,21 @@
+@HWI-EAS91_1_30788AAXX:7:21:1542:1758
+GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
++HWI-EAS91_1_30788AAXX:7:21:1542:1758
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
+@HWI-EAS91_1_30788AAXX:7:22:1621:462
+ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAAACTAGCCCCAATATCAATCCTATATCAAATCTCACC
++HWI-EAS91_1_30788AAXX:7:22:1621:462
+hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?hhJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh
+@HWI-EAS91_1_30788AAXX:7:45:408:807
+TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTTATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT
++HWI-EAS91_1_30788AAXX:7:45:408:807
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh
+@HWI-EAS91_1_30788AAXX:7:49:654:1439
+CTAACTCTATTTATTGTATTTCAACTAAAAATCTCATAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG
++HWI-EAS91_1_30788AAXX:7:49:654:1439
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@
+@HWI-EAS91_1_30788AAXX:7:64:947:234
+TATCAAAAAAGAATATAATCTGAATCAACACTACAACCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG
++HWI-EAS91_1_30788AAXX:7:64:947:234
+hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJhhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd
+
diff -r 0f735b21dc12 -r 9ef55e79068b test-data/split_paired_reads_test1.out1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_paired_reads_test1.out1 Fri Sep 19 12:02:13 2008 -0400
@@ -0,0 +1,20 @@
+@HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC
++HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+@HWI-EAS91_1_30788AAXX:7:22:1621:462/1
+ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAA
++HWI-EAS91_1_30788AAXX:7:22:1621:462/1
+hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?h
+@HWI-EAS91_1_30788AAXX:7:45:408:807/1
+TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTT
++HWI-EAS91_1_30788AAXX:7:45:408:807/1
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+@HWI-EAS91_1_30788AAXX:7:49:654:1439/1
+CTAACTCTATTTATTGTATTTCAACTAAAAATCTCA
++HWI-EAS91_1_30788AAXX:7:49:654:1439/1
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+@HWI-EAS91_1_30788AAXX:7:64:947:234/1
+TATCAAAAAAGAATATAATCTGAATCAACACTACAA
++HWI-EAS91_1_30788AAXX:7:64:947:234/1
+hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJ
diff -r 0f735b21dc12 -r 9ef55e79068b tool_conf.xml.sample
--- a/tool_conf.xml.sample Thu Sep 18 16:48:29 2008 -0400
+++ b/tool_conf.xml.sample Fri Sep 19 12:02:13 2008 -0400
@@ -274,6 +274,7 @@
<tool file="metag_tools/short_reads_figure_high_quality_length.xml" />
<tool file="metag_tools/short_reads_trim_seq.xml" />
<tool file="metag_tools/blat_coverage_report.xml" />
+ <tool file="metag_tools/split_paired_reads.xml" />
</section>
<section name="Short Read Mapping" id="solexa_tools">
<tool file="metag_tools/shrimp_wrapper.xml" />
diff -r 0f735b21dc12 -r 9ef55e79068b tools/metag_tools/shrimp_wrapper.py
--- a/tools/metag_tools/shrimp_wrapper.py Thu Sep 18 16:48:29 2008 -0400
+++ b/tools/metag_tools/shrimp_wrapper.py Fri Sep 19 12:02:13 2008 -0400
@@ -162,6 +162,7 @@
readname, endindex = line[1:].split('/')
else:
score = line
+
if score: # the last one
if hits.has_key(readname):
if len(hits[readname]) == hit_per_read:
@@ -182,8 +183,9 @@
match_count = 0
if hit_per_read == 1:
- matches = [ hits[readkey]['1'] ]
- match_count = 1
+ if len(hits[readkey]['1']) == 1:
+ matches = [ hits[readkey]['1'] ]
+ match_count = 1
else:
end1_data = hits[readkey]['1']
end2_data = hits[readkey]['2']
@@ -591,6 +593,7 @@
if os.path.exists(query_qual_end2): os.remove(query_qual_end2)
if os.path.exists(shrimp_log): os.remove(shrimp_log)
+
if __name__ == '__main__': __main__()
diff -r 0f735b21dc12 -r 9ef55e79068b tools/metag_tools/split_paired_reads.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/metag_tools/split_paired_reads.py Fri Sep 19 12:02:13 2008 -0400
@@ -0,0 +1,46 @@
+#! /usr/bin/python
+
+"""
+Split Solexa paired end reads
+"""
+
+import os, sys
+
+if __name__ == '__main__':
+
+ infile = sys.argv[1]
+ outfile_end1 = open(sys.argv[2], 'w')
+ outfile_end2 = open(sys.argv[3], 'w')
+
+ for i, line in enumerate(file(infile)):
+ line = line.rstrip()
+ if not line or line.startswith('#'): continue
+
+ end1 = ''
+ end2 = ''
+
+ line_index = i % 4
+
+ if line_index == 0:
+ end1 = line + '/1'
+ end2 = line + '/2'
+
+ elif line_index == 1:
+ seq_len = len(line)/2
+ end1 = line[0:seq_len]
+ end2 = line[seq_len:]
+
+ elif line_index == 2:
+ end1 = line + '/1'
+ end2 = line + '/2'
+
+ else:
+ qual_len = len(line)/2
+ end1 = line[0:qual_len]
+ end2 = line[qual_len:]
+
+ outfile_end1.write('%s\n' %(end1))
+ outfile_end2.write('%s\n' %(end2))
+
+ outfile_end1.close()
+ outfile_end2.close()
\ No newline at end of file
diff -r 0f735b21dc12 -r 9ef55e79068b tools/metag_tools/split_paired_reads.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/metag_tools/split_paired_reads.xml Fri Sep 19 12:02:13 2008 -0400
@@ -0,0 +1,56 @@
+<tool id="split_paired_reads" name="Split" version="1.0.0">
+ <description>paired-end reads into two ends</description>
+ <command interpreter="python">
+ split_paired_reads.py $input $output1 $output2
+ </command>
+ <inputs>
+ <param name="input" type="data" format="fastqsolexa" label="Your paired-end file" />
+ </inputs>
+ <outputs>
+ <data name="output1" format="fastqsolexa"/>
+ <data name="output2" format="fastqsolexa"/>
+ </outputs>
+ <tests>
+ <test>
+ <param name="input" value="split_paired_reads_test1.fastq" ftype="fastqsolexa" />
+ <output name="output1" file="split_paired_reads_test1.out1" fype="fastqsolexa" />
+ </test>
+ </tests>
+<help>
+
+**What it does**
+
+This tool splits a single paired-end file in half and returns two files, one for each end.
+
+-----
+
+**Input formats**
+
+A multiple-fastq file, for example::
+
+ @HWI-EAS91_1_30788AAXX:7:21:1542:1758
+ GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
+ +HWI-EAS91_1_30788AAXX:7:21:1542:1758
+ hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
+
+
+-----
+
+**Outputs**
+
+One end::
+
+ @HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+ GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC
+ +HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+ hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+
+The other end::
+
+ @HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
+ GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
+ +HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
+ hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
+
+</help>
+</tool>
1
0
[hg] galaxy 1521: Merge with b2a9827178e28d93e2a978f64033a556a72...
by greg＠scofield.bx.psu.edu 22 Sep '08
by greg＠scofield.bx.psu.edu 22 Sep '08
22 Sep '08
details: http://www.bx.psu.edu/hg/galaxy/rev/618210a97e62
changeset: 1521:618210a97e62
user: wychung
date: Fri Sep 19 12:34:51 2008 -0400
description:
Merge with b2a9827178e28d93e2a978f64033a556a72b4c51
0 file(s) affected in this change:
diffs (117 lines):
diff -r 9ef55e79068b -r 618210a97e62 tools/visualization/GMAJ.xml
--- a/tools/visualization/GMAJ.xml Fri Sep 19 12:02:13 2008 -0400
+++ b/tools/visualization/GMAJ.xml Fri Sep 19 12:34:51 2008 -0400
@@ -3,7 +3,10 @@
<command interpreter="python">GMAJ.py $out_file1 $maf_input $gmaj_file $filenames_file</command>
<inputs>
<param name="maf_input" type="data" format="maf" label="Alignment File" optional="False"/>
- <param name="refseq" label="Reference Sequence" value="" type="text" help="Leave empty to allow interactive selection."/>
+ <param name="refseq" label="Reference Sequence" type="select">
+ <option value="first" selected="true">First sequence in each block</option>
+ <option value="any">Any sequence</option>
+ </param>
<repeat name="annotations" title="Annotations">
<conditional name="annotation_style">
<param name="style" type="select" label="Annotation Style" help="If your data is not in a style similar to what is available from Galaxy (and the UCSC table browser), choose 'Basic'.">
@@ -11,7 +14,7 @@
<option value="basic">Basic</option>
</param>
<when value="galaxy">
- <param name="species" type="select" label="Species of Annotation" multiple="False">
+ <param name="species" type="select" label="Species" multiple="False">
<options>
<filter type="data_meta" ref="maf_input" key="species" />
</options>
@@ -21,7 +24,6 @@
<param name="underlays_file" type="data" format="bed,gff" label="Underlays File" optional="True"/>
<param name="repeats_file" type="data" format="bed,gff" label="Repeats File" optional="True"/>
<param name="links_file" type="data" format="bed,gff" label="Links File" optional="True"/>
- <param name="offset" label="Offset" value="0" type="integer"/>
</when>
<when value="basic">
<param name="seq_name" label="Full Sequence Name" value="" type="text">
@@ -44,6 +46,7 @@
<option name="Skipping unsupported paragraph (maf_paragraph)" value="maf_paragraph"/>
<option name="Skipping all reconstruction scores: no species specified (recon_noseq)" value="recon_noseq"/>
<option name="Skipping reconstruction scores in blocks with missing row (recon_missing)" value="recon_missing"/>
+ <option name="The first row in some blocks is not the specified reference sequence (refseq_not_first)" value="refseq_not_first"/>
<option name="Skipping extra MAF File (unused_maf)" value="unused_maf"/>
</option>
<option name="Annotation Files" value="annotations">
@@ -71,12 +74,15 @@
</option>
<option name="Red Flags" value="red">
<option name="Sequence name in annotation file does not match name in MAF (seqname_mismatch)" value="seqname_mismatch"/>
- <option name="BED Start or end < 0 (bed_coord)" value="bed_coord"/>
- <option name="GFF Start or end < 1 (gff_coord)" value="gff_coord"/>
+ <option name="BED start or end < 0 (bed_coord)" value="bed_coord"/>
+ <option name="GFF start or end < 1 (gff_coord)" value="gff_coord"/>
<option name="Missing item name for URL substitution (url_subst)" value="url_subst"/>
</option>
</option>
<option name="Miscellaneous" value="miscellaneous">
+ <option name="No refseq specified; assuming 'first' (default_refseq)" value="default_refseq"/>
+ <option name="One or more bundle entries are not used in parameters file(unused_entry)" value="unused_entry"/>
+ <option name="Skipping blocks for export where reference sequence is hidden or all gaps (export_skip)" value="export_skip"/>
<option name="Possible parse error: token ends with an escaped quote (escaped_quote)" value="escaped_quote"/>
<option name="Draggable panel dividers will not be sticky (no_sticky)" value="no_sticky"/>
</option>
@@ -89,11 +95,7 @@
title = "Galaxy: $maf_input.name"
alignfile = input.maf
-#if $refseq.value:
refseq = $refseq
-#else:
-refseq = any
-#end if
tabext = .bed .gff .gtf
#if $nowarn.value:
nowarn = $nowarn
@@ -102,36 +104,35 @@
#set $seq_count = 0
#for $annotation_count, $annotation in $enumerate( $annotations ):
#if $annotation.annotation_style.style == "galaxy":
-#if $maf_input.metadata.species_chromosomes and $annotation.annotation_style['species'].value in $maf_input.metadata.species_chromosomes and $maf_input.metadata.species_chromosomes[$annotation.annotation_style['species'].value]:
-#set $seq_names = [ "%s.%s" % ( $annotation.annotation_style['species'].value, $chrom ) for $chrom in $maf_input.metadata.species_chromosomes[$annotation.annotation_style['species'].value]]
-#set $aliases = [ " %s" % $chrom for $chrom in $maf_input.metadata.species_chromosomes[$annotation.annotation_style['species'].value]]
+#if $maf_input.dataset.metadata.species_chromosomes and $annotation.annotation_style['species'].value in $maf_input.dataset.metadata.species_chromosomes and $maf_input.dataset.metadata.species_chromosomes[$annotation.annotation_style['species'].value]:
+#set $seq_names = [ "%s.%s" % ( $annotation.annotation_style['species'].value, $chrom ) for $chrom in $maf_input.dataset.metadata.species_chromosomes[$annotation.annotation_style['species'].value]]
#else:
#set $seq_names = [$annotation.annotation_style['species']]
-#set $aliases = [""]
#end if
#else:
#set $seq_names = [$annotation.annotation_style['seq_name']]
-#set $aliases = [""]
#end if
-#for $seq_name, $alias in $zip( $seq_names, $aliases ):
+#for $seq_name in $seq_names:
seq ${seq_count}:
seqname = $seq_name
#if $annotation.annotation_style['exons_file'].dataset:
-exons = ${annotation_count}.exons.${annotation.annotation_style['exons_file'].extension}$alias
+exons = ${annotation_count}.exons.${annotation.annotation_style['exons_file'].extension}
#end if
#if $annotation.annotation_style['repeats_file'].dataset:
-repeats = ${annotation_count}.repeats.${annotation.annotation_style['repeats_file'].extension}$alias
+repeats = ${annotation_count}.repeats.${annotation.annotation_style['repeats_file'].extension}
#end if
#if $annotation.annotation_style['links_file'].dataset:
-links = ${annotation_count}.links.${annotation.annotation_style['links_file'].extension}$alias
+links = ${annotation_count}.links.${annotation.annotation_style['links_file'].extension}
#end if
#if $annotation.annotation_style['underlays_file'].dataset:
-underlays = ${annotation_count}.underlays.${annotation.annotation_style['underlays_file'].extension}$alias
+underlays = ${annotation_count}.underlays.${annotation.annotation_style['underlays_file'].extension}
#end if
#if $annotation.annotation_style['highlights_file'].dataset:
-highlights = ${annotation_count}.highlights.${annotation.annotation_style['highlights_file'].extension}$alias
+highlights = ${annotation_count}.highlights.${annotation.annotation_style['highlights_file'].extension}
#end if
+#if $annotation.annotation_style.style == "basic":
offset = $annotation.annotation_style['offset']
+#end if
#set $seq_count = $seq_count + 1
#end for
1
0
details: http://www.bx.psu.edu/hg/galaxy/rev/b2a9827178e2
changeset: 1519:b2a9827178e2
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Fri Sep 19 12:27:20 2008 -0400
description:
Update GMAJ tool interface.
1 file(s) affected in this change:
tools/visualization/GMAJ.xml
diffs (117 lines):
diff -r 0f735b21dc12 -r b2a9827178e2 tools/visualization/GMAJ.xml
--- a/tools/visualization/GMAJ.xml Thu Sep 18 16:48:29 2008 -0400
+++ b/tools/visualization/GMAJ.xml Fri Sep 19 12:27:20 2008 -0400
@@ -3,7 +3,10 @@
<command interpreter="python">GMAJ.py $out_file1 $maf_input $gmaj_file $filenames_file</command>
<inputs>
<param name="maf_input" type="data" format="maf" label="Alignment File" optional="False"/>
- <param name="refseq" label="Reference Sequence" value="" type="text" help="Leave empty to allow interactive selection."/>
+ <param name="refseq" label="Reference Sequence" type="select">
+ <option value="first" selected="true">First sequence in each block</option>
+ <option value="any">Any sequence</option>
+ </param>
<repeat name="annotations" title="Annotations">
<conditional name="annotation_style">
<param name="style" type="select" label="Annotation Style" help="If your data is not in a style similar to what is available from Galaxy (and the UCSC table browser), choose 'Basic'.">
@@ -11,7 +14,7 @@
<option value="basic">Basic</option>
</param>
<when value="galaxy">
- <param name="species" type="select" label="Species of Annotation" multiple="False">
+ <param name="species" type="select" label="Species" multiple="False">
<options>
<filter type="data_meta" ref="maf_input" key="species" />
</options>
@@ -21,7 +24,6 @@
<param name="underlays_file" type="data" format="bed,gff" label="Underlays File" optional="True"/>
<param name="repeats_file" type="data" format="bed,gff" label="Repeats File" optional="True"/>
<param name="links_file" type="data" format="bed,gff" label="Links File" optional="True"/>
- <param name="offset" label="Offset" value="0" type="integer"/>
</when>
<when value="basic">
<param name="seq_name" label="Full Sequence Name" value="" type="text">
@@ -44,6 +46,7 @@
<option name="Skipping unsupported paragraph (maf_paragraph)" value="maf_paragraph"/>
<option name="Skipping all reconstruction scores: no species specified (recon_noseq)" value="recon_noseq"/>
<option name="Skipping reconstruction scores in blocks with missing row (recon_missing)" value="recon_missing"/>
+ <option name="The first row in some blocks is not the specified reference sequence (refseq_not_first)" value="refseq_not_first"/>
<option name="Skipping extra MAF File (unused_maf)" value="unused_maf"/>
</option>
<option name="Annotation Files" value="annotations">
@@ -71,12 +74,15 @@
</option>
<option name="Red Flags" value="red">
<option name="Sequence name in annotation file does not match name in MAF (seqname_mismatch)" value="seqname_mismatch"/>
- <option name="BED Start or end < 0 (bed_coord)" value="bed_coord"/>
- <option name="GFF Start or end < 1 (gff_coord)" value="gff_coord"/>
+ <option name="BED start or end < 0 (bed_coord)" value="bed_coord"/>
+ <option name="GFF start or end < 1 (gff_coord)" value="gff_coord"/>
<option name="Missing item name for URL substitution (url_subst)" value="url_subst"/>
</option>
</option>
<option name="Miscellaneous" value="miscellaneous">
+ <option name="No refseq specified; assuming 'first' (default_refseq)" value="default_refseq"/>
+ <option name="One or more bundle entries are not used in parameters file(unused_entry)" value="unused_entry"/>
+ <option name="Skipping blocks for export where reference sequence is hidden or all gaps (export_skip)" value="export_skip"/>
<option name="Possible parse error: token ends with an escaped quote (escaped_quote)" value="escaped_quote"/>
<option name="Draggable panel dividers will not be sticky (no_sticky)" value="no_sticky"/>
</option>
@@ -89,11 +95,7 @@
title = "Galaxy: $maf_input.name"
alignfile = input.maf
-#if $refseq.value:
refseq = $refseq
-#else:
-refseq = any
-#end if
tabext = .bed .gff .gtf
#if $nowarn.value:
nowarn = $nowarn
@@ -102,36 +104,35 @@
#set $seq_count = 0
#for $annotation_count, $annotation in $enumerate( $annotations ):
#if $annotation.annotation_style.style == "galaxy":
-#if $maf_input.metadata.species_chromosomes and $annotation.annotation_style['species'].value in $maf_input.metadata.species_chromosomes and $maf_input.metadata.species_chromosomes[$annotation.annotation_style['species'].value]:
-#set $seq_names = [ "%s.%s" % ( $annotation.annotation_style['species'].value, $chrom ) for $chrom in $maf_input.metadata.species_chromosomes[$annotation.annotation_style['species'].value]]
-#set $aliases = [ " %s" % $chrom for $chrom in $maf_input.metadata.species_chromosomes[$annotation.annotation_style['species'].value]]
+#if $maf_input.dataset.metadata.species_chromosomes and $annotation.annotation_style['species'].value in $maf_input.dataset.metadata.species_chromosomes and $maf_input.dataset.metadata.species_chromosomes[$annotation.annotation_style['species'].value]:
+#set $seq_names = [ "%s.%s" % ( $annotation.annotation_style['species'].value, $chrom ) for $chrom in $maf_input.dataset.metadata.species_chromosomes[$annotation.annotation_style['species'].value]]
#else:
#set $seq_names = [$annotation.annotation_style['species']]
-#set $aliases = [""]
#end if
#else:
#set $seq_names = [$annotation.annotation_style['seq_name']]
-#set $aliases = [""]
#end if
-#for $seq_name, $alias in $zip( $seq_names, $aliases ):
+#for $seq_name in $seq_names:
seq ${seq_count}:
seqname = $seq_name
#if $annotation.annotation_style['exons_file'].dataset:
-exons = ${annotation_count}.exons.${annotation.annotation_style['exons_file'].extension}$alias
+exons = ${annotation_count}.exons.${annotation.annotation_style['exons_file'].extension}
#end if
#if $annotation.annotation_style['repeats_file'].dataset:
-repeats = ${annotation_count}.repeats.${annotation.annotation_style['repeats_file'].extension}$alias
+repeats = ${annotation_count}.repeats.${annotation.annotation_style['repeats_file'].extension}
#end if
#if $annotation.annotation_style['links_file'].dataset:
-links = ${annotation_count}.links.${annotation.annotation_style['links_file'].extension}$alias
+links = ${annotation_count}.links.${annotation.annotation_style['links_file'].extension}
#end if
#if $annotation.annotation_style['underlays_file'].dataset:
-underlays = ${annotation_count}.underlays.${annotation.annotation_style['underlays_file'].extension}$alias
+underlays = ${annotation_count}.underlays.${annotation.annotation_style['underlays_file'].extension}
#end if
#if $annotation.annotation_style['highlights_file'].dataset:
-highlights = ${annotation_count}.highlights.${annotation.annotation_style['highlights_file'].extension}$alias
+highlights = ${annotation_count}.highlights.${annotation.annotation_style['highlights_file'].extension}
#end if
+#if $annotation.annotation_style.style == "basic":
offset = $annotation.annotation_style['offset']
+#end if
#set $seq_count = $seq_count + 1
#end for
1
0
[hg] galaxy 1522: Adding a new set of tools to perform multiple...
by greg(a)scofield.bx.psu.edu 22 Sep '08
by greg(a)scofield.bx.psu.edu 22 Sep '08
22 Sep '08
details: http://www.bx.psu.edu/hg/galaxy/rev/05974294cbf1
changeset: 1522:05974294cbf1
user: guru
date: Sat Sep 20 18:14:24 2008 -0400
description:
Adding a new set of tools to perform multiple linear regression analysis.
9 file(s) affected in this change:
test-data/rcve_out.dat
test-data/reg_inp.tab
tool_conf.xml.sample
tools/regVariation/best_regression_subsets.py
tools/regVariation/best_regression_subsets.xml
tools/regVariation/linear_regression.py
tools/regVariation/linear_regression.xml
tools/regVariation/rcve.py
tools/regVariation/rcve.xml
diffs (700 lines):
diff -r 618210a97e62 -r 05974294cbf1 test-data/rcve_out.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rcve_out.dat Sat Sep 20 18:14:24 2008 -0400
@@ -0,0 +1,8 @@
+#Model R-sq RCVE_Terms RCVE_Value
+2 3 4 0.3997 - -
+3 4 0.3319 2 0.1697
+2 4 0.2974 3 0.2561
+2 3 0.3985 4 0.0031
+4 0.1226 2 3 0.6934
+3 0.2733 2 4 0.3164
+2 0.2972 3 4 0.2564
diff -r 618210a97e62 -r 05974294cbf1 test-data/reg_inp.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reg_inp.tab Sat Sep 20 18:14:24 2008 -0400
@@ -0,0 +1,100 @@
+2.04 2.01 1070 5
+2.56 3.40 1254 6
+3.75 3.68 1466 6
+1.10 1.54 706 4
+3.00 3.32 1160 5
+0.05 0.33 756 3
+1.38 0.36 1058 2
+1.50 1.97 1008 7
+1.38 2.03 1104 4
+4.01 2.05 1200 7
+1.50 2.13 896 7
+1.29 1.34 848 3
+1.90 1.51 958 5
+3.11 3.12 1246 6
+1.92 2.14 1106 4
+0.81 2.60 790 5
+1.01 1.90 954 4
+3.66 3.06 1500 6
+2.00 1.60 1046 5
+2.05 1.96 1054 4
+2.60 1.96 1198 6
+2.55 1.56 940 3
+0.38 1.60 456 6
+2.48 1.92 1150 7
+2.74 3.09 636 6
+1.77 0.78 744 5
+1.61 2.12 644 5
+0.99 1.85 842 3
+1.62 1.78 852 5
+2.03 1.03 1170 3
+3.50 3.44 1034 10
+3.18 2.42 1202 5
+2.39 1.74 1018 5
+1.48 1.89 1180 5
+1.54 1.43 952 3
+1.57 1.64 1038 4
+2.46 2.69 1090 6
+2.42 1.79 694 5
+2.11 2.72 1096 6
+2.04 2.15 1114 5
+1.68 2.22 1256 6
+1.64 1.55 1208 5
+2.41 2.34 820 6
+2.10 2.92 1222 4
+1.40 2.10 1120 5
+2.03 1.64 886 4
+1.99 2.83 1126 7
+2.24 1.76 1158 4
+0.45 1.81 676 6
+2.31 2.68 1214 7
+2.41 2.55 1136 6
+2.56 2.70 1264 6
+2.50 1.66 1116 3
+2.92 2.23 1292 4
+2.35 2.01 604 5
+2.82 1.24 854 6
+1.80 1.95 814 6
+1.29 1.73 778 3
+1.68 1.08 800 2
+3.44 3.46 1424 7
+1.90 3.01 950 6
+2.06 0.54 1056 3
+3.30 3.20 956 8
+1.80 1.50 1352 5
+2.00 1.71 852 5
+1.68 1.99 1168 5
+1.94 2.76 970 6
+0.97 1.56 776 4
+1.12 1.78 854 6
+1.31 1.32 1232 5
+1.68 0.87 1140 6
+3.09 1.75 1084 4
+1.87 1.41 954 2
+2.00 2.77 1000 4
+2.39 1.78 1084 4
+1.50 1.34 1058 4
+1.82 1.52 816 5
+1.80 2.97 1146 7
+2.01 1.75 1000 6
+1.88 1.64 856 4
+1.64 1.80 798 4
+2.42 3.37 1324 6
+0.22 1.15 704 6
+2.31 1.72 1222 5
+0.95 2.27 948 6
+1.99 2.85 1182 8
+1.86 2.21 1000 6
+1.79 1.94 910 6
+3.02 4.25 1374 9
+1.85 1.83 1014 6
+1.98 2.75 1420 7
+2.15 1.71 400 6
+1.46 2.20 998 7
+2.29 2.13 776 6
+2.39 2.38 1134 7
+1.80 1.64 772 4
+2.64 1.87 1304 6
+2.08 2.53 1212 4
+0.70 1.78 818 6
+0.89 1.20 864 2
\ No newline at end of file
diff -r 618210a97e62 -r 05974294cbf1 tool_conf.xml.sample
--- a/tool_conf.xml.sample Fri Sep 19 12:34:51 2008 -0400
+++ b/tool_conf.xml.sample Sat Sep 20 18:14:24 2008 -0400
@@ -128,6 +128,11 @@
<tool file="regVariation/getIndels_2way.xml" />
<tool file="regVariation/getIndels_3way.xml" />
<tool file="regVariation/getIndelRates_3way.xml" />
+ </section>
+ <section name="Multiple regression" id="multReg">
+ <tool file="regVariation/linear_regression.xml" />
+ <tool file="regVariation/best_regression_subsets.xml" />
+ <tool file="regVariation/rcve.xml" />
</section>
<section name="Evolution: HyPhy" id="hyphy">
<tool file="hyphy/hyphy_branch_lengths_wrapper.xml" />
diff -r 618210a97e62 -r 05974294cbf1 tools/regVariation/best_regression_subsets.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/regVariation/best_regression_subsets.py Sat Sep 20 18:14:24 2008 -0400
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+
+from galaxy import eggs
+
+import sys, string
+from rpy import *
+import numpy
+
+def stop_err(msg):
+ sys.stderr.write(msg)
+ sys.exit()
+
+infile = sys.argv[1]
+y_col = int(sys.argv[2])-1
+x_cols = sys.argv[3].split(',')
+outfile = sys.argv[4]
+outfile2 = sys.argv[5]
+print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
+fout = open(outfile,'w')
+
+for i, line in enumerate( file ( infile )):
+ line = line.rstrip('\r\n')
+ if len( line )>0 and not line.startswith( '#' ):
+ elems = line.split( '\t' )
+ break
+ if i == 30:
+ break # Hopefully we'll never get here...
+
+if len( elems )<1:
+ stop_err( "The data in your input dataset is either missing or not formatted properly." )
+
+y_vals = []
+x_vals = []
+
+for k,col in enumerate(x_cols):
+ x_cols[k] = int(col)-1
+ x_vals.append([])
+
+NA = 'NA'
+for ind,line in enumerate( file( infile )):
+ if line and not line.startswith( '#' ):
+ try:
+ fields = line.split("\t")
+ try:
+ yval = float(fields[y_col])
+ except Exception, ey:
+ yval = r('NA')
+ y_vals.append(yval)
+ for k,col in enumerate(x_cols):
+ try:
+ xval = float(fields[col])
+ except Exception, ex:
+ xval = r('NA')
+ x_vals[k].append(xval)
+ except:
+ pass
+
+response_term = ""
+
+x_vals1 = numpy.asarray(x_vals).transpose()
+
+dat= r.list(x=array(x_vals1), y=y_vals)
+
+r.library("leaps")
+
+set_default_mode(NO_CONVERSION)
+try:
+ leaps = r.regsubsets(r("y ~ x"), data= r.na_exclude(dat))
+except RException, rex:
+ stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain no numeric values.")
+set_default_mode(BASIC_CONVERSION)
+
+summary = r.summary(leaps)
+tot = len(x_vals)
+pattern = "["
+for i in range(tot):
+ pattern = pattern + 'c' + str(int(x_cols[int(i)]) + 1) + ' '
+pattern = pattern.strip() + ']'
+print >>fout, "#Vars\t%s\tR-sq\tAdj. R-sq\tC-p\tbic" %(pattern)
+for ind,item in enumerate(summary['outmat']):
+ print >>fout, "%s\t%s\t%s\t%s\t%s\t%s" %(str(item).count('*'), item, summary['rsq'][ind], summary['adjr2'][ind], summary['cp'][ind], summary['bic'][ind])
+
+
+r.pdf( outfile2, 8, 8 )
+r.plot(leaps, scale="Cp", main="Best subsets using Cp Criterion")
+r.plot(leaps, scale="r2", main="Best subsets using R-sq Criterion")
+r.plot(leaps, scale="adjr2", main="Best subsets using Adjusted R-sq Criterion")
+r.plot(leaps, scale="bic", main="Best subsets using bic Criterion")
+
+r.dev_off()
diff -r 618210a97e62 -r 05974294cbf1 tools/regVariation/best_regression_subsets.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/regVariation/best_regression_subsets.xml Sat Sep 20 18:14:24 2008 -0400
@@ -0,0 +1,64 @@
+<tool id="BestSubsetsRegression1" name="Perform Best-subsets Regression">
+ <description> </description>
+ <command interpreter="python">
+ best_regression_subsets.py
+ $input1
+ $response_col
+ $predictor_cols
+ $out_file1
+ $out_file2
+ 1>/dev/null
+ 2>/dev/null
+ </command>
+ <inputs>
+ <param format="tabular" name="input1" type="data" label="Select data" help="Query missing? See TIP below."/>
+ <param name="response_col" label="Response column (Y)" type="data_column" data_ref="input1" />
+ <param name="predictor_cols" label="Predictor columns (X)" type="data_column" data_ref="input1" multiple="true" />
+ </inputs>
+ <outputs>
+ <data format="input" name="out_file1" metadata_source="input1" />
+ <data format="pdf" name="out_file2" />
+ </outputs>
+ <requirements>
+ <requirement type="python-module">rpy</requirement>
+ </requirements>
+ <tests>
+ <!-- Testing this tool will not be possible because this tool produces a pdf output file.
+ -->
+ </tests>
+ <help>
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Edit Queries->Convert characters*
+
+-----
+
+.. class:: infomark
+
+**What it does**
+
+This tool uses the 'regsubsets' function from the R statistical package for regression subset selection. It outputs two files, one containing a table with the best subsets and the corresponding summary statistics, and the other containing the graphical representation of the results.
+
+-----
+
+.. class:: warningmark
+
+**Note**
+
+- This tool currently treats all predictor and response variables as continuous variables.
+
+- Rows containing non-numeric (or missing) data in any of the chosen columns will be skipped from the analysis.
+
+- The 6 columns in the output are described below:
+
+ - Column 1 (Vars): denotes the number of variables in the model
+ - Column 2 ([c2 c3 c4...]): represents a list of the user-selected predictor variables (full model). An asterisk denotes the presence of the corresponding predictor variable in the selected model.
+ - Column 3 (R-sq): the fraction of variance explained by the model
+ - Column 4 (Adj. R-sq): the above R-squared statistic adjusted, penalizing for higher number of predictors (p)
+ - Column 5 (Cp): Mallows' Cp statistic
+ - Column 6 (bic): Bayesian Information Criterion.
+
+
+ </help>
+</tool>
diff -r 618210a97e62 -r 05974294cbf1 tools/regVariation/linear_regression.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/regVariation/linear_regression.py Sat Sep 20 18:14:24 2008 -0400
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+
+from galaxy import eggs
+import sys, string
+from rpy import *
+import numpy
+
+def stop_err(msg):
+ sys.stderr.write(msg)
+ sys.exit()
+
+infile = sys.argv[1]
+y_col = int(sys.argv[2])-1
+x_cols = sys.argv[3].split(',')
+outfile = sys.argv[4]
+outfile2 = sys.argv[5]
+
+print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
+fout = open(outfile,'w')
+
+for i, line in enumerate( file ( infile )):
+ line = line.rstrip('\r\n')
+ if len( line )>0 and not line.startswith( '#' ):
+ elems = line.split( '\t' )
+ break
+ if i == 30:
+ break # Hopefully we'll never get here...
+
+if len( elems )<1:
+ stop_err( "The data in your input dataset is either missing or not formatted properly." )
+
+y_vals = []
+x_vals = []
+
+for k,col in enumerate(x_cols):
+ x_cols[k] = int(col)-1
+ x_vals.append([])
+
+NA = 'NA'
+for ind,line in enumerate( file( infile )):
+ if line and not line.startswith( '#' ):
+ try:
+ fields = line.split("\t")
+ try:
+ yval = float(fields[y_col])
+ except:
+ yval = r('NA')
+ y_vals.append(yval)
+ for k,col in enumerate(x_cols):
+ try:
+ xval = float(fields[col])
+ except:
+ xval = r('NA')
+ x_vals[k].append(xval)
+ except:
+ pass
+
+x_vals1 = numpy.asarray(x_vals).transpose()
+
+dat= r.list(x=array(x_vals1), y=y_vals)
+
+set_default_mode(NO_CONVERSION)
+try:
+ linear_model = r.lm(r("y ~ x"), data = r.na_exclude(dat))
+except RException, rex:
+ stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain only non-numeric or invalid values.")
+set_default_mode(BASIC_CONVERSION)
+
+coeffs=linear_model.as_py()['coefficients']
+yintercept= coeffs['(Intercept)']
+print >>fout, "Y-intercept\t%s" %(yintercept)
+summary = r.summary(linear_model)
+
+co = summary.get('coefficients', 'NA')
+"""
+if len(co) != len(x_vals)+1:
+ stop_err("Stopped performing linear regression on the input data, since one of the predictor columns contains only non-numeric or invalid values.")
+"""
+print >>fout, "p-value (Y-intercept)\t%s" %(co[0][3])
+
+if len(x_vals) == 1: #Simple linear regression case with 1 predictor variable
+ try:
+ slope = coeffs['x']
+ except:
+ slope = 'NA'
+ try:
+ pval = co[1][3]
+ except:
+ pval = 'NA'
+ print >>fout, "Slope (c%d)\t%s" %(x_cols[0]+1,slope)
+ print >>fout, "p-value (c%d)\t%s" %(x_cols[0]+1,pval)
+else: #Multiple regression case with >1 predictors
+ ind=1
+ while ind < len(coeffs.keys()):
+ print >>fout, "Slope (c%d)\t%s" %(x_cols[ind-1]+1,coeffs['x'+str(ind)])
+ try:
+ pval = co[ind][3]
+ except:
+ pval = 'NA'
+ print >>fout, "p-value (c%d)\t%s" %(x_cols[ind-1]+1,pval)
+ ind+=1
+
+print >>fout, "R-squared\t%s" %(summary.get('r.squared','NA'))
+print >>fout, "Adjusted R-squared\t%s" %(summary.get('adj.r.squared','NA'))
+print >>fout, "F-statistic\t%s" %(summary.get('fstatistic','NA'))
+print >>fout, "Sigma\t%s" %(summary.get('sigma','NA'))
+
+r.pdf( outfile2, 8, 8 )
+if len(x_vals) == 1: #Simple linear regression case with 1 predictor variable
+ sub_title = "Slope = %s; Y-int = %s" %(slope,yintercept)
+ r.plot(x=x_vals[0], y=y_vals, xlab="X", ylab="Y", sub=sub_title, main="Scatterplot with regression")
+ r.abline(a=yintercept, b=slope, col="red")
+else:
+ r.pairs(dat, main="Scatterplot Matrix", col="blue")
+
+r.plot(linear_model)
+r.dev_off()
diff -r 618210a97e62 -r 05974294cbf1 tools/regVariation/linear_regression.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/regVariation/linear_regression.xml Sat Sep 20 18:14:24 2008 -0400
@@ -0,0 +1,62 @@
+<tool id="LinearRegression1" name="Perform Linear Regression">
+ <description> </description>
+ <command interpreter="python">
+ linear_regression.py
+ $input1
+ $response_col
+ $predictor_cols
+ $out_file1
+ $out_file2
+ 1>/dev/null
+ </command>
+ <inputs>
+ <param format="tabular" name="input1" type="data" label="Select data" help="Query missing? See TIP below."/>
+ <param name="response_col" label="Response column (Y)" type="data_column" data_ref="input1" />
+ <param name="predictor_cols" label="Predictor columns (X)" type="data_column" data_ref="input1" multiple="true" />
+ </inputs>
+ <outputs>
+ <data format="input" name="out_file1" metadata_source="input1" />
+ <data format="pdf" name="out_file2" />
+ </outputs>
+ <requirements>
+ <requirement type="python-module">rpy</requirement>
+ </requirements>
+ <tests>
+ <!-- Testing this tool will not be possible because this tool produces a pdf output file.
+ -->
+ </tests>
+ <help>
+
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Edit Queries->Convert characters*
+
+-----
+
+.. class:: infomark
+
+**What it does**
+
+This tool uses the 'lm' function from the R statistical package to perform linear regression on the input data. It outputs two files, one containing the summary statistics of the performed regression, and the other containing diagnostic plots to check whether model assumptions are satisfied.
+
+-----
+
+.. class:: warningmark
+
+**Note**
+
+- This tool currently treats all predictor and response variables as continuous variables.
+
+- Rows containing non-numeric (or missing) data in any of the chosen columns will be skipped from the analysis.
+
+- The summary statistics in the output are described below:
+
+ - sigma: the square root of the estimated variance of the random error (standard error of the residuals)
+ - R-squared: the fraction of variance explained by the model
+ - Adjusted R-squared: the above R-squared statistic adjusted, penalizing for the number of the predictors (p)
+ - p-value: p-value for the t-test of the null hypothesis that the corresponding slope is equal to zero against the two-sided alternative.
+
+
+ </help>
+</tool>
diff -r 618210a97e62 -r 05974294cbf1 tools/regVariation/rcve.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/regVariation/rcve.py Sat Sep 20 18:14:24 2008 -0400
@@ -0,0 +1,143 @@
+#!/usr/bin/env python
+
+from galaxy import eggs
+
+import sys, string
+from rpy import *
+import numpy
+
+def stop_err(msg):
+ sys.stderr.write(msg)
+ sys.exit()
+
+def sscombs(s):
+ if len(s) == 1:
+ return [s]
+ else:
+ ssc = sscombs(s[1:])
+ return [s[0]] + [s[0]+comb for comb in ssc] + ssc
+
+
+infile = sys.argv[1]
+y_col = int(sys.argv[2])-1
+x_cols = sys.argv[3].split(',')
+outfile = sys.argv[4]
+
+print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
+fout = open(outfile,'w')
+
+for i, line in enumerate( file ( infile )):
+ line = line.rstrip('\r\n')
+ if len( line )>0 and not line.startswith( '#' ):
+ elems = line.split( '\t' )
+ break
+ if i == 30:
+ break # Hopefully we'll never get here...
+
+if len( elems )<1:
+ stop_err( "The data in your input dataset is either missing or not formatted properly." )
+
+y_vals = []
+x_vals = []
+
+for k,col in enumerate(x_cols):
+ x_cols[k] = int(col)-1
+ x_vals.append([])
+ """
+ try:
+ float( elems[x_cols[k]] )
+ except:
+ try:
+ msg = "This operation cannot be performed on non-numeric column %d containing value '%s'." %( col, elems[x_cols[k]] )
+ except:
+ msg = "This operation cannot be performed on non-numeric data."
+ stop_err( msg )
+ """
+NA = 'NA'
+for ind,line in enumerate( file( infile )):
+ if line and not line.startswith( '#' ):
+ try:
+ fields = line.split("\t")
+ try:
+ yval = float(fields[y_col])
+ except Exception, ey:
+ yval = r('NA')
+ #print >>sys.stderr, "ey = %s" %ey
+ y_vals.append(yval)
+ for k,col in enumerate(x_cols):
+ try:
+ xval = float(fields[col])
+ except Exception, ex:
+ xval = r('NA')
+ #print >>sys.stderr, "ex = %s" %ex
+ x_vals[k].append(xval)
+ except:
+ pass
+
+x_vals1 = numpy.asarray(x_vals).transpose()
+dat= r.list(x=array(x_vals1), y=y_vals)
+
+set_default_mode(NO_CONVERSION)
+try:
+ full = r.lm(r("y ~ x"), data= r.na_exclude(dat)) #full model includes all the predictor variables specified by the user
+except RException, rex:
+ stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain no numeric values.")
+set_default_mode(BASIC_CONVERSION)
+
+summary = r.summary(full)
+fullr2 = summary.get('r.squared','NA')
+
+if fullr2 == 'NA':
+ stop_error("Error in linear regression")
+
+if len(x_vals) < 10:
+ s = ""
+ for ch in range(len(x_vals)):
+ s += str(ch)
+else:
+ stop_err("This tool only works with less than 10 predictors.")
+
+print >>fout, "#Model\tR-sq\tRCVE_Terms\tRCVE_Value"
+all_combos = sorted(sscombs(s), key=len)
+all_combos.reverse()
+for j,cols in enumerate(all_combos):
+ #if len(cols) == len(s): #Same as the full model above
+ # continue
+ if len(cols) == 1:
+ x_vals1 = x_vals[int(cols)]
+ else:
+ x_v = []
+ for col in cols:
+ x_v.append(x_vals[int(col)])
+ x_vals1 = numpy.asarray(x_v).transpose()
+ dat= r.list(x=array(x_vals1), y=y_vals)
+ set_default_mode(NO_CONVERSION)
+ red = r.lm(r("y ~ x"), data= dat) #Reduced model
+ set_default_mode(BASIC_CONVERSION)
+ summary = r.summary(red)
+ redr2 = summary.get('r.squared','NA')
+ try:
+ rcve = (float(fullr2)-float(redr2))/float(fullr2)
+ except:
+ rcve = 'NA'
+ col_str = ""
+ for col in cols:
+ col_str = col_str + str(int(x_cols[int(col)]) + 1) + " "
+ col_str.strip()
+ rcve_col_str = ""
+ for col in s:
+ if col not in cols:
+ rcve_col_str = rcve_col_str + str(int(x_cols[int(col)]) + 1) + " "
+ rcve_col_str.strip()
+ if len(cols) == len(s): #full model
+ rcve_col_str = "-"
+ rcve = "-"
+ try:
+ redr2 = "%.4f" %(float(redr2))
+ except:
+ pass
+ try:
+ rcve = "%.4f" %(float(rcve))
+ except:
+ pass
+ print >>fout, "%s\t%s\t%s\t%s" %(col_str,redr2,rcve_col_str,rcve)
diff -r 618210a97e62 -r 05974294cbf1 tools/regVariation/rcve.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/regVariation/rcve.xml Sat Sep 20 18:14:24 2008 -0400
@@ -0,0 +1,68 @@
+<tool id="rcve1" name="Compute RCVE" version="1.0.0">
+ <description> </description>
+ <command interpreter="python">
+ rcve.py
+ $input1
+ $response_col
+ $predictor_cols
+ $out_file1
+ 1>/dev/null
+ </command>
+ <inputs>
+ <param format="tabular" name="input1" type="data" label="Select data" help="Query missing? See TIP below."/>
+ <param name="response_col" label="Response column (Y)" type="data_column" data_ref="input1" />
+ <param name="predictor_cols" label="Predictor columns (X)" type="data_column" data_ref="input1" multiple="true" />
+ </inputs>
+ <outputs>
+ <data format="input" name="out_file1" metadata_source="input1" />
+ </outputs>
+ <requirements>
+ <requirement type="python-module">rpy</requirement>
+ </requirements>
+ <tests>
+ <!-- Test data with vlid values -->
+ <test>
+ <param name="input1" value="reg_inp.tab"/>
+ <param name="response_col" value="1"/>
+ <param name="predictor_cols" value="2,3,4"/>
+ <output name="out_file1" file="rcve_out.dat"/>
+ </test>
+
+ </tests>
+ <help>
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Edit Queries->Convert characters*
+
+-----
+
+.. class:: infomark
+
+**What it does**
+
+This tool computes the RCVE (Relative Contribution to Variance) for all possible variable subsets using the following formula:
+
+**RCVE(i) = [R-sq (full: 1,2,..,i..,p-1) - R-sq(without i: 1,2,...,p-1)] / R-sq (full: 1,2,..,i..,p-1)**,
+which denotes the case where the 'i'th predictor is dropped.
+
+
+In general,
+**RCVE(X+) = [R-sq (full: {X,X+}) - R-sq(reduced: {X})] / R-sq (full: {X,X+})**,
+where,
+
+- {X,X+} denotes the set of all predictors,
+- X+ is the set of predictors for which we compute RCVE (and therefore drop from the full model to obtain a reduced one),
+- {X} is the set of the predictors that are left in the reduced model after excluding {X+}
+
+
+The 4 columns in the output are described below:
+
+- Column 1 (Model): denotes the variables present in the model ({X})
+- Column 2 (R-sq): denotes the R-squared value corresponding to the model in Column 1
+- Column 3 (RCVE_Terms): denotes the variable/s for which RCVE is computed ({X+}). These are the variables that are absent in the reduced model in Column 1. A '-' in this column indicates that the model in Column 1 is the Full model.
+- Column 4 (RCVE): denotes the RCVE value corresponding to the variable/s in Column 3. A '-' in this column indicates that the model in Column 1 is the Full model.
+
+
+ </help>
+</tool>
1
0
[hg] galaxy 1518: Add a wrapper for metadata inside of DatasetFi...
by greg＠scofield.bx.psu.edu 22 Sep '08
by greg＠scofield.bx.psu.edu 22 Sep '08
22 Sep '08
details: http://www.bx.psu.edu/hg/galaxy/rev/0f735b21dc12
changeset: 1518:0f735b21dc12
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Thu Sep 18 16:48:29 2008 -0400
description:
Add a wrapper for metadata inside of DatasetFilenameWrapper to allow proper string substitution in
commandline and templates.
2 file(s) affected in this change:
lib/galaxy/datatypes/metadata.py
lib/galaxy/tools/__init__.py
diffs (56 lines):
diff -r 1d326855ba89 -r 0f735b21dc12 lib/galaxy/datatypes/metadata.py
--- a/lib/galaxy/datatypes/metadata.py Thu Sep 18 15:41:23 2008 -0400
+++ b/lib/galaxy/datatypes/metadata.py Thu Sep 18 16:48:29 2008 -0400
@@ -211,6 +211,9 @@
elif not isinstance(value, list):
MetadataParameter.__setattr__(self, name, [value])
+ def __iter__( self ):
+ return iter( self.value )
+
def __str__(self):
if self.value in [None, []]:
return str(self.spec.no_value)
diff -r 1d326855ba89 -r 0f735b21dc12 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py Thu Sep 18 15:41:23 2008 -0400
+++ b/lib/galaxy/tools/__init__.py Thu Sep 18 16:48:29 2008 -0400
@@ -1177,6 +1177,31 @@
Wraps a dataset so that __str__ returns the filename, but all other
attributes are accessible.
"""
+
+ class MetadataWrapper:
+ """
+ Wraps a Metadata Collection to return MetadataParameters wrapped according to the metadata spec.
+ Methods implemented to match behavior of a Metadata Collection.
+ """
+ def __init__( self, metadata ):
+ self.metadata = metadata
+ def __getattr__( self, name ):
+ rval = self.metadata.get( name, None )
+ if name in self.metadata.spec:
+ rval = self.metadata.spec[name].wrap( rval, self.metadata.parent )
+ return rval
+ def __nonzero__( self ):
+ return self.metadata.__nonzero__()
+ def __iter__( self ):
+ return self.metadata.__iter__()
+ def get( self, key, default=None ):
+ try:
+ return getattr( self, key )
+ except:
+ return default
+ def items( self ):
+ return iter( [ ( k, self.get( k ) ) for k, v in self.metadata.items() ] )
+
def __init__( self, dataset, datatypes_registry = None, tool = None, name = None ):
if not dataset:
try:
@@ -1187,6 +1212,7 @@
self.dataset = NoneDataset( datatypes_registry = datatypes_registry, ext = ext )
else:
self.dataset = dataset
+ self.metadata = self.MetadataWrapper( dataset.metadata )
def __str__( self ):
return self.dataset.file_name
def __getattr__( self, key ):
1
0
[hg] galaxy 1510: Strip whitespace from columns in file for data...
by greg＠scofield.bx.psu.edu 22 Sep '08
by greg＠scofield.bx.psu.edu 22 Sep '08
22 Sep '08
details: http://www.bx.psu.edu/hg/galaxy/rev/f8e3770c23f6
changeset: 1510:f8e3770c23f6
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Tue Sep 16 14:10:53 2008 -0400
description:
Strip whitespace from columns in file for dataset_metadata_in_file validator.
1 file(s) affected in this change:
lib/galaxy/tools/parameters/validation.py
diffs (12 lines):
diff -r ec547440ec97 -r f8e3770c23f6 lib/galaxy/tools/parameters/validation.py
--- a/lib/galaxy/tools/parameters/validation.py Tue Sep 16 13:25:42 2008 -0400
+++ b/lib/galaxy/tools/parameters/validation.py Tue Sep 16 14:10:53 2008 -0400
@@ -247,7 +247,7 @@
if line_startswith is None or line.startswith( line_startswith ):
fields = line.split( '\t' )
if metadata_column < len( fields ):
- self.valid_values.append( fields[metadata_column] )
+ self.valid_values.append( fields[metadata_column].strip() )
def validate( self, value, history = None ):
if not value: return
if hasattr( value, "metadata" ):
1
0
details: http://www.bx.psu.edu/hg/galaxy/rev/f1da9b95549b
changeset: 1516:f1da9b95549b
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Thu Sep 18 15:24:51 2008 -0400
description:
Update to latest gmaj.
1 file(s) affected in this change:
static/gmaj/gmaj.jar
diffs (2 lines):
diff -r 280e8b68f845 -r f1da9b95549b static/gmaj/gmaj.jar
Binary file static/gmaj/gmaj.jar has changed
1
0
[hg] galaxy 1513: Quick 'n easy solution to the EMBOSS stage in/...
by greg＠scofield.bx.psu.edu 22 Sep '08
by greg＠scofield.bx.psu.edu 22 Sep '08
22 Sep '08
details: http://www.bx.psu.edu/hg/galaxy/rev/cf17b5a16eff
changeset: 1513:cf17b5a16eff
user: Nate Coraor <nate(a)bx.psu.edu>
date: Wed Sep 17 10:45:20 2008 -0400
description:
Quick 'n easy solution to the EMBOSS stage in/out problem: read the
outputs from the "real" galaxy path instead of the temp stage path.
1 file(s) affected in this change:
lib/galaxy/jobs/runners/pbs.py
diffs (31 lines):
diff -r 1e408bab8941 -r cf17b5a16eff lib/galaxy/jobs/runners/pbs.py
--- a/lib/galaxy/jobs/runners/pbs.py Tue Sep 16 15:23:23 2008 -0400
+++ b/lib/galaxy/jobs/runners/pbs.py Wed Sep 17 10:45:20 2008 -0400
@@ -146,7 +146,7 @@
if self.app.config.pbs_application_server:
pbs_ofile = self.app.config.pbs_application_server + ':' + ofile
pbs_efile = self.app.config.pbs_application_server + ':' + efile
- stagein = self.get_stage_in_out( job_wrapper.get_input_fnames() + job_wrapper.get_output_fnames() )
+ stagein = self.get_stage_in_out( job_wrapper.get_input_fnames() + job_wrapper.get_output_fnames(), symlink=True )
stageout = self.get_stage_in_out( job_wrapper.get_output_fnames() )
job_attrs = pbs.new_attropl(5)
job_attrs[0].name = pbs.ATTR_o
@@ -372,15 +372,15 @@
self.queue.put( self.STOP_SIGNAL )
log.info( "pbs job runner stopped" )
- def get_stage_in_out( self, fnames ):
+ def get_stage_in_out( self, fnames, symlink=False ):
"""Convenience function to create a stagein/stageout list"""
stage = ''
for fname in fnames:
if os.access(fname, os.R_OK):
- if stage != '':
+ if stage:
stage += ','
# pathnames are now absolute
- if self.app.config.pbs_stage_path != '':
+ if symlink and self.app.config.pbs_stage_path:
stage_name = os.path.join(self.app.config.pbs_stage_path, os.path.split(fname)[1])
else:
stage_name = fname
1
0
[hg] galaxy 1512: The MetadataCollection object is now created o...
by greg＠scofield.bx.psu.edu 22 Sep '08
by greg＠scofield.bx.psu.edu 22 Sep '08
22 Sep '08
details: http://www.bx.psu.edu/hg/galaxy/rev/1e408bab8941
changeset: 1512:1e408bab8941
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Tue Sep 16 15:23:23 2008 -0400
description:
The MetadataCollection object is now created only once per dataset object instance (and when datatype is changed), instead of each time dataset.metadata is called.
The 'no_value' attribute for a metadata element's spec is returned when the metadata element's value is None.
2 file(s) affected in this change:
lib/galaxy/datatypes/metadata.py
lib/galaxy/model/__init__.py
diffs (89 lines):
diff -r c3ce08879473 -r 1e408bab8941 lib/galaxy/datatypes/metadata.py
--- a/lib/galaxy/datatypes/metadata.py Tue Sep 16 14:26:14 2008 -0400
+++ b/lib/galaxy/datatypes/metadata.py Tue Sep 16 15:23:23 2008 -0400
@@ -151,9 +151,16 @@
"""
def __init__(self, parent, spec):
self.parent = parent
- self.bunch = parent._metadata or dict()
if spec is None: self.spec = MetadataSpecCollection()
else: self.spec = spec
+
+ #set default metadata values
+ if not self.parent._metadata:
+ self.parent._metadata = {}
+ for name, value in self.spec.items():
+ if name not in self.bunch:
+ self.bunch[name] = value.default
+
def __iter__(self):
return self.bunch.__iter__()
def get( self, key, default=None ):
@@ -168,19 +175,21 @@
def __nonzero__(self):
return self.bunch.__nonzero__()
def __getattr__(self, name):
- if self.bunch.get( name ):
- return self.bunch.get( name )
+ if name == "bunch":
+ return self.parent._metadata
+ rval = self.bunch.get( name )
+ if rval is None:
+ rval = self.spec.get( name, None )
+ if rval:
+ rval = rval.no_value
+ return rval
+ def __setattr__(self, name, value):
+ if name in ["parent","spec"]:
+ self.__dict__[name] = value
+ elif name == "bunch":
+ self.parent._metadata = value
else:
- if self.spec.get(name, None):
- return self.spec[name].default
- else:
- return None
- def __setattr__(self, name, value):
- if name in ["parent","bunch","spec"]:
- self.__dict__[name] = value
- else:
- self.__dict__["bunch"][name] = value
- self.bunch = self.parent._metadata = dict( self.bunch )
+ self.bunch[name] = value
MetadataElement = Statement(MetadataElementSpec)
diff -r c3ce08879473 -r 1e408bab8941 lib/galaxy/model/__init__.py
--- a/lib/galaxy/model/__init__.py Tue Sep 16 14:26:14 2008 -0400
+++ b/lib/galaxy/model/__init__.py Tue Sep 16 15:23:23 2008 -0400
@@ -113,7 +113,7 @@
self.peek = peek
self.extension = extension
self.designation = designation
- self._metadata = metadata or dict()
+ self.metadata = metadata or dict()
self.dbkey = dbkey
self.deleted = deleted
self.visible = visible
@@ -159,9 +159,9 @@
return datatypes_registry.get_datatype_by_extension( self.extension )
def get_metadata( self ):
- if not self._metadata:
- self._metadata = dict()
- return MetadataCollection( self, self.datatype.metadata_spec )
+ if not hasattr( self, '_metadata_collection' ):
+ self._metadata_collection = MetadataCollection( self, self.datatype.metadata_spec )
+ return self._metadata_collection
def set_metadata( self, bunch ):
# Needs to accept a MetadataCollection, a bunch, or a dict
self._metadata = dict( bunch.items() )
@@ -191,6 +191,8 @@
def change_datatype( self, new_ext ):
self.clear_associated_files()
+ if hasattr( self, '_metadata_collection' ):
+ del self._metadata_collection
datatypes_registry.change_datatype( self, new_ext )
def get_size( self ):
"""Returns the size of the data on disk"""
1
0