1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/7f28b9616f70/ changeset: 7f28b9616f70 user: guerler date: 2013-03-08 17:52:37 summary: sort: preserves comments in sorted output affected #: 5 files diff -r 7fe3612562eea76387e88ab7d184fccc3160cb43 -r 7f28b9616f70d4c6515c6fd88061374a2b01a283 test-data/sort_in1.bed --- a/test-data/sort_in1.bed +++ b/test-data/sort_in1.bed @@ -1,3 +1,6 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, diff -r 7fe3612562eea76387e88ab7d184fccc3160cb43 -r 7f28b9616f70d4c6515c6fd88061374a2b01a283 test-data/sort_out1.bed --- a/test-data/sort_out1.bed +++ b/test-data/sort_out1.bed @@ -1,29 +1,32 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters +chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, +chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, +chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, +chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, +chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, +chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, +chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, +chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, +chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, +chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, +chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, +chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, +chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, +chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, -chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, -chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, -chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, +chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, +chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, +chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, -chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, -chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, -chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, -chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, -chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, +chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, +chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, +chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, +chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, +chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759, +chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, +chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790, -chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, -chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, -chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, -chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, -chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, -chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, -chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, -chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, -chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, -chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, -chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, -chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, -chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, -chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, -chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759, -chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, -chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, diff -r 7fe3612562eea76387e88ab7d184fccc3160cb43 -r 7f28b9616f70d4c6515c6fd88061374a2b01a283 test-data/sort_out2.bed --- a/test-data/sort_out2.bed +++ b/test-data/sort_out2.bed @@ -1,3 +1,18 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters +chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, +chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, +chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, +chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, +chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, +chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, +chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, +chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, +chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, +chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, +chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, @@ -6,24 +21,12 @@ chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, -chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, -chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, -chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790, chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, -chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, -chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, -chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, -chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, -chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, -chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, -chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, -chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790, chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759, -chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, -chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, diff -r 7fe3612562eea76387e88ab7d184fccc3160cb43 -r 7f28b9616f70d4c6515c6fd88061374a2b01a283 tools/filters/sorter.py --- a/tools/filters/sorter.py +++ b/tools/filters/sorter.py @@ -1,49 +1,59 @@ """ -Sorts tabular data on one or more columns. + Sorts tabular data on one or more columns. All comments of the file are collected + and placed at the beginning of the sorted output file. + + usage: sorter.py [options] + -i, --input: Tabular file to be sorted + -o, --output: Sorted output file + -k, --key: Key (see manual for bash/sort) + + usage: sorter.py input output [key ...] +""" +# 03/05/2013 guerler -usage: %prog [options] - -i, --input=i: Tabular file to be sorted - -o, --out_file1=o: Sorted output file - -c, --column=c: First column to sort on - -s, --style=s: Sort style (numerical or alphabetical) - -r, --order=r: Order (ASC or DESC) +# imports +import os, re, string, sys +from optparse import OptionParser -usage: %prog input out_file1 column style order [column style ...] -""" - -import os, re, string, sys -from galaxy import eggs -import pkg_resources; pkg_resources.require( "bx-python" ) -from bx.cookbook import doc_optparse - +# error def stop_err( msg ): sys.stderr.write( "%s\n" % msg ) sys.exit() +# main def main(): - #Parse Command Line - options, args = doc_optparse.parse( __doc__ ) + # define options + parser = OptionParser() + parser.add_option("-i", "--input") + parser.add_option("-o", "--output") + parser.add_option("-k", "--key", action="append") + + # parse + options, args = parser.parse_args() + try: - inputfile = options.input - outputfile = '-o %s' % options.out_file1 - columns = [options.column] - styles = [('','n')[options.style == 'num']] - orders = [('','r')[options.order == 'DESC']] - col_style_orders = sys.argv[6:] - if len(col_style_orders) > 1: - columns.extend([col_style_orders[i] for i in range(0,len(col_style_orders),3)]) - styles.extend([('','n')[col_style_orders[i] == 'num'] for i in range(1,len(col_style_orders),3)]) - orders.extend([('','r')[col_style_orders[i] == 'DESC'] for i in range(2,len(col_style_orders),3)]) - cols = [ '-k%s,%s%s%s'%(columns[i], columns[i], styles[i], orders[i]) for i in range(len(columns)) ] + # retrieve options + input = options.input + output = options.output + key = ["-k" + k for k in options.key] + + # grep comments + grep_comments = "(grep '^#' %s) > %s" % (input, output) + print grep_comments + + # grep and sort columns + sort_columns = "(grep '^[^#]' %s | sort -f -t '\t' %s) >> %s" % (input, ' '.join(key), output) + print sort_columns + + # execute + os.system(grep_comments) + os.system(sort_columns) + except Exception, ex: - stop_err('Error parsing input parameters\n' + str(ex)) + stop_err('Error running sorter.py\n' + str(ex)) - # Launch sort. - cmd = "sort -f -t ' ' %s %s %s" % (' '.join(cols), outputfile, inputfile) - try: - os.system(cmd) - except Exception, ex: - stop_err('Error running sort command\n' + str(ex)) + # exit + sys.exit(0) if __name__ == "__main__": main() diff -r 7fe3612562eea76387e88ab7d184fccc3160cb43 -r 7f28b9616f70d4c6515c6fd88061374a2b01a283 tools/filters/sorter.xml --- a/tools/filters/sorter.xml +++ b/tools/filters/sorter.xml @@ -1,130 +1,61 @@ -<tool id="sort1" name="Sort" version="1.0.1"> - <description>data in ascending or descending order</description> - <command interpreter="python"> - sorter.py - --input=$input - --out_file1=$out_file1 - --column=$column - --style=$style - --order=$order - #for $col in $column_set: - ${col.other_column} - ${col.other_style} - ${col.other_order} - #end for - </command> - <inputs> - <param format="tabular" name="input" type="data" label="Sort Dataset" /> - <param name="column" label="on column" type="data_column" data_ref="input" accept_default="true"/> - <param name="style" type="select" label="with flavor"> - <option value="num">Numerical sort</option> - <option value="alpha">Alphabetical sort</option> - </param> - <param name="order" type="select" label="everything in"> - <option value="DESC">Descending order</option> - <option value="ASC">Ascending order</option> - </param> - <repeat name="column_set" title="Column selection"> - <param name="other_column" label="on column" type="data_column" data_ref="input" accept_default="true" /> - <param name="other_style" type="select" label="with flavor"> - <option value="num">Numerical sort</option> - <option value="alpha">Alphabetical sort</option> - </param> - <param name="other_order" type="select" label="everything in"> - <option value="DESC">Descending order</option> - <option value="ASC">Ascending order</option> - </param> - </repeat> - </inputs> - <outputs> - <data format="input" name="out_file1" metadata_source="input"/> - </outputs> - <tests> - <test> - <param name="input" value="sort_in1.bed"/> - <param name="column" value="1"/> - <param name="style" value="num"/> - <param name="order" value="ASC"/> - <param name="other_column" value="3"/> - <param name="other_style" value="num"/> - <param name="other_order" value="ASC"/> - <output name="out_file1" file="sort_out1.bed"/> - </test> - <test> - <param name="input" value="sort_in1.bed"/> - <param name="column" value="3"/> - <param name="style" value="alpha"/> - <param name="order" value="ASC"/> - <param name="other_column" value="1"/> - <param name="other_style" value="alpha"/> - <param name="other_order" value="ASC"/> - <output name="out_file1" file="sort_out2.bed"/> - </test> - </tests> - <help> +<tool id="sort1" name="Sort" version="1.0.2"> + <description>data in ascending or descending order</description> + <command interpreter="python"> + sorter.py + --input=$input + --output=$output + --key=$column,$column$style$order + #for $col in $column_set: + --key=${col.other_column},${col.other_column}${col.other_style}${col.other_order} + #end for + </command> + <inputs> + <param format="tabular" name="input" type="data" label="Sort Dataset" /> + <param name="column" label="on column" type="data_column" data_ref="input" accept_default="true"/> + <param name="style" type="select" label="with flavor"> + <option value="n">Numerical sort</option> + <option value="">Alphabetical sort</option> + </param> + <param name="order" type="select" label="everything in"> + <option value="r">Descending order</option> + <option value="">Ascending order</option> + </param> + <repeat name="column_set" title="Column selection"> + <param name="other_column" label="on column" type="data_column" data_ref="input" accept_default="true" /> + <param name="other_style" type="select" label="with flavor"> + <option value="n">Numerical sort</option> + <option value="">Alphabetical sort</option> + </param> + <param name="other_order" type="select" label="everything in"> + <option value="r">Descending order</option> + <option value="">Ascending order</option> + </param> + </repeat> + </inputs> + <outputs> + <data format="input" name="output" metadata_source="input"/> + </outputs> + <tests> + <test> + <param name="input" value="sort_in1.bed"/> + <param name="column" value="1"/> + <param name="style" value=""/> + <param name="order" value=""/> + <param name="other_column" value="3"/> + <param name="other_style" value="n"/> + <param name="other_order" value="r"/> + <output name="output" file="sort_out1.bed"/> + </test> + <test> + <param name="input" value="sort_in1.bed"/> + <param name="column" value="1"/> + <param name="style" value=""/> + <param name="order" value=""/> + <param name="other_column" value="3"/> + <param name="other_style" value="n"/> + <param name="other_order" value=""/> + <output name="output" file="sort_out2.bed"/> + </test> + </tests> -.. class:: infomark - -**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* - ------ - -**Syntax** - -This tool sorts the dataset on any number of columns in either ascending or descending order. - -* Numerical sort orders numbers by their magnitude, ignores all characters besides numbers, and evaluates a string of numbers to the value they signify. -* Alphabetical sort is a phonebook type sort based on the conventional order of letters in an alphabet. Each nth letter is compared with the nth letter of other words in the list, starting at the first letter of each word and advancing to the second, third, fourth, and so on, until the order is established. Therefore, in an alphabetical sort, 2 comes after 100 (1 < 2). - ------ - -**Examples** - -The list of numbers 4,17,3,5 collates to 3,4,5,17 by numerical sorting, while it collates to 17,3,4,5 by alphabetical sorting. - -Sorting the following:: - - Q d 7 II jhu 45 - A kk 4 I h 111 - Pd p 1 ktY WS 113 - A g 10 H ZZ 856 - A edf 4 tw b 234 - BBB rt 10 H ZZ 100 - A rew 10 d b 1111 - C sd 19 YH aa 10 - Hah c 23 ver bb 467 - MN gtr 1 a X 32 - N j 9 a T 205 - BBB rrf 10 b Z 134 - odfr ws 6 Weg dew 201 - C f 3 WW SW 34 - A jhg 4 I b 345 - Pd gf 7 Gthe de 567 - rS hty 90 YY LOp 89 - A g 10 H h 43 - A g 4 I h 500 - -on columns 1 (alpha), 3 (num), and 6 (num) in ascending order will yield:: - - A kk 4 I h 111 - A edf 4 tw b 234 - A jhg 4 I b 345 - A g 4 I h 500 - A g 10 H h 43 - A g 10 H ZZ 856 - A rew 10 d b 1111 - BBB rt 10 H ZZ 100 - BBB rrf 10 b Z 134 - C f 3 WW SW 34 - C sd 19 YH aa 10 - Hah c 23 ver bb 467 - MN gtr 1 a X 32 - N j 9 a T 205 - odfr ws 6 Weg dew 201 - Pd p 1 ktY WS 113 - Pd gf 7 Gthe de 567 - Q d 7 II jhu 45 - rS hty 90 YY LOp 89 - - </help></tool> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.