[hg] galaxy 1684: Modified sort options in grouping tool to work correctly on older and newer versions of unix sort
details: http://www.bx.psu.edu/hg/galaxy/rev/c1d3004f0613 changeset: 1684:c1d3004f0613 user: guru date: Mon Dec 22 12:15:02 2008 -0500 description: Modified sort options in grouping tool to work correctly on older and newer versions of unix sort. Also included functional tests. 4 file(s) affected in this change: test-data/groupby_out1.dat test-data/groupby_out2.dat tools/stats/grouping.py tools/stats/grouping.xml diffs (126 lines): diff -r 96f2c4630e62 -r c1d3004f0613 test-data/groupby_out1.dat --- a/test-data/groupby_out1.dat Fri Dec 19 14:25:43 2008 -0500 +++ b/test-data/groupby_out1.dat Mon Dec 22 12:15:02 2008 -0500 @@ -1,21 +1,20 @@ -chr10 55251623.000000 -chr11 87588756.250000 -chr1 148052568.250000 -chr12 38440094.000000 -chr13 112381694.000000 -chr14 98710240.000000 -chr15 41666442.500000 -chr16 206638.000000 -chr18 50562378.250000 -chr19 59226196.750000 -chr20 33504194.750000 -chr2 118341365.500000 -chr21 33160676.750000 -chr2 220209905.500000 -chr22 30471242.250000 -chr5 131612441.500000 -chr6 108564320.750000 -chr7 115958079.000000 -chr8 118881131.000000 -chr9 128842832.750000 -chrX 145194871.500000 +chr1 1.48053e+08 +chr10 5.52516e+07 +chr11 8.75888e+07 +chr12 3.84401e+07 +chr13 1.12382e+08 +chr14 9.87102e+07 +chr15 4.16664e+07 +chr16 206638 +chr18 5.05624e+07 +chr19 5.92262e+07 +chr2 1.69276e+08 +chr20 3.35042e+07 +chr21 3.31607e+07 +chr22 3.04712e+07 +chr5 1.31612e+08 +chr6 1.08564e+08 +chr7 1.15958e+08 +chr8 1.18881e+08 +chr9 1.28843e+08 +chrX 1.45195e+08 diff -r 96f2c4630e62 -r c1d3004f0613 test-data/groupby_out2.dat --- a/test-data/groupby_out2.dat Fri Dec 19 14:25:43 2008 -0500 +++ b/test-data/groupby_out2.dat Mon Dec 22 12:15:02 2008 -0500 @@ -1,2 +1,2 @@ -chr10 1700.00 ['NM_11', 'NM_10', 'test'] -chr22 1533.33 ['NM_17', 'NM_19', 'NM_18'] \ No newline at end of file +chr10 1700 +chr22 1533.33 \ No newline at end of file diff -r 96f2c4630e62 -r c1d3004f0613 tools/stats/grouping.py --- a/tools/stats/grouping.py Fri Dec 19 14:25:43 2008 -0500 +++ 
b/tools/stats/grouping.py Mon Dec 22 12:15:02 2008 -0500 @@ -69,8 +69,9 @@ start a key at POS1, end it at POS2 (origin 1) In other words, column positions start at 1 rather than 0, so we need to add 1 to group_col. + if POS2 is not specified, the newer versions of sort will consider the entire line for sorting. To prevent this, we set POS2=POS1. """ - command_line = "sort -f -k " + str(group_col+1) + " -o " + tmpfile.name + " " + inputfile + command_line = "sort -f -k " + str(group_col+1) +"," + str(group_col+1) + " -o " + tmpfile.name + " " + inputfile except Exception, exc: stop_err( 'Initialization error -> %s' %str(exc) ) diff -r 96f2c4630e62 -r c1d3004f0613 tools/stats/grouping.xml --- a/tools/stats/grouping.xml Fri Dec 19 14:25:43 2008 -0500 +++ b/tools/stats/grouping.xml Mon Dec 22 12:15:02 2008 -0500 @@ -1,4 +1,4 @@ -<tool id="Grouping1" name="Group" version="1.3.0"> +<tool id="Grouping1" name="Group" version="1.4.0"> <description>data by a column and perform aggregate operation on other columns.</description> <command interpreter="python"> grouping.py @@ -38,28 +38,26 @@ <requirements> <requirement type="python-module">rpy</requirement> </requirements> - <tests> - <!-- Test valid data --> - <!-- TODO: fix this tool so that it works on various platforms - The following test should then work... 
- <test> - <param name="input1" value="1.bed"/> - <param name="groupcol" value="1"/> - <param name="optype" value="mean"/> - <param name="opcol" value="2"/> - <param name="opround" value="no"/> - <output name="out_file1" file="groupby_out1.dat"/> + <tests> + <!-- Test valid data --> + <test> + <param name="input1" value="1.bed"/> + <param name="groupcol" value="1"/> + <param name="optype" value="mean"/> + <param name="opcol" value="2"/> + <param name="opround" value="no"/> + <output name="out_file1" file="groupby_out1.dat"/> + </test> + + <!-- Test data with an invalid value in a column --> + <test> + <param name="input1" value="1.tabular"/> + <param name="groupcol" value="1"/> + <param name="optype" value="mean"/> + <param name="opcol" value="2"/> + <param name="opround" value="no"/> + <output name="out_file1" file="groupby_out2.dat"/> </test> - --> - <!-- Test data with an invalid value in a column --> - <!-- TODO: fix this test... - <test> - <param name="input1" value="1.tabular"/> - <param name="groupcol" value="1"/> - <param name="operations" value="mean 2,c 3"/> - <output name="out_file1" file="groupby_out2.dat"/> - </test> - --> </tests> <help>
participants (1)
-
Nate Coraor