[hg] galaxy 2781: Adding an option to Group tool to ignore case while grouping
details: http://www.bx.psu.edu/hg/galaxy/rev/dd50d8d45177 changeset: 2781:dd50d8d45177 user: gua110 date: Fri Sep 25 14:50:34 2009 -0400 description: Adding an option to Group tool to ignore case while grouping. 3 file(s) affected in this change: test-data/groupby_out1.dat tools/stats/grouping.py tools/stats/grouping.xml diffs (122 lines): diff -r 3559f7377b9c -r dd50d8d45177 test-data/groupby_out1.dat --- a/test-data/groupby_out1.dat Fri Sep 25 14:36:12 2009 -0400 +++ b/test-data/groupby_out1.dat Fri Sep 25 14:50:34 2009 -0400 @@ -17,4 +17,4 @@ chr7 1.15958e+08 chr8 1.18881e+08 chr9 1.28843e+08 -chrX 1.45195e+08 +chrx 1.45195e+08 diff -r 3559f7377b9c -r dd50d8d45177 tools/stats/grouping.py --- a/tools/stats/grouping.py Fri Sep 25 14:36:12 2009 -0400 +++ b/tools/stats/grouping.py Fri Sep 25 14:50:34 2009 -0400 @@ -12,13 +12,13 @@ def main(): inputfile = sys.argv[2] - + ignorecase = int(sys.argv[4]) ops = [] cols = [] rounds = [] elems = [] - for var in sys.argv[4:]: + for var in sys.argv[5:]: ops.append(var.split()[0]) cols.append(var.split()[1]) rounds.append(var.split()[2]) @@ -71,7 +71,10 @@ we need to add 1 to group_col. if POS2 is not specified, the newer versions of sort will consider the entire line for sorting. To prevent this, we set POS2=POS1. 
""" - command_line = "sort -f -k " + str(group_col+1) +"," + str(group_col+1) + " -o " + tmpfile.name + " " + inputfile + case = '' + if ignorecase == 1: + case = '-f' + command_line = "sort -t $'\t' " + case + " -k" + str(group_col+1) +"," + str(group_col+1) + " -o " + tmpfile.name + " " + inputfile except Exception, exc: stop_err( 'Initialization error -> %s' %str(exc) ) @@ -95,6 +98,8 @@ try: fields = line.split("\t") item = fields[group_col] + if ignorecase == 1: + item = item.lower() if prev_item != "": # At this level, we're grouping on values (item and prev_item) in group_col if item == prev_item: diff -r 3559f7377b9c -r dd50d8d45177 tools/stats/grouping.xml --- a/tools/stats/grouping.xml Fri Sep 25 14:36:12 2009 -0400 +++ b/tools/stats/grouping.xml Fri Sep 25 14:50:34 2009 -0400 @@ -1,10 +1,11 @@ -<tool id="Grouping1" name="Group" version="1.7.0"> +<tool id="Grouping1" name="Group" version="1.8.0"> <description>data by a column and perform aggregate operation on other columns.</description> <command interpreter="python"> grouping.py $out_file1 $input1 $groupcol + $ignorecase #for $op in $operations '${op.optype} ${op.opcol} @@ -14,6 +15,9 @@ <inputs> <param format="tabular" name="input1" type="data" label="Select data" help="Query missing? 
See TIP below."/> <param name="groupcol" label="Group by column" type="data_column" data_ref="input1" /> + <param name="ignorecase" type="boolean" truevalue="1" falsevalue="0"> + <label>Ignore case while grouping?</label> + </param> <repeat name="operations" title="Operation"> <param name="optype" type="select" label="Type"> <option value="mean">Mean</option> @@ -44,6 +48,7 @@ <test> <param name="input1" value="1.bed"/> <param name="groupcol" value="1"/> + <param name="ignorecase" value="true"/> <param name="optype" value="mean"/> <param name="opcol" value="2"/> <param name="opround" value="no"/> @@ -54,6 +59,7 @@ <test> <param name="input1" value="1.tabular"/> <param name="groupcol" value="1"/> + <param name="ignorecase" value="true"/> <param name="optype" value="mean"/> <param name="opcol" value="2"/> <param name="opround" value="no"/> @@ -80,15 +86,22 @@ - For the following input:: - chr22 1000 NM_17 - chr22 2000 NM_18 - chr10 2200 NM_10 - chr10 1200 NM_11 - chr22 1600 NM_19 + chr22 1000 1003 TTT + chr22 2000 2003 aaa + chr10 2200 2203 TTT + chr10 1200 1203 ttt + chr22 1600 1603 AAA -- running this tool with **Group by column 1**, Operations **Mean on column 2** and **Concatenate on column 3** will return:: +- **Grouping on column 4** while ignoring case, and performing operation **Count on column 1** will return:: - chr10 1700.00 NM_11,NM_10 - chr22 1533.33 NM_17,NM_19,NM_18 + AAA 2 + TTT 3 + +- **Grouping on column 4** while not ignoring case, and performing operation **Count on column 1** will return:: + + aaa 1 + AAA 1 + ttt 1 + TTT 2 </help> </tool>
participants (1)
-
Nate Coraor