# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User gua110
# Date 1280420469 14400
# Node ID 07e7db914c6d2090ef7579a5e2c0583af50c39f3
# Parent 79baa9583d9390bb6341d3484aa9f55b8e3808f8
Modified the Group tool by adding the Mode function to the list of aggregate operations.
--- a/tools/stats/grouping.py
+++ b/tools/stats/grouping.py
@@ -23,6 +23,12 @@ def main():
cols.append(var.split()[1])
rounds.append(var.split()[2])
+ if 'Mode' in ops:
+ try:
+ r.library('prettyR')
+ except:
+ stop_err('R package prettyR could not be loaded. Please make sure it is installed.')
+
"""
At this point, ops, cols and rounds will look something like this:
ops: ['mean', 'min', 'c']
@@ -46,9 +52,10 @@ def main():
except:
stop_err( "Group column not specified." )
+ str_ops = ['c', 'length', 'unique', 'random', 'cuniq', 'Mode'] #ops that can handle string/non-numeric inputs
for k,col in enumerate(cols):
col = int(col)-1
- if ops[k] not in ['c', 'length', 'unique', 'random', 'cuniq']:
+ if ops[k] not in str_ops:
# We'll get here only if the user didn't choose 'Concatenate' or 'Count' or 'Count Distinct' or 'pick randmly', which are the
# only aggregation functions that can be used on columns containing strings.
try:
@@ -109,7 +116,7 @@ def main():
valid = True
# Before appending the current value, make sure it is numeric if the
# operation for the column requires it.
- if ops[i] not in ['c','length', 'unique','random','cuniq']:
+ if ops[i] not in str_ops:
try:
float( fields[col].strip())
except:
@@ -128,13 +135,14 @@ def main():
due to the sort on group_col we've applied to the data above.
"""
out_str = prev_item
-
+ multiple_modes = False
+ mode_index = None
for i, op in enumerate( ops ):
if op == 'cuniq':
rfunc = "r.c"
else:
rfunc = "r." + op
- if op not in ['c','length','unique','random','cuniq']:
+ if op not in str_ops:
for j, elem in enumerate( prev_vals[i] ):
prev_vals[i][j] = float( elem )
rout = eval( rfunc )( prev_vals[i] )
@@ -148,7 +156,10 @@ def main():
else:
rand_index = random.randint(0,len(prev_vals[i])-1)
rout = prev_vals[i][rand_index]
-
+
+ if op == 'Mode' and rout == '>1 mode':
+ multiple_modes = True
+ mode_index = i
if op == 'unique':
rfunc = "r.length"
rout = eval( rfunc )( rout )
@@ -165,8 +176,13 @@ def main():
out_str += "\t" + str(rout)
else:
out_str += "\t" + str(rout)
-
- print >>fout, out_str
+ if multiple_modes and mode_index != None:
+ out_str_list = out_str.split('\t')
+ for val in prev_vals[mode_index]:
+ out_str = '\t'.join(out_str_list[:mode_index+1]) + '\t' + str(val) + '\t' + '\t'.join(out_str_list[mode_index+2:])
+ print >>fout, out_str.rstrip('\t')
+ else:
+ print >>fout, out_str
prev_item = item
prev_vals = []
@@ -195,14 +211,15 @@ def main():
# Handle the last grouped value
out_str = prev_item
-
+ multiple_modes = False
+ mode_index = None
for i, op in enumerate(ops):
if op == 'cuniq':
rfunc = "r.c"
else:
rfunc = "r." + op
try:
- if op not in ['c','length','unique','random','cuniq']:
+ if op not in str_ops:
for j, elem in enumerate( prev_vals[i] ):
prev_vals[i][j] = float( elem )
rout = eval( rfunc )( prev_vals[i] )
@@ -216,7 +233,10 @@ def main():
else:
rand_index = random.randint(0,len(prev_vals[i])-1)
rout = prev_vals[i][rand_index]
-
+
+ if op == 'Mode' and rout == '>1 mode':
+ multiple_modes = True
+ mode_index = i
if op == 'unique':
rfunc = "r.length"
rout = eval( rfunc )( rout )
@@ -238,7 +258,13 @@ def main():
if not first_invalid_line:
first_invalid_line = ii+1
- print >>fout, out_str
+ if multiple_modes and mode_index != None:
+ out_str_list = out_str.split('\t')
+ for val in prev_vals[mode_index]:
+ out_str = '\t'.join(out_str_list[:mode_index+1]) + '\t' + str(val) + '\t' + '\t'.join(out_str_list[mode_index+2:])
+ print >>fout, out_str.rstrip('\t')
+ else:
+ print >>fout, out_str
# Generate a useful info message.
msg = "--Group by c%d: " %(group_col+1)
--- a/tools/stats/grouping.xml
+++ b/tools/stats/grouping.xml
@@ -1,4 +1,4 @@
-<tool id="Grouping1" name="Group" version="1.9.0">
+<tool id="Grouping1" name="Group" version="1.9.1"><description>data by a column and perform aggregate operation on other columns.</description><command interpreter="python">
grouping.py
@@ -22,6 +22,7 @@
<param name="optype" type="select" label="Type"><option value="mean">Mean</option><option value="median">Median</option>
+ <option value="Mode">Mode</option><option value="max">Maximum</option><option value="min">Minimum</option><option value="sum">Sum</option>
@@ -77,10 +78,12 @@
**Syntax**
-This tool allows you to group the input dataset by a particular column and perform aggregate functions like Mean, Sum, Max, Min and Concatenate on other columns.
+This tool allows you to group the input dataset by a particular column and perform aggregate functions like Mean, Median, Mode, Sum, Max, Min, Count, Random draw and Concatenate on other columns.
- All invalid, blank and comment lines are skipped when performing the aggregate functions. The number of skipped lines is displayed in the resulting history item.
+- If multiple modes are present, all are reported.
+
-----
**Example**
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Nate Coraor <nate(a)bx.psu.edu>
# Date 1280416580 14400
# Node ID c653ccfa1a1ef7c2a384ee4b3ab72da0b391ae14
# Parent ab48c0e20a948f310ad3b072c23560faa8433aa3
Allow access to /api without HTTP_REMOTE_USER set if use_remote_user = True, since the API controllers handle authentication internally.
--- a/lib/galaxy/web/framework/middleware/remoteuser.py
+++ b/lib/galaxy/web/framework/middleware/remoteuser.py
@@ -75,8 +75,8 @@ class RemoteUser( object ):
# Apache sets REMOTE_USER to the string '(null)' when using the
# Rewrite* method for passing REMOTE_USER and a user is
# un-authenticated. Any other possible values need to go here as well.
+ path_info = environ.get('PATH_INFO', '')
if environ.has_key( 'HTTP_REMOTE_USER' ) and environ[ 'HTTP_REMOTE_USER' ] != '(null)':
- path_info = environ.get('PATH_INFO', '')
if not environ[ 'HTTP_REMOTE_USER' ].count( '@' ):
if self.maildomain is not None:
environ[ 'HTTP_REMOTE_USER' ] += '@' + self.maildomain
@@ -96,7 +96,7 @@ class RemoteUser( object ):
if path_info.startswith( '/user/create' ) and environ[ 'HTTP_REMOTE_USER' ] in self.admin_users:
pass # admins can create users
elif path_info.startswith( '/user/api_keys' ):
- pass
+ pass # api keys can be managed when remote_user is in use
elif path_info.startswith( '/user' ):
title = "Access to Galaxy user controls is disabled"
message = """
@@ -105,6 +105,9 @@ class RemoteUser( object ):
"""
return self.error( start_response, title, message )
return self.app( environ, start_response )
+ elif path_info.startswith( '/api/' ):
+ # The API handles its own authentication via keys
+ return self.app( environ, start_response )
else:
title = "Access to Galaxy is denied"
message = """
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Dan Blankenberg <dan(a)bx.psu.edu>
# Date 1280371288 14400
# Node ID 9780508ee0c31b132158842afcae6c5ef90028a7
# Parent 75e99661d24caaeda1381b14cd062ffdcf7c3ecd
Allow the interval-to-bedstrict converter to work on BED files that may contain, e.g., a 'track' line.
--- a/lib/galaxy/datatypes/converters/interval_to_bedstrict_converter.py
+++ b/lib/galaxy/datatypes/converters/interval_to_bedstrict_converter.py
@@ -81,8 +81,8 @@ def __main__():
first_skipped_line = count + 1
continue
fields = line.split('\t')
- assert len( fields ) >= 3, 'A BED file requires at least 3 columns' #we can't fix this
try:
+ assert len( fields ) >= 3, 'A BED file requires at least 3 columns' #we can't fix this
if len(fields) > 12:
strict_bed = False
break