commit/galaxy-central: 37 new changesets
37 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/9c323aad4ffd/ Changeset: 9c323aad4ffd Branch: stable User: dan Date: 2015-01-13 15:25:57+00:00 Summary: Fix a critical security vulnerability where unsanitized user-modifiable values could be included in a command line template. Affected #: 5 files diff -r 6b0bd93038a843b1585155f0d63f0eea2459c70b -r 9c323aad4ffdd65a3deb06a4a36f6b2c5115a60f lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -2,6 +2,7 @@ from os.path import abspath from galaxy.util import string_as_bool, stringify_dictionary_keys, listify +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.util.odict import odict from galaxy.web import form_builder import galaxy.model @@ -176,7 +177,10 @@ def to_string( self, value ): return str( value ) - + + def to_safe_string( self, value ): + return sanitize_lists_to_string( self.to_string( value ) ) + def make_copy( self, value, target_context = None, source_context = None ): return copy.deepcopy( value ) @@ -394,6 +398,10 @@ def to_string( self, value ): return simplejson.dumps( value ) + def to_safe_string( self, value ): + # We do not sanitize json dicts + return simplejson.dumps( value ) + class PythonObjectParameter( MetadataParameter ): def to_string( self, value ): @@ -417,7 +425,11 @@ if not value: return str( self.spec.no_value ) return value.file_name - + + def to_safe_string( self, value ): + # We do not sanitize file names + return self.to_string( value ) + def get_html_field( self, value=None, context={}, other_values={}, **kwd ): return form_builder.TextField( self.spec.name, value=str( value.id ) ) diff -r 6b0bd93038a843b1585155f0d63f0eea2459c70b -r 9c323aad4ffdd65a3deb06a4a36f6b2c5115a60f lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -34,6 +34,8 @@ from galaxy.util import listify import galaxy.util.shed_util_common from galaxy.web import url_for +from galaxy.util.object_wrapper import wrap_with_safe_string +from galaxy import exceptions from galaxy.visualization.genome.visual_analytics import TracksterConfig @@ -2382,6 +2384,9 @@ # failed to pass; for tool writing convienence, provide a # NoneDataset param_dict[ out_name ] = NoneDataset( datatypes_registry = self.app.datatypes_registry, ext = output.format ) + # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames. + self.__sanitize_param_dict( param_dict ) + # Parameters added after this line are not sanitized # We add access to app here, this allows access to app.config, etc param_dict['__app__'] = RawObjectWrapper( self.app ) # More convienent access to app.config.new_file_path; we don't need to @@ -2399,6 +2404,23 @@ param_dict['__admin_users__'] = self.app.config.admin_users # Return the dictionary of parameters return param_dict + def __sanitize_param_dict( self, param_dict ): + """ + Sanitize all values that will be substituted on the command line, with the exception of ToolParameterValueWrappers, + which already have their own specific sanitization rules and also exclude special-cased named values. + We will only examine the first level for values to skip; the wrapping function will recurse as necessary. + + Note: this method follows the style of the similar populate calls, in that param_dict is modified in-place. + """ + # chromInfo is a filename, do not sanitize it. + skip = [ 'chromInfo' ] + if not self.options or self.options.sanitize: + for key, value in param_dict.items(): + if key not in skip: + # Remove key so that new wrapped object will occupy key slot + del param_dict[key] + # And replace with new wrapped key + param_dict[ wrap_with_safe_string( key, no_wrap_classes=ToolParameterValueWrapper ) ] = wrap_with_safe_string( value, no_wrap_classes=ToolParameterValueWrapper ) def build_param_file( self, param_dict, directory=None ): """ Build temporary file for file based parameter transfer if needed @@ -3078,10 +3100,13 @@ if name in self.metadata.spec: if rval is None: rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) + rval = self.metadata.spec[ name ].param.to_safe_string( rval ) # Store this value, so we don't need to recalculate if needed # again - setattr( self, name, rval ) + setattr( self, name, rval ) + else: + #escape string value of non-defined metadata value + rval = wrap_with_safe_string( rval ) return rval def __nonzero__( self ): return self.metadata.__nonzero__() @@ -3102,9 +3127,13 @@ ext = tool.inputs[name].extensions[0] except: ext = 'data' - self.dataset = NoneDataset( datatypes_registry = datatypes_registry, ext = ext ) + self.dataset = wrap_with_safe_string( NoneDataset( datatypes_registry=datatypes_registry, ext=ext ), no_wrap_classes=ToolParameterValueWrapper ) else: - self.dataset = dataset + # Tool wrappers should not normally be accessing .dataset directly, + # so we will wrap it and keep the original around for file paths + # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper? + self.unsanitized = dataset + self.dataset = wrap_with_safe_string( dataset, no_wrap_classes=ToolParameterValueWrapper ) self.metadata = self.MetadataWrapper( dataset.metadata ) self.false_path = false_path @@ -3112,10 +3141,28 @@ if self.false_path is not None: return self.false_path else: - return self.dataset.file_name + return self.unsanitized.file_name def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': return self.false_path + #does not implement support for false_extra_files_path + elif key == 'extra_files_path': + try: + # Assume it is an output and that this wrapper + # will be set with correct "files_path" for this + # job. + return self.files_path + except AttributeError: + # Otherwise, we have an input - delegate to model and + # object store to find the static location of this + # directory. + try: + return self.unsanitized.extra_files_path + except exceptions.ObjectNotFound: + # NestedObjectstore raises an error here + # instead of just returning a non-existent + # path like DiskObjectStore. + raise else: return getattr( self.dataset, key ) def __nonzero__( self ): diff -r 6b0bd93038a843b1585155f0d63f0eea2459c70b -r 9c323aad4ffdd65a3deb06a4a36f6b2c5115a60f lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -6,6 +6,7 @@ from galaxy.tools.parameters.grouping import * from galaxy.util.template import fill_template from galaxy.util.none_like import NoneDataset +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.web import url_for from galaxy.exceptions import ObjectInvalid import galaxy.tools @@ -214,7 +215,7 @@ if not chrom_info: # Default to built-in build. - chrom_info = os.path.join( trans.app.config.tool_data_path, 'shared','ucsc','chrom', "%s.len" % input_dbkey ) + chrom_info = os.path.join( trans.app.config.tool_data_path, 'shared','ucsc','chrom', "%s.len" % ( sanitize_lists_to_string( input_dbkey ) ) ) incoming[ "chromInfo" ] = chrom_info inp_data.update( db_datasets ) diff -r 6b0bd93038a843b1585155f0d63f0eea2459c70b -r 9c323aad4ffdd65a3deb06a4a36f6b2c5115a60f lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -187,37 +187,59 @@ '#' : '__pd__' } -def restore_text(text): +def restore_text( text, character_map=mapped_chars ): """Restores sanitized text""" if not text: return text - for key, value in mapped_chars.items(): + for key, value in character_map.items(): text = text.replace(value, key) return text -def sanitize_text(text): - """Restricts the characters that are allowed in a text""" + +def sanitize_text( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + """ + Restricts the characters that are allowed in text; accepts both strings + and lists of strings; non-string entities will be cast to strings. + """ + if isinstance( text, list ): + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), text ) + if not isinstance( text, basestring ): + text = smart_str( text ) + return _sanitize_text_helper( text, valid_characters=valid_characters, character_map=character_map ) + +def _sanitize_text_helper( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + """Restricts the characters that are allowed in a string""" + out = [] for c in text: - if c in valid_chars: + if c in valid_characters: out.append(c) - elif c in mapped_chars: - out.append(mapped_chars[c]) + elif c in character_map: + out.append( character_map[c] ) else: - out.append('X') # makes debugging easier + out.append( invalid_character ) # makes debugging easier return ''.join(out) -def sanitize_param(value): + +def sanitize_lists_to_string( values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + if isinstance( values, list ): + rval = [] + for value in values: + rval.append( sanitize_lists_to_string( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) ) + values = ",".join( rval ) + else: + values = sanitize_text( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + return values + + +def sanitize_param( value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Clean incoming parameters (strings or lists)""" if isinstance( value, basestring ): - return sanitize_text(value) + return sanitize_text( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) elif isinstance( value, list ): - return map(sanitize_text, value) + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), value ) else: - print value - raise Exception, 'Unknown parameter type (%s)' % ( type( value ) ) - -valid_filename_chars = set( string.ascii_letters + string.digits + '_.' ) + raise Exception('Unknown parameter type (%s)' % ( type( value ) )) invalid_filenames = [ '', '.', '..' ] def sanitize_for_filename( text, default=None ): """ @@ -399,6 +421,28 @@ except: return default +def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'): + """ + Returns a bytestring version of 's', encoded as specified in 'encoding'. + + If strings_only is True, don't convert (some) non-string-like objects. + + Adapted from an older, simpler version of django.utils.encoding.smart_str. + """ + if strings_only and isinstance(s, (type(None), int)): + return s + if not isinstance(s, basestring): + try: + return str(s) + except UnicodeEncodeError: + return unicode(s).encode(encoding, errors) + elif isinstance(s, unicode): + return s.encode(encoding, errors) + elif s and encoding != 'utf-8': + return s.decode('utf-8', errors).encode(encoding, errors) + else: + return s + def object_to_string( obj ): return binascii.hexlify( obj ) diff -r 6b0bd93038a843b1585155f0d63f0eea2459c70b -r 9c323aad4ffdd65a3deb06a4a36f6b2c5115a60f lib/galaxy/util/object_wrapper.py --- /dev/null +++ b/lib/galaxy/util/object_wrapper.py @@ -0,0 +1,436 @@ +""" +Classes for wrapping Objects and Sanitizing string output. +""" + +import inspect +import copy_reg +import logging +import string +from numbers import Number +from types import ( NoneType, NotImplementedType, EllipsisType, FunctionType, MethodType, GeneratorType, CodeType, + BuiltinFunctionType, BuiltinMethodType, ModuleType, XRangeType, SliceType, TracebackType, FrameType, + BufferType, DictProxyType, GetSetDescriptorType, MemberDescriptorType ) +from UserDict import UserDict + +from galaxy.util import sanitize_lists_to_string as _sanitize_lists_to_string + +log = logging.getLogger( __name__ ) + +# Define different behaviors for different types, see also: https://docs.python.org/2/library/types.html + +# Known Callable types +__CALLABLE_TYPES__ = ( FunctionType, MethodType, GeneratorType, CodeType, BuiltinFunctionType, BuiltinMethodType, ) + +# Always wrap these types without attempting to subclass +__WRAP_NO_SUBCLASS__ = ( ModuleType, XRangeType, SliceType, BufferType, TracebackType, FrameType, DictProxyType, + GetSetDescriptorType, MemberDescriptorType ) + __CALLABLE_TYPES__ + +# Don't wrap or sanitize. +__DONT_SANITIZE_TYPES__ = ( Number, bool, NoneType, NotImplementedType, EllipsisType, bytearray, ) + +# Don't wrap, but do sanitize. +__DONT_WRAP_TYPES__ = tuple() #( basestring, ) so that we can get the unsanitized string, we will now wrap basestring instances + +# Wrap contents, but not the container +__WRAP_SEQUENCES__ = ( tuple, list, ) +__WRAP_SETS__ = ( set, frozenset, ) +__WRAP_MAPPINGS__ = ( dict, UserDict, ) + + +# Define the set of characters that are not sanitized, and define a set of mappings for those that are. +# characters that are valid +VALID_CHARACTERS = set( string.letters + string.digits + " -=_.()/+*^,:?!@" ) + +# characters that are allowed but need to be escaped +CHARACTER_MAP = { '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__'} + +INVALID_CHARACTER = "X" + +def sanitize_lists_to_string( values, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP, invalid_character=INVALID_CHARACTER ): + return _sanitize_lists_to_string( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + + +def wrap_with_safe_string( value, no_wrap_classes = None ): + """ + Recursively wrap values that should be wrapped. + """ + + def __do_wrap( value ): + if isinstance( value, SafeStringWrapper ): + # Only ever wrap one-layer + return value + if callable( value ): + safe_class = CallableSafeStringWrapper + else: + safe_class = SafeStringWrapper + if isinstance( value, no_wrap_classes ): + return value + if isinstance( value, __DONT_WRAP_TYPES__ ): + return sanitize_lists_to_string( value, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + if isinstance( value, __WRAP_NO_SUBCLASS__ ): + return safe_class( value, safe_string_wrapper_function = __do_wrap ) + for this_type in __WRAP_SEQUENCES__ + __WRAP_SETS__: + if isinstance( value, this_type ): + return this_type( map( __do_wrap, value ) ) + for this_type in __WRAP_MAPPINGS__: + if isinstance( value, this_type ): + # Wrap both key and value + return this_type( map( lambda x: ( __do_wrap( x[0] ), __do_wrap( x[1] ) ), value.items() ) ) + # Create a dynamic class that joins SafeStringWrapper with the object being wrapped. + # This allows e.g. isinstance to continue to work. + try: + wrapped_class_name = value.__name__ + wrapped_class = value + except: + wrapped_class_name = value.__class__.__name__ + wrapped_class = value.__class__ + value_mod = inspect.getmodule( value ) + if value_mod: + wrapped_class_name = "%s.%s" % ( value_mod.__name__, wrapped_class_name ) + wrapped_class_name = "SafeStringWrapper(%s:%s)" % ( wrapped_class_name, ",".join( sorted( map( str, no_wrap_classes ) ) ) ) + do_wrap_func_name = "__do_wrap_%s" % ( wrapped_class_name ) + do_wrap_func = __do_wrap + global_dict = globals() + if wrapped_class_name in global_dict: + # Check to see if we have created a wrapper for this class yet, if so, reuse + wrapped_class = global_dict.get( wrapped_class_name ) + do_wrap_func = global_dict.get( do_wrap_func_name, __do_wrap ) + else: + try: + wrapped_class = type( wrapped_class_name, ( safe_class, wrapped_class, ), {} ) + except TypeError, e: + # Fail-safe for when a class cannot be dynamically subclassed. + log.warning( "Unable to create dynamic subclass for %s, %s: %s", type( value), value, e ) + wrapped_class = type( wrapped_class_name, ( safe_class, ), {} ) + if wrapped_class not in ( SafeStringWrapper, CallableSafeStringWrapper ): + # Save this wrapper for reuse and pickling/copying + global_dict[ wrapped_class_name ] = wrapped_class + do_wrap_func.__name__ = do_wrap_func_name + global_dict[ do_wrap_func_name ] = do_wrap_func + def pickle_safe_object( safe_object ): + return ( wrapped_class, ( safe_object.unsanitized, do_wrap_func, ) ) + # Set pickle and copy properties + copy_reg.pickle( wrapped_class, pickle_safe_object, do_wrap_func ) + return wrapped_class( value, safe_string_wrapper_function = do_wrap_func ) + # Determine classes not to wrap + if no_wrap_classes: + if not isinstance( no_wrap_classes, ( tuple, list ) ): + no_wrap_classes = [ no_wrap_classes ] + no_wrap_classes = list( no_wrap_classes ) + list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + else: + no_wrap_classes = list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + no_wrap_classes = tuple( set( sorted( no_wrap_classes, key=str ) ) ) + return __do_wrap( value ) + + +# N.B. refer to e.g. https://docs.python.org/2/reference/datamodel.html for information on Python's Data Model. + + +class SafeStringWrapper( object ): + """ + Class that wraps and sanitizes any provided value's attributes + that will attempt to be cast into a string. + + Attempts to mimic behavior of original class, including operands. + + To ensure proper handling of e.g. subclass checks, the *wrap_with_safe_string()* + method should be used. + + This wrapping occurs in a recursive/parasitic fashion, as all called attributes of + the originally wrapped object will also be wrapped and sanitized, unless the attribute + is of a type found in __DONT_SANITIZE_TYPES__ + __DONT_WRAP_TYPES__, where e.g. ~(strings + will still be sanitized, but not wrapped), and e.g. integers will have neither. + """ + __UNSANITIZED_ATTRIBUTE_NAME__ = 'unsanitized' + __NO_WRAP_NAMES__ = [ '__safe_string_wrapper_function__', __UNSANITIZED_ATTRIBUTE_NAME__] + + + def __new__( cls, *arg, **kwd ): + # We need to define a __new__ since, we are subclassing from e.g. immutable str, which internally sets data + # that will be used when other + this (this + other is handled by __add__) + safe_string_wrapper_function = kwd.get( 'safe_string_wrapper_function', None) or wrap_with_safe_string + try: + return super( SafeStringWrapper, cls ).__new__( cls, sanitize_lists_to_string( arg[0], valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + except Exception, e: + log.warning( "Could not provide an argument to %s.__new__: %s; will try without arguments.", cls, e ) + return super( SafeStringWrapper, cls ).__new__( cls ) + + def __init__( self, value, safe_string_wrapper_function = wrap_with_safe_string ): + self.unsanitized = value + self.__safe_string_wrapper_function__ = safe_string_wrapper_function + + def __str__( self ): + return sanitize_lists_to_string( self.unsanitized, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + + def __repr__( self ): + return "%s object at %x on: %s" % ( sanitize_lists_to_string( self.__class__.__name__, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ), id( self ), sanitize_lists_to_string( repr( self.unsanitized ), valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __le__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized <= other + + def __eq__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized == other + + def __ne__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized != other + + def __gt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized > other + + def __ge__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized >= other + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __cmp__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return cmp( self.unsanitized, other ) + + # Do not implement __rcmp__, python 2.2 < 2.6 + + def __hash__( self ): + return hash( self.unsanitized ) + + def __nonzero__( self ): + return bool( self.unsanitized ) + + # Do not implement __unicode__, we will rely on __str__ + + def __getattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + #FIXME: is this ever reached? + return object.__getattr__( self, name ) + return self.__safe_string_wrapper_function__( getattr( self.unsanitized, name ) ) + + def __setattr__( self, name, value ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__setattr__( self, name, value ) + return setattr( self.unsanitized, name, value ) + + def __delattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__delattr__( self, name ) + return delattr( self.unsanitized, name ) + + def __getattribute__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__getattribute__( self, name ) + return self.__safe_string_wrapper_function__( getattr( object.__getattribute__( self, 'unsanitized' ), name ) ) + + # Skip Descriptors + + # Skip __slots__ + + # Don't need __metaclass__, we'll use the helper function to handle with subclassing for e.g. isinstance() + + # Revisit: + # __instancecheck__ + # __subclasscheck__ + # We are using a helper class to create dynamic subclasses to handle class checks + + # We address __call__ as needed based upon unsanitized, through the use of a CallableSafeStringWrapper class + + def __len__( self ): + original_value = self.unsanitized + while isinstance( original_value, SafeStringWrapper ): + original_value = self.unsanitized + return len( self.unsanitized ) + + def __getitem__( self, key ): + return self.__safe_string_wrapper_function__( self.unsanitized[ key ] ) + + def __setitem__( self, key, value ): + while isinstance( value, SafeStringWrapper ): + value = value.unsanitized + self.unsanitized[ key ] = value + + def __delitem__( self, key ): + del self.unsanitized[ key ] + + def __iter__( self ): + return iter( map( self.__safe_string_wrapper_function__, iter( self.unsanitized ) ) ) + + # Do not implement __reversed__ + + def __contains__( self, item ): + # FIXME: Do we need to consider if item is/isn't or does/doesn't contain SafeStringWrapper? + # When considering e.g. nested lists/dicts/etc, this gets complicated + while isinstance( item, SafeStringWrapper ): + item = item.unsanitized + return item in self.unsanitized + + # Not sure that we need these slice methods, but will provide anyway + def __getslice__( self, i, j ): + return self.__safe_string_wrapper_function__( self.unsanitized[ i:j ] ) + + def __setslice__( self, i, j, value ): + self.unsanitized[ i:j ] = value + + def __delslice__( self, i, j ): + del self.unsanitized[ i:j ] + + def __add__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized + other ) + + def __sub__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized - other ) + + def __mul__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized * other ) + + def __floordiv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized // other ) + + def __mod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized % other ) + + def __divmod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( divmod( self.unsanitized, other ) ) + + def __pow__( self, *other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( self.unsanitized, *other ) ) + + def __lshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized << other ) + + def __rshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized >> other ) + + def __and__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized & other ) + + def __xor__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized ^ other ) + + def __or__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized | other ) + + def __div__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + def __truediv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + # The only reflected operand that we will define is __rpow__, due to coercion rules complications as per docs + def __rpow__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( other, self.unsanitized ) ) + + # Do not implement in-place operands + + def __neg__( self ): + return __safe_string_wrapper_function__( -self.unsanitized ) + + def __pos__( self ): + return __safe_string_wrapper_function__( +self.unsanitized ) + + def __abs__( self ): + return __safe_string_wrapper_function__( abs( self.unsanitized ) ) + + def __invert__( self ): + return __safe_string_wrapper_function__( ~self.unsanitized ) + + def __complex__( self ): + return __safe_string_wrapper_function__( complex( self.unsanitized ) ) + + def __int__( self ): + return int( self.unsanitized ) + + def __float__( self ): + return float( self.unsanitized ) + + def __oct__( self ): + return oct( self.unsanitized ) + + def __hex__( self ): + return hex( self.unsanitized ) + + def __index__( self ): + return self.unsanitized.index() + + def __coerce__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return coerce( self.unsanitized, other ) + + def __enter__( self ): + return self.unsanitized.__enter__() + + def __exit__( self, *args ): + return self.unsanitized.__exit__( *args ) + +class CallableSafeStringWrapper( SafeStringWrapper ): + + def __call__( self, *args, **kwds ): + return self.__safe_string_wrapper_function__( self.unsanitized( *args, **kwds ) ) + + +# Enable pickling/deepcopy +def pickle_SafeStringWrapper( safe_object ): + args = ( safe_object.unsanitized, ) + cls = SafeStringWrapper + if isinstance( safe_object, CallableSafeStringWrapper ): + cls = CallableSafeStringWrapper + return ( cls, args ) +copy_reg.pickle( SafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) +copy_reg.pickle( CallableSafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) + https://bitbucket.org/galaxy/galaxy-central/commits/f5d12a0ef90f/ Changeset: f5d12a0ef90f Branch: stable User: natefoo Date: 2015-01-13 15:26:03+00:00 Summary: Update tag latest_2013.01.13 for changeset 9c323aad4ffd Affected #: 1 file diff -r c25df9f3d31effd9c4abdbbffd40381f23fc6dc8 -r f5d12a0ef90f8244ade33db951e002518ed8a9fc .hgtags --- a/.hgtags +++ b/.hgtags @@ -10,7 +10,7 @@ 7e257c7b10badb65772b1528cb61d58175a42e47 release_2014.06.02 7a4d321c0e38fa263ea83d29a35a608c3181fcba latest_2014.06.02 9661b9d5d5b330483ae3ad2236410e0efaa7c500 latest_2014.04.14 -6b0bd93038a843b1585155f0d63f0eea2459c70b latest_2013.01.13 +9c323aad4ffdd65a3deb06a4a36f6b2c5115a60f latest_2013.01.13 3e62060b14b9afc46f8e0ec02e1a4500d77db9e1 latest_2013.02.08 425009b3ff4d8b67d2812253b221f3c4f4a8d1e3 latest_2013.04.01 9713d86392ef985ffcdc39ff0c8ddf51a1f9ce47 latest_2013.06.03 https://bitbucket.org/galaxy/galaxy-central/commits/cbcce50577bf/ Changeset: cbcce50577bf Branch: stable User: natefoo Date: 2015-01-13 15:26:04+00:00 Summary: Merge head created for security fix on latest_2013.01.13 Affected #: 5 files diff -r f5d12a0ef90f8244ade33db951e002518ed8a9fc -r cbcce50577bf98aa164355caf631568e7c4b56f4 lib/galaxy/util/object_wrapper.py --- /dev/null +++ b/lib/galaxy/util/object_wrapper.py @@ -0,0 +1,436 @@ +""" +Classes for wrapping Objects and Sanitizing string output. +""" + +import inspect +import copy_reg +import logging +import string +from numbers import Number +from types import ( NoneType, NotImplementedType, EllipsisType, FunctionType, MethodType, GeneratorType, CodeType, + BuiltinFunctionType, BuiltinMethodType, ModuleType, XRangeType, SliceType, TracebackType, FrameType, + BufferType, DictProxyType, GetSetDescriptorType, MemberDescriptorType ) +from UserDict import UserDict + +from galaxy.util import sanitize_lists_to_string as _sanitize_lists_to_string + +log = logging.getLogger( __name__ ) + +# Define different behaviors for different types, see also: https://docs.python.org/2/library/types.html + +# Known Callable types +__CALLABLE_TYPES__ = ( FunctionType, MethodType, GeneratorType, CodeType, BuiltinFunctionType, BuiltinMethodType, ) + +# Always wrap these types without attempting to subclass +__WRAP_NO_SUBCLASS__ = ( ModuleType, XRangeType, SliceType, BufferType, TracebackType, FrameType, DictProxyType, + GetSetDescriptorType, MemberDescriptorType ) + __CALLABLE_TYPES__ + +# Don't wrap or sanitize. +__DONT_SANITIZE_TYPES__ = ( Number, bool, NoneType, NotImplementedType, EllipsisType, bytearray, ) + +# Don't wrap, but do sanitize. +__DONT_WRAP_TYPES__ = tuple() #( basestring, ) so that we can get the unsanitized string, we will now wrap basestring instances + +# Wrap contents, but not the container +__WRAP_SEQUENCES__ = ( tuple, list, ) +__WRAP_SETS__ = ( set, frozenset, ) +__WRAP_MAPPINGS__ = ( dict, UserDict, ) + + +# Define the set of characters that are not sanitized, and define a set of mappings for those that are. +# characters that are valid +VALID_CHARACTERS = set( string.letters + string.digits + " -=_.()/+*^,:?!@" ) + +# characters that are allowed but need to be escaped +CHARACTER_MAP = { '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__'} + +INVALID_CHARACTER = "X" + +def sanitize_lists_to_string( values, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP, invalid_character=INVALID_CHARACTER ): + return _sanitize_lists_to_string( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + + +def wrap_with_safe_string( value, no_wrap_classes = None ): + """ + Recursively wrap values that should be wrapped. + """ + + def __do_wrap( value ): + if isinstance( value, SafeStringWrapper ): + # Only ever wrap one-layer + return value + if callable( value ): + safe_class = CallableSafeStringWrapper + else: + safe_class = SafeStringWrapper + if isinstance( value, no_wrap_classes ): + return value + if isinstance( value, __DONT_WRAP_TYPES__ ): + return sanitize_lists_to_string( value, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + if isinstance( value, __WRAP_NO_SUBCLASS__ ): + return safe_class( value, safe_string_wrapper_function = __do_wrap ) + for this_type in __WRAP_SEQUENCES__ + __WRAP_SETS__: + if isinstance( value, this_type ): + return this_type( map( __do_wrap, value ) ) + for this_type in __WRAP_MAPPINGS__: + if isinstance( value, this_type ): + # Wrap both key and value + return this_type( map( lambda x: ( __do_wrap( x[0] ), __do_wrap( x[1] ) ), value.items() ) ) + # Create a dynamic class that joins SafeStringWrapper with the object being wrapped. + # This allows e.g. isinstance to continue to work. + try: + wrapped_class_name = value.__name__ + wrapped_class = value + except: + wrapped_class_name = value.__class__.__name__ + wrapped_class = value.__class__ + value_mod = inspect.getmodule( value ) + if value_mod: + wrapped_class_name = "%s.%s" % ( value_mod.__name__, wrapped_class_name ) + wrapped_class_name = "SafeStringWrapper(%s:%s)" % ( wrapped_class_name, ",".join( sorted( map( str, no_wrap_classes ) ) ) ) + do_wrap_func_name = "__do_wrap_%s" % ( wrapped_class_name ) + do_wrap_func = __do_wrap + global_dict = globals() + if wrapped_class_name in global_dict: + # Check to see if we have created a wrapper for this class yet, if so, reuse + wrapped_class = global_dict.get( wrapped_class_name ) + do_wrap_func = global_dict.get( do_wrap_func_name, __do_wrap ) + else: + try: + wrapped_class = type( wrapped_class_name, ( safe_class, wrapped_class, ), {} ) + except TypeError, e: + # Fail-safe for when a class cannot be dynamically subclassed. + log.warning( "Unable to create dynamic subclass for %s, %s: %s", type( value), value, e ) + wrapped_class = type( wrapped_class_name, ( safe_class, ), {} ) + if wrapped_class not in ( SafeStringWrapper, CallableSafeStringWrapper ): + # Save this wrapper for reuse and pickling/copying + global_dict[ wrapped_class_name ] = wrapped_class + do_wrap_func.__name__ = do_wrap_func_name + global_dict[ do_wrap_func_name ] = do_wrap_func + def pickle_safe_object( safe_object ): + return ( wrapped_class, ( safe_object.unsanitized, do_wrap_func, ) ) + # Set pickle and copy properties + copy_reg.pickle( wrapped_class, pickle_safe_object, do_wrap_func ) + return wrapped_class( value, safe_string_wrapper_function = do_wrap_func ) + # Determine classes not to wrap + if no_wrap_classes: + if not isinstance( no_wrap_classes, ( tuple, list ) ): + no_wrap_classes = [ no_wrap_classes ] + no_wrap_classes = list( no_wrap_classes ) + list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + else: + no_wrap_classes = list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + no_wrap_classes = tuple( set( sorted( no_wrap_classes, key=str ) ) ) + return __do_wrap( value ) + + +# N.B. refer to e.g. https://docs.python.org/2/reference/datamodel.html for information on Python's Data Model. + + +class SafeStringWrapper( object ): + """ + Class that wraps and sanitizes any provided value's attributes + that will attempt to be cast into a string. + + Attempts to mimic behavior of original class, including operands. + + To ensure proper handling of e.g. subclass checks, the *wrap_with_safe_string()* + method should be used. + + This wrapping occurs in a recursive/parasitic fashion, as all called attributes of + the originally wrapped object will also be wrapped and sanitized, unless the attribute + is of a type found in __DONT_SANITIZE_TYPES__ + __DONT_WRAP_TYPES__, where e.g. ~(strings + will still be sanitized, but not wrapped), and e.g. integers will have neither. + """ + __UNSANITIZED_ATTRIBUTE_NAME__ = 'unsanitized' + __NO_WRAP_NAMES__ = [ '__safe_string_wrapper_function__', __UNSANITIZED_ATTRIBUTE_NAME__] + + + def __new__( cls, *arg, **kwd ): + # We need to define a __new__ since, we are subclassing from e.g. immutable str, which internally sets data + # that will be used when other + this (this + other is handled by __add__) + safe_string_wrapper_function = kwd.get( 'safe_string_wrapper_function', None) or wrap_with_safe_string + try: + return super( SafeStringWrapper, cls ).__new__( cls, sanitize_lists_to_string( arg[0], valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + except Exception, e: + log.warning( "Could not provide an argument to %s.__new__: %s; will try without arguments.", cls, e ) + return super( SafeStringWrapper, cls ).__new__( cls ) + + def __init__( self, value, safe_string_wrapper_function = wrap_with_safe_string ): + self.unsanitized = value + self.__safe_string_wrapper_function__ = safe_string_wrapper_function + + def __str__( self ): + return sanitize_lists_to_string( self.unsanitized, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + + def __repr__( self ): + return "%s object at %x on: %s" % ( sanitize_lists_to_string( self.__class__.__name__, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ), id( self ), sanitize_lists_to_string( repr( self.unsanitized ), valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __le__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized <= other + + def __eq__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized == other + + def __ne__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized != other + + def __gt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized > other + + def __ge__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized >= other + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __cmp__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return cmp( self.unsanitized, other ) + + # Do not implement __rcmp__, python 2.2 < 2.6 + + def __hash__( self ): + return hash( self.unsanitized ) + + def __nonzero__( self ): + return bool( self.unsanitized ) + + # Do not implement __unicode__, we will rely on __str__ + + def __getattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + #FIXME: is this ever reached? + return object.__getattr__( self, name ) + return self.__safe_string_wrapper_function__( getattr( self.unsanitized, name ) ) + + def __setattr__( self, name, value ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__setattr__( self, name, value ) + return setattr( self.unsanitized, name, value ) + + def __delattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__delattr__( self, name ) + return delattr( self.unsanitized, name ) + + def __getattribute__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__getattribute__( self, name ) + return self.__safe_string_wrapper_function__( getattr( object.__getattribute__( self, 'unsanitized' ), name ) ) + + # Skip Descriptors + + # Skip __slots__ + + # Don't need __metaclass__, we'll use the helper function to handle with subclassing for e.g. isinstance() + + # Revisit: + # __instancecheck__ + # __subclasscheck__ + # We are using a helper class to create dynamic subclasses to handle class checks + + # We address __call__ as needed based upon unsanitized, through the use of a CallableSafeStringWrapper class + + def __len__( self ): + original_value = self.unsanitized + while isinstance( original_value, SafeStringWrapper ): + original_value = self.unsanitized + return len( self.unsanitized ) + + def __getitem__( self, key ): + return self.__safe_string_wrapper_function__( self.unsanitized[ key ] ) + + def __setitem__( self, key, value ): + while isinstance( value, SafeStringWrapper ): + value = value.unsanitized + self.unsanitized[ key ] = value + + def __delitem__( self, key ): + del self.unsanitized[ key ] + + def __iter__( self ): + return iter( map( self.__safe_string_wrapper_function__, iter( self.unsanitized ) ) ) + + # Do not implement __reversed__ + + def __contains__( self, item ): + # FIXME: Do we need to consider if item is/isn't or does/doesn't contain SafeStringWrapper? + # When considering e.g. nested lists/dicts/etc, this gets complicated + while isinstance( item, SafeStringWrapper ): + item = item.unsanitized + return item in self.unsanitized + + # Not sure that we need these slice methods, but will provide anyway + def __getslice__( self, i, j ): + return self.__safe_string_wrapper_function__( self.unsanitized[ i:j ] ) + + def __setslice__( self, i, j, value ): + self.unsanitized[ i:j ] = value + + def __delslice__( self, i, j ): + del self.unsanitized[ i:j ] + + def __add__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized + other ) + + def __sub__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized - other ) + + def __mul__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized * other ) + + def __floordiv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized // other ) + + def __mod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized % other ) + + def __divmod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( divmod( self.unsanitized, other ) ) + + def __pow__( self, *other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( self.unsanitized, *other ) ) + + def __lshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized << other ) + + def __rshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized >> other ) + + def __and__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized & other ) + + def __xor__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized ^ other ) + + def __or__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized | other ) + + def __div__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + def __truediv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + # The only reflected operand that we will define is __rpow__, due to coercion rules complications as per docs + def __rpow__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( other, self.unsanitized ) ) + + # Do not implement in-place operands + + def __neg__( self ): + return __safe_string_wrapper_function__( -self.unsanitized ) + + def __pos__( self ): + return __safe_string_wrapper_function__( +self.unsanitized ) + + def __abs__( self ): + return __safe_string_wrapper_function__( abs( self.unsanitized ) ) + + def __invert__( self ): + return __safe_string_wrapper_function__( ~self.unsanitized ) + + def __complex__( self ): + return __safe_string_wrapper_function__( complex( self.unsanitized ) ) + + def __int__( self ): + return int( self.unsanitized ) + + def __float__( self ): + return float( self.unsanitized ) + + def __oct__( self ): + return oct( self.unsanitized ) + + def __hex__( self ): + return hex( self.unsanitized ) + + def __index__( self ): + return self.unsanitized.index() + + def __coerce__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return coerce( self.unsanitized, other ) + + def __enter__( self ): + return self.unsanitized.__enter__() + + def __exit__( self, *args ): + return self.unsanitized.__exit__( *args ) + +class CallableSafeStringWrapper( SafeStringWrapper ): + + def __call__( self, *args, **kwds ): + return self.__safe_string_wrapper_function__( self.unsanitized( *args, **kwds ) ) + + +# Enable pickling/deepcopy +def pickle_SafeStringWrapper( safe_object ): + args = ( safe_object.unsanitized, ) + cls = SafeStringWrapper + if isinstance( safe_object, CallableSafeStringWrapper ): + cls = CallableSafeStringWrapper + return ( cls, args ) +copy_reg.pickle( SafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) +copy_reg.pickle( CallableSafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) + https://bitbucket.org/galaxy/galaxy-central/commits/b986c184be88/ Changeset: b986c184be88 Branch: stable User: dan Date: 2015-01-13 15:26:12+00:00 Summary: Fix a critical security vulnerability where unsanitized user-modifiable values could be included in a command line template. Affected #: 5 files diff -r 3e62060b14b9afc46f8e0ec02e1a4500d77db9e1 -r b986c184be88947b5d1d90be7f36cfd2627dd938 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -2,6 +2,7 @@ from os.path import abspath from galaxy.util import string_as_bool, stringify_dictionary_keys, listify +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.util.odict import odict from galaxy.web import form_builder import galaxy.model @@ -176,7 +177,10 @@ def to_string( self, value ): return str( value ) - + + def to_safe_string( self, value ): + return sanitize_lists_to_string( self.to_string( value ) ) + def make_copy( self, value, target_context = None, source_context = None ): return copy.deepcopy( value ) @@ -394,6 +398,10 @@ def to_string( self, value ): return simplejson.dumps( value ) + def to_safe_string( self, value ): + # We do not sanitize json dicts + return simplejson.dumps( value ) + class PythonObjectParameter( MetadataParameter ): def to_string( self, value ): @@ -417,7 +425,11 @@ if not value: return str( self.spec.no_value ) return value.file_name - + + def to_safe_string( self, value ): + # We do not sanitize file names + return self.to_string( value ) + def get_html_field( self, value=None, context={}, other_values={}, **kwd ): return form_builder.TextField( self.spec.name, value=str( value.id ) ) diff -r 3e62060b14b9afc46f8e0ec02e1a4500d77db9e1 -r b986c184be88947b5d1d90be7f36cfd2627dd938 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -35,6 +35,8 @@ from galaxy.util import listify import galaxy.util.shed_util_common from galaxy.web import url_for +from galaxy.util.object_wrapper import wrap_with_safe_string +from galaxy import exceptions from paste import httpexceptions @@ -2384,6 +2386,9 @@ # failed to pass; for tool writing convienence, provide a # NoneDataset param_dict[ out_name ] = NoneDataset( datatypes_registry = self.app.datatypes_registry, ext = output.format ) + # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames. + self.__sanitize_param_dict( param_dict ) + # Parameters added after this line are not sanitized # We add access to app here, this allows access to app.config, etc param_dict['__app__'] = RawObjectWrapper( self.app ) # More convienent access to app.config.new_file_path; we don't need to @@ -2401,6 +2406,23 @@ param_dict['__admin_users__'] = self.app.config.admin_users # Return the dictionary of parameters return param_dict + def __sanitize_param_dict( self, param_dict ): + """ + Sanitize all values that will be substituted on the command line, with the exception of ToolParameterValueWrappers, + which already have their own specific sanitization rules and also exclude special-cased named values. + We will only examine the first level for values to skip; the wrapping function will recurse as necessary. + + Note: this method follows the style of the similar populate calls, in that param_dict is modified in-place. + """ + # chromInfo is a filename, do not sanitize it. + skip = [ 'chromInfo' ] + if not self.options or self.options.sanitize: + for key, value in param_dict.items(): + if key not in skip: + # Remove key so that new wrapped object will occupy key slot + del param_dict[key] + # And replace with new wrapped key + param_dict[ wrap_with_safe_string( key, no_wrap_classes=ToolParameterValueWrapper ) ] = wrap_with_safe_string( value, no_wrap_classes=ToolParameterValueWrapper ) def build_param_file( self, param_dict, directory=None ): """ Build temporary file for file based parameter transfer if needed @@ -3080,10 +3102,13 @@ if name in self.metadata.spec: if rval is None: rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) + rval = self.metadata.spec[ name ].param.to_safe_string( rval ) # Store this value, so we don't need to recalculate if needed # again - setattr( self, name, rval ) + setattr( self, name, rval ) + else: + #escape string value of non-defined metadata value + rval = wrap_with_safe_string( rval ) return rval def __nonzero__( self ): return self.metadata.__nonzero__() @@ -3104,9 +3129,13 @@ ext = tool.inputs[name].extensions[0] except: ext = 'data' - self.dataset = NoneDataset( datatypes_registry = datatypes_registry, ext = ext ) + self.dataset = wrap_with_safe_string( NoneDataset( datatypes_registry=datatypes_registry, ext=ext ), no_wrap_classes=ToolParameterValueWrapper ) else: - self.dataset = dataset + # Tool wrappers should not normally be accessing .dataset directly, + # so we will wrap it and keep the original around for file paths + # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper? + self.unsanitized = dataset + self.dataset = wrap_with_safe_string( dataset, no_wrap_classes=ToolParameterValueWrapper ) self.metadata = self.MetadataWrapper( dataset.metadata ) self.false_path = false_path @@ -3114,10 +3143,28 @@ if self.false_path is not None: return self.false_path else: - return self.dataset.file_name + return self.unsanitized.file_name def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': return self.false_path + #does not implement support for false_extra_files_path + elif key == 'extra_files_path': + try: + # Assume it is an output and that this wrapper + # will be set with correct "files_path" for this + # job. + return self.files_path + except AttributeError: + # Otherwise, we have an input - delegate to model and + # object store to find the static location of this + # directory. + try: + return self.unsanitized.extra_files_path + except exceptions.ObjectNotFound: + # NestedObjectstore raises an error here + # instead of just returning a non-existent + # path like DiskObjectStore. + raise else: return getattr( self.dataset, key ) def __nonzero__( self ): diff -r 3e62060b14b9afc46f8e0ec02e1a4500d77db9e1 -r b986c184be88947b5d1d90be7f36cfd2627dd938 lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -6,6 +6,7 @@ from galaxy.tools.parameters.grouping import * from galaxy.util.template import fill_template from galaxy.util.none_like import NoneDataset +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.web import url_for from galaxy.exceptions import ObjectInvalid import galaxy.tools @@ -214,7 +215,7 @@ if not chrom_info: # Default to built-in build. - chrom_info = os.path.join( trans.app.config.tool_data_path, 'shared','ucsc','chrom', "%s.len" % input_dbkey ) + chrom_info = os.path.join( trans.app.config.tool_data_path, 'shared','ucsc','chrom', "%s.len" % ( sanitize_lists_to_string( input_dbkey ) ) ) incoming[ "chromInfo" ] = chrom_info inp_data.update( db_datasets ) diff -r 3e62060b14b9afc46f8e0ec02e1a4500d77db9e1 -r b986c184be88947b5d1d90be7f36cfd2627dd938 lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -187,37 +187,59 @@ '#' : '__pd__' } -def restore_text(text): +def restore_text( text, character_map=mapped_chars ): """Restores sanitized text""" if not text: return text - for key, value in mapped_chars.items(): + for key, value in character_map.items(): text = text.replace(value, key) return text -def sanitize_text(text): - """Restricts the characters that are allowed in a text""" + +def sanitize_text( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + """ + Restricts the characters that are allowed in text; accepts both strings + and lists of strings; non-string entities will be cast to strings. + """ + if isinstance( text, list ): + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), text ) + if not isinstance( text, basestring ): + text = smart_str( text ) + return _sanitize_text_helper( text, valid_characters=valid_characters, character_map=character_map ) + +def _sanitize_text_helper( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + """Restricts the characters that are allowed in a string""" + out = [] for c in text: - if c in valid_chars: + if c in valid_characters: out.append(c) - elif c in mapped_chars: - out.append(mapped_chars[c]) + elif c in character_map: + out.append( character_map[c] ) else: - out.append('X') # makes debugging easier + out.append( invalid_character ) # makes debugging easier return ''.join(out) -def sanitize_param(value): + +def sanitize_lists_to_string( values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + if isinstance( values, list ): + rval = [] + for value in values: + rval.append( sanitize_lists_to_string( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) ) + values = ",".join( rval ) + else: + values = sanitize_text( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + return values + + +def sanitize_param( value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Clean incoming parameters (strings or lists)""" if isinstance( value, basestring ): - return sanitize_text(value) + return sanitize_text( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) elif isinstance( value, list ): - return map(sanitize_text, value) + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), value ) else: - print value - raise Exception, 'Unknown parameter type (%s)' % ( type( value ) ) - -valid_filename_chars = set( string.ascii_letters + string.digits + '_.' ) + raise Exception('Unknown parameter type (%s)' % ( type( value ) )) invalid_filenames = [ '', '.', '..' ] def sanitize_for_filename( text, default=None ): """ @@ -399,6 +421,28 @@ except: return default +def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'): + """ + Returns a bytestring version of 's', encoded as specified in 'encoding'. + + If strings_only is True, don't convert (some) non-string-like objects. + + Adapted from an older, simpler version of django.utils.encoding.smart_str. + """ + if strings_only and isinstance(s, (type(None), int)): + return s + if not isinstance(s, basestring): + try: + return str(s) + except UnicodeEncodeError: + return unicode(s).encode(encoding, errors) + elif isinstance(s, unicode): + return s.encode(encoding, errors) + elif s and encoding != 'utf-8': + return s.decode('utf-8', errors).encode(encoding, errors) + else: + return s + def object_to_string( obj ): return binascii.hexlify( obj ) diff -r 3e62060b14b9afc46f8e0ec02e1a4500d77db9e1 -r b986c184be88947b5d1d90be7f36cfd2627dd938 lib/galaxy/util/object_wrapper.py --- /dev/null +++ b/lib/galaxy/util/object_wrapper.py @@ -0,0 +1,436 @@ +""" +Classes for wrapping Objects and Sanitizing string output. +""" + +import inspect +import copy_reg +import logging +import string +from numbers import Number +from types import ( NoneType, NotImplementedType, EllipsisType, FunctionType, MethodType, GeneratorType, CodeType, + BuiltinFunctionType, BuiltinMethodType, ModuleType, XRangeType, SliceType, TracebackType, FrameType, + BufferType, DictProxyType, GetSetDescriptorType, MemberDescriptorType ) +from UserDict import UserDict + +from galaxy.util import sanitize_lists_to_string as _sanitize_lists_to_string + +log = logging.getLogger( __name__ ) + +# Define different behaviors for different types, see also: https://docs.python.org/2/library/types.html + +# Known Callable types +__CALLABLE_TYPES__ = ( FunctionType, MethodType, GeneratorType, CodeType, BuiltinFunctionType, BuiltinMethodType, ) + +# Always wrap these types without attempting to subclass +__WRAP_NO_SUBCLASS__ = ( ModuleType, XRangeType, SliceType, BufferType, TracebackType, FrameType, DictProxyType, + GetSetDescriptorType, MemberDescriptorType ) + __CALLABLE_TYPES__ + +# Don't wrap or sanitize. +__DONT_SANITIZE_TYPES__ = ( Number, bool, NoneType, NotImplementedType, EllipsisType, bytearray, ) + +# Don't wrap, but do sanitize. +__DONT_WRAP_TYPES__ = tuple() #( basestring, ) so that we can get the unsanitized string, we will now wrap basestring instances + +# Wrap contents, but not the container +__WRAP_SEQUENCES__ = ( tuple, list, ) +__WRAP_SETS__ = ( set, frozenset, ) +__WRAP_MAPPINGS__ = ( dict, UserDict, ) + + +# Define the set of characters that are not sanitized, and define a set of mappings for those that are. +# characters that are valid +VALID_CHARACTERS = set( string.letters + string.digits + " -=_.()/+*^,:?!@" ) + +# characters that are allowed but need to be escaped +CHARACTER_MAP = { '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__'} + +INVALID_CHARACTER = "X" + +def sanitize_lists_to_string( values, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP, invalid_character=INVALID_CHARACTER ): + return _sanitize_lists_to_string( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + + +def wrap_with_safe_string( value, no_wrap_classes = None ): + """ + Recursively wrap values that should be wrapped. + """ + + def __do_wrap( value ): + if isinstance( value, SafeStringWrapper ): + # Only ever wrap one-layer + return value + if callable( value ): + safe_class = CallableSafeStringWrapper + else: + safe_class = SafeStringWrapper + if isinstance( value, no_wrap_classes ): + return value + if isinstance( value, __DONT_WRAP_TYPES__ ): + return sanitize_lists_to_string( value, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + if isinstance( value, __WRAP_NO_SUBCLASS__ ): + return safe_class( value, safe_string_wrapper_function = __do_wrap ) + for this_type in __WRAP_SEQUENCES__ + __WRAP_SETS__: + if isinstance( value, this_type ): + return this_type( map( __do_wrap, value ) ) + for this_type in __WRAP_MAPPINGS__: + if isinstance( value, this_type ): + # Wrap both key and value + return this_type( map( lambda x: ( __do_wrap( x[0] ), __do_wrap( x[1] ) ), value.items() ) ) + # Create a dynamic class that joins SafeStringWrapper with the object being wrapped. + # This allows e.g. isinstance to continue to work. + try: + wrapped_class_name = value.__name__ + wrapped_class = value + except: + wrapped_class_name = value.__class__.__name__ + wrapped_class = value.__class__ + value_mod = inspect.getmodule( value ) + if value_mod: + wrapped_class_name = "%s.%s" % ( value_mod.__name__, wrapped_class_name ) + wrapped_class_name = "SafeStringWrapper(%s:%s)" % ( wrapped_class_name, ",".join( sorted( map( str, no_wrap_classes ) ) ) ) + do_wrap_func_name = "__do_wrap_%s" % ( wrapped_class_name ) + do_wrap_func = __do_wrap + global_dict = globals() + if wrapped_class_name in global_dict: + # Check to see if we have created a wrapper for this class yet, if so, reuse + wrapped_class = global_dict.get( wrapped_class_name ) + do_wrap_func = global_dict.get( do_wrap_func_name, __do_wrap ) + else: + try: + wrapped_class = type( wrapped_class_name, ( safe_class, wrapped_class, ), {} ) + except TypeError, e: + # Fail-safe for when a class cannot be dynamically subclassed. + log.warning( "Unable to create dynamic subclass for %s, %s: %s", type( value), value, e ) + wrapped_class = type( wrapped_class_name, ( safe_class, ), {} ) + if wrapped_class not in ( SafeStringWrapper, CallableSafeStringWrapper ): + # Save this wrapper for reuse and pickling/copying + global_dict[ wrapped_class_name ] = wrapped_class + do_wrap_func.__name__ = do_wrap_func_name + global_dict[ do_wrap_func_name ] = do_wrap_func + def pickle_safe_object( safe_object ): + return ( wrapped_class, ( safe_object.unsanitized, do_wrap_func, ) ) + # Set pickle and copy properties + copy_reg.pickle( wrapped_class, pickle_safe_object, do_wrap_func ) + return wrapped_class( value, safe_string_wrapper_function = do_wrap_func ) + # Determine classes not to wrap + if no_wrap_classes: + if not isinstance( no_wrap_classes, ( tuple, list ) ): + no_wrap_classes = [ no_wrap_classes ] + no_wrap_classes = list( no_wrap_classes ) + list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + else: + no_wrap_classes = list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + no_wrap_classes = tuple( set( sorted( no_wrap_classes, key=str ) ) ) + return __do_wrap( value ) + + +# N.B. refer to e.g. https://docs.python.org/2/reference/datamodel.html for information on Python's Data Model. + + +class SafeStringWrapper( object ): + """ + Class that wraps and sanitizes any provided value's attributes + that will attempt to be cast into a string. + + Attempts to mimic behavior of original class, including operands. + + To ensure proper handling of e.g. subclass checks, the *wrap_with_safe_string()* + method should be used. + + This wrapping occurs in a recursive/parasitic fashion, as all called attributes of + the originally wrapped object will also be wrapped and sanitized, unless the attribute + is of a type found in __DONT_SANITIZE_TYPES__ + __DONT_WRAP_TYPES__, where e.g. ~(strings + will still be sanitized, but not wrapped), and e.g. integers will have neither. + """ + __UNSANITIZED_ATTRIBUTE_NAME__ = 'unsanitized' + __NO_WRAP_NAMES__ = [ '__safe_string_wrapper_function__', __UNSANITIZED_ATTRIBUTE_NAME__] + + + def __new__( cls, *arg, **kwd ): + # We need to define a __new__ since, we are subclassing from e.g. immutable str, which internally sets data + # that will be used when other + this (this + other is handled by __add__) + safe_string_wrapper_function = kwd.get( 'safe_string_wrapper_function', None) or wrap_with_safe_string + try: + return super( SafeStringWrapper, cls ).__new__( cls, sanitize_lists_to_string( arg[0], valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + except Exception, e: + log.warning( "Could not provide an argument to %s.__new__: %s; will try without arguments.", cls, e ) + return super( SafeStringWrapper, cls ).__new__( cls ) + + def __init__( self, value, safe_string_wrapper_function = wrap_with_safe_string ): + self.unsanitized = value + self.__safe_string_wrapper_function__ = safe_string_wrapper_function + + def __str__( self ): + return sanitize_lists_to_string( self.unsanitized, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + + def __repr__( self ): + return "%s object at %x on: %s" % ( sanitize_lists_to_string( self.__class__.__name__, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ), id( self ), sanitize_lists_to_string( repr( self.unsanitized ), valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __le__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized <= other + + def __eq__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized == other + + def __ne__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized != other + + def __gt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized > other + + def __ge__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized >= other + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __cmp__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return cmp( self.unsanitized, other ) + + # Do not implement __rcmp__, python 2.2 < 2.6 + + def __hash__( self ): + return hash( self.unsanitized ) + + def __nonzero__( self ): + return bool( self.unsanitized ) + + # Do not implement __unicode__, we will rely on __str__ + + def __getattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + #FIXME: is this ever reached? + return object.__getattr__( self, name ) + return self.__safe_string_wrapper_function__( getattr( self.unsanitized, name ) ) + + def __setattr__( self, name, value ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__setattr__( self, name, value ) + return setattr( self.unsanitized, name, value ) + + def __delattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__delattr__( self, name ) + return delattr( self.unsanitized, name ) + + def __getattribute__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__getattribute__( self, name ) + return self.__safe_string_wrapper_function__( getattr( object.__getattribute__( self, 'unsanitized' ), name ) ) + + # Skip Descriptors + + # Skip __slots__ + + # Don't need __metaclass__, we'll use the helper function to handle with subclassing for e.g. isinstance() + + # Revisit: + # __instancecheck__ + # __subclasscheck__ + # We are using a helper class to create dynamic subclasses to handle class checks + + # We address __call__ as needed based upon unsanitized, through the use of a CallableSafeStringWrapper class + + def __len__( self ): + original_value = self.unsanitized + while isinstance( original_value, SafeStringWrapper ): + original_value = self.unsanitized + return len( self.unsanitized ) + + def __getitem__( self, key ): + return self.__safe_string_wrapper_function__( self.unsanitized[ key ] ) + + def __setitem__( self, key, value ): + while isinstance( value, SafeStringWrapper ): + value = value.unsanitized + self.unsanitized[ key ] = value + + def __delitem__( self, key ): + del self.unsanitized[ key ] + + def __iter__( self ): + return iter( map( self.__safe_string_wrapper_function__, iter( self.unsanitized ) ) ) + + # Do not implement __reversed__ + + def __contains__( self, item ): + # FIXME: Do we need to consider if item is/isn't or does/doesn't contain SafeStringWrapper? + # When considering e.g. nested lists/dicts/etc, this gets complicated + while isinstance( item, SafeStringWrapper ): + item = item.unsanitized + return item in self.unsanitized + + # Not sure that we need these slice methods, but will provide anyway + def __getslice__( self, i, j ): + return self.__safe_string_wrapper_function__( self.unsanitized[ i:j ] ) + + def __setslice__( self, i, j, value ): + self.unsanitized[ i:j ] = value + + def __delslice__( self, i, j ): + del self.unsanitized[ i:j ] + + def __add__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized + other ) + + def __sub__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized - other ) + + def __mul__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized * other ) + + def __floordiv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized // other ) + + def __mod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized % other ) + + def __divmod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( divmod( self.unsanitized, other ) ) + + def __pow__( self, *other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( self.unsanitized, *other ) ) + + def __lshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized << other ) + + def __rshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized >> other ) + + def __and__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized & other ) + + def __xor__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized ^ other ) + + def __or__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized | other ) + + def __div__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + def __truediv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + # The only reflected operand that we will define is __rpow__, due to coercion rules complications as per docs + def __rpow__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( other, self.unsanitized ) ) + + # Do not implement in-place operands + + def __neg__( self ): + return __safe_string_wrapper_function__( -self.unsanitized ) + + def __pos__( self ): + return __safe_string_wrapper_function__( +self.unsanitized ) + + def __abs__( self ): + return __safe_string_wrapper_function__( abs( self.unsanitized ) ) + + def __invert__( self ): + return __safe_string_wrapper_function__( ~self.unsanitized ) + + def __complex__( self ): + return __safe_string_wrapper_function__( complex( self.unsanitized ) ) + + def __int__( self ): + return int( self.unsanitized ) + + def __float__( self ): + return float( self.unsanitized ) + + def __oct__( self ): + return oct( self.unsanitized ) + + def __hex__( self ): + return hex( self.unsanitized ) + + def __index__( self ): + return self.unsanitized.index() + + def __coerce__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return coerce( self.unsanitized, other ) + + def __enter__( self ): + return self.unsanitized.__enter__() + + def __exit__( self, *args ): + return self.unsanitized.__exit__( *args ) + +class CallableSafeStringWrapper( SafeStringWrapper ): + + def __call__( self, *args, **kwds ): + return self.__safe_string_wrapper_function__( self.unsanitized( *args, **kwds ) ) + + +# Enable pickling/deepcopy +def pickle_SafeStringWrapper( safe_object ): + args = ( safe_object.unsanitized, ) + cls = SafeStringWrapper + if isinstance( safe_object, CallableSafeStringWrapper ): + cls = CallableSafeStringWrapper + return ( cls, args ) +copy_reg.pickle( SafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) +copy_reg.pickle( CallableSafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) + https://bitbucket.org/galaxy/galaxy-central/commits/ccbe7f3db659/ Changeset: ccbe7f3db659 Branch: stable User: natefoo Date: 2015-01-13 15:26:18+00:00 Summary: Update tag latest_2013.02.08 for changeset b986c184be88 Affected #: 1 file diff -r cbcce50577bf98aa164355caf631568e7c4b56f4 -r ccbe7f3db659092c9d7df2845e4bab3f58a7a6b0 .hgtags --- a/.hgtags +++ b/.hgtags @@ -11,7 +11,7 @@ 7a4d321c0e38fa263ea83d29a35a608c3181fcba latest_2014.06.02 9661b9d5d5b330483ae3ad2236410e0efaa7c500 latest_2014.04.14 9c323aad4ffdd65a3deb06a4a36f6b2c5115a60f latest_2013.01.13 -3e62060b14b9afc46f8e0ec02e1a4500d77db9e1 latest_2013.02.08 +b986c184be88947b5d1d90be7f36cfd2627dd938 latest_2013.02.08 425009b3ff4d8b67d2812253b221f3c4f4a8d1e3 latest_2013.04.01 9713d86392ef985ffcdc39ff0c8ddf51a1f9ce47 latest_2013.06.03 9ed84cd208e07e8985ec917cb025fcbbb09edcfb latest_2013.08.12 https://bitbucket.org/galaxy/galaxy-central/commits/7c4a6bf151a2/ Changeset: 7c4a6bf151a2 Branch: stable User: natefoo Date: 2015-01-13 15:26:20+00:00 Summary: Merge head created for security fix on latest_2013.02.08 Affected #: 2 files https://bitbucket.org/galaxy/galaxy-central/commits/dec9431d66b8/ Changeset: dec9431d66b8 Branch: stable User: dan Date: 2015-01-13 15:26:27+00:00 Summary: Fix a critical security vulnerability where unsanitized user-modifiable values could be included in a command line template. Affected #: 5 files diff -r 425009b3ff4d8b67d2812253b221f3c4f4a8d1e3 -r dec9431d66b837a208e2f060d90afd913c721227 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -2,6 +2,7 @@ from os.path import abspath from galaxy.util import string_as_bool, stringify_dictionary_keys, listify +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.util.odict import odict from galaxy.web import form_builder import galaxy.model @@ -176,7 +177,10 @@ def to_string( self, value ): return str( value ) - + + def to_safe_string( self, value ): + return sanitize_lists_to_string( self.to_string( value ) ) + def make_copy( self, value, target_context = None, source_context = None ): return copy.deepcopy( value ) @@ -394,6 +398,10 @@ def to_string( self, value ): return simplejson.dumps( value ) + def to_safe_string( self, value ): + # We do not sanitize json dicts + return simplejson.dumps( value ) + class PythonObjectParameter( MetadataParameter ): def to_string( self, value ): @@ -417,7 +425,11 @@ if not value: return str( self.spec.no_value ) return value.file_name - + + def to_safe_string( self, value ): + # We do not sanitize file names + return self.to_string( value ) + def get_html_field( self, value=None, context={}, other_values={}, **kwd ): return form_builder.TextField( self.spec.name, value=str( value.id ) ) diff -r 425009b3ff4d8b67d2812253b221f3c4f4a8d1e3 -r dec9431d66b837a208e2f060d90afd913c721227 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -57,6 +57,8 @@ from galaxy.web import url_for from galaxy.web.form_builder import SelectField from tool_shed.util import shed_util_common +from galaxy.util.object_wrapper import wrap_with_safe_string +from galaxy import exceptions log = logging.getLogger( __name__ ) @@ -2621,7 +2623,9 @@ return self.app.tool_data_tables[ table_name ].get_entry( query_attr, query_val, return_attr ) param_dict['__get_data_table_entry__'] = get_data_table_entry - + # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames. + self.__sanitize_param_dict( param_dict ) + # Parameters added after this line are not sanitized # We add access to app here, this allows access to app.config, etc param_dict['__app__'] = RawObjectWrapper( self.app ) # More convienent access to app.config.new_file_path; we don't need to @@ -2639,6 +2643,23 @@ param_dict['__admin_users__'] = self.app.config.admin_users # Return the dictionary of parameters return param_dict + def __sanitize_param_dict( self, param_dict ): + """ + Sanitize all values that will be substituted on the command line, with the exception of ToolParameterValueWrappers, + which already have their own specific sanitization rules and also exclude special-cased named values. + We will only examine the first level for values to skip; the wrapping function will recurse as necessary. + + Note: this method follows the style of the similar populate calls, in that param_dict is modified in-place. + """ + # chromInfo is a filename, do not sanitize it. + skip = [ 'chromInfo' ] + if not self.options or self.options.sanitize: + for key, value in param_dict.items(): + if key not in skip: + # Remove key so that new wrapped object will occupy key slot + del param_dict[key] + # And replace with new wrapped key + param_dict[ wrap_with_safe_string( key, no_wrap_classes=ToolParameterValueWrapper ) ] = wrap_with_safe_string( value, no_wrap_classes=ToolParameterValueWrapper ) def build_param_file( self, param_dict, directory=None ): """ Build temporary file for file based parameter transfer if needed @@ -3421,10 +3442,13 @@ if name in self.metadata.spec: if rval is None: rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) + rval = self.metadata.spec[ name ].param.to_safe_string( rval ) # Store this value, so we don't need to recalculate if needed # again setattr( self, name, rval ) + else: + #escape string value of non-defined metadata value + rval = wrap_with_safe_string( rval ) return rval def __nonzero__( self ): return self.metadata.__nonzero__() @@ -3445,9 +3469,13 @@ ext = tool.inputs[name].extensions[0] except: ext = 'data' - self.dataset = NoneDataset( datatypes_registry = datatypes_registry, ext = ext ) + self.dataset = wrap_with_safe_string( NoneDataset( datatypes_registry=datatypes_registry, ext=ext ), no_wrap_classes=ToolParameterValueWrapper ) else: - self.dataset = dataset + # Tool wrappers should not normally be accessing .dataset directly, + # so we will wrap it and keep the original around for file paths + # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper? + self.unsanitized = dataset + self.dataset = wrap_with_safe_string( dataset, no_wrap_classes=ToolParameterValueWrapper ) self.metadata = self.MetadataWrapper( dataset.metadata ) self.false_path = false_path @@ -3455,10 +3483,28 @@ if self.false_path is not None: return self.false_path else: - return self.dataset.file_name + return self.unsanitized.file_name def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': return self.false_path + #does not implement support for false_extra_files_path + elif key == 'extra_files_path': + try: + # Assume it is an output and that this wrapper + # will be set with correct "files_path" for this + # job. + return self.files_path + except AttributeError: + # Otherwise, we have an input - delegate to model and + # object store to find the static location of this + # directory. + try: + return self.unsanitized.extra_files_path + except exceptions.ObjectNotFound: + # NestedObjectstore raises an error here + # instead of just returning a non-existent + # path like DiskObjectStore. + raise else: return getattr( self.dataset, key ) def __nonzero__( self ): diff -r 425009b3ff4d8b67d2812253b221f3c4f4a8d1e3 -r dec9431d66b837a208e2f060d90afd913c721227 lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -6,6 +6,7 @@ from galaxy.tools.parameters.grouping import * from galaxy.util.template import fill_template from galaxy.util.none_like import NoneDataset +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.web import url_for from galaxy.exceptions import ObjectInvalid import galaxy.tools @@ -214,7 +215,7 @@ if not chrom_info: # Default to built-in build. - chrom_info = os.path.join( trans.app.config.tool_data_path, 'shared','ucsc','chrom', "%s.len" % input_dbkey ) + chrom_info = os.path.join( trans.app.config.tool_data_path, 'shared','ucsc','chrom', "%s.len" % ( sanitize_lists_to_string( input_dbkey ) ) ) incoming[ "chromInfo" ] = chrom_info inp_data.update( db_datasets ) diff -r 425009b3ff4d8b67d2812253b221f3c4f4a8d1e3 -r dec9431d66b837a208e2f060d90afd913c721227 lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -187,48 +187,60 @@ '#' : '__pd__' } -def restore_text(text): +def restore_text( text, character_map=mapped_chars ): """Restores sanitized text""" if not text: return text - for key, value in mapped_chars.items(): + for key, value in character_map.items(): text = text.replace(value, key) return text -def sanitize_text(text): + +def sanitize_text( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """ Restricts the characters that are allowed in text; accepts both strings - and lists of strings. + and lists of strings; non-string entities will be cast to strings. """ - if isinstance( text, basestring ): - return _sanitize_text_helper(text) - elif isinstance( text, list ): - return [ _sanitize_text_helper(t) for t in text ] + if isinstance( text, list ): + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), text ) + if not isinstance( text, basestring ): + text = smart_str( text ) + return _sanitize_text_helper( text, valid_characters=valid_characters, character_map=character_map ) -def _sanitize_text_helper(text): +def _sanitize_text_helper( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Restricts the characters that are allowed in a string""" out = [] for c in text: - if c in valid_chars: + if c in valid_characters: out.append(c) - elif c in mapped_chars: - out.append(mapped_chars[c]) + elif c in character_map: + out.append( character_map[c] ) else: - out.append('X') # makes debugging easier + out.append( invalid_character ) # makes debugging easier return ''.join(out) -def sanitize_param(value): + +def sanitize_lists_to_string( values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + if isinstance( values, list ): + rval = [] + for value in values: + rval.append( sanitize_lists_to_string( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) ) + values = ",".join( rval ) + else: + values = sanitize_text( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + return values + + +def sanitize_param( value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Clean incoming parameters (strings or lists)""" if isinstance( value, basestring ): - return sanitize_text(value) + return sanitize_text( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) elif isinstance( value, list ): - return map(sanitize_text, value) + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), value ) else: - print value - raise Exception, 'Unknown parameter type (%s)' % ( type( value ) ) + raise Exception('Unknown parameter type (%s)' % ( type( value ) )) -valid_filename_chars = set( string.ascii_letters + string.digits + '_.' ) invalid_filenames = [ '', '.', '..' ] def sanitize_for_filename( text, default=None ): """ @@ -425,6 +437,28 @@ except: return default +def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'): + """ + Returns a bytestring version of 's', encoded as specified in 'encoding'. + + If strings_only is True, don't convert (some) non-string-like objects. + + Adapted from an older, simpler version of django.utils.encoding.smart_str. + """ + if strings_only and isinstance(s, (type(None), int)): + return s + if not isinstance(s, basestring): + try: + return str(s) + except UnicodeEncodeError: + return unicode(s).encode(encoding, errors) + elif isinstance(s, unicode): + return s.encode(encoding, errors) + elif s and encoding != 'utf-8': + return s.decode('utf-8', errors).encode(encoding, errors) + else: + return s + def object_to_string( obj ): return binascii.hexlify( obj ) diff -r 425009b3ff4d8b67d2812253b221f3c4f4a8d1e3 -r dec9431d66b837a208e2f060d90afd913c721227 lib/galaxy/util/object_wrapper.py --- /dev/null +++ b/lib/galaxy/util/object_wrapper.py @@ -0,0 +1,436 @@ +""" +Classes for wrapping Objects and Sanitizing string output. +""" + +import inspect +import copy_reg +import logging +import string +from numbers import Number +from types import ( NoneType, NotImplementedType, EllipsisType, FunctionType, MethodType, GeneratorType, CodeType, + BuiltinFunctionType, BuiltinMethodType, ModuleType, XRangeType, SliceType, TracebackType, FrameType, + BufferType, DictProxyType, GetSetDescriptorType, MemberDescriptorType ) +from UserDict import UserDict + +from galaxy.util import sanitize_lists_to_string as _sanitize_lists_to_string + +log = logging.getLogger( __name__ ) + +# Define different behaviors for different types, see also: https://docs.python.org/2/library/types.html + +# Known Callable types +__CALLABLE_TYPES__ = ( FunctionType, MethodType, GeneratorType, CodeType, BuiltinFunctionType, BuiltinMethodType, ) + +# Always wrap these types without attempting to subclass +__WRAP_NO_SUBCLASS__ = ( ModuleType, XRangeType, SliceType, BufferType, TracebackType, FrameType, DictProxyType, + GetSetDescriptorType, MemberDescriptorType ) + __CALLABLE_TYPES__ + +# Don't wrap or sanitize. +__DONT_SANITIZE_TYPES__ = ( Number, bool, NoneType, NotImplementedType, EllipsisType, bytearray, ) + +# Don't wrap, but do sanitize. +__DONT_WRAP_TYPES__ = tuple() #( basestring, ) so that we can get the unsanitized string, we will now wrap basestring instances + +# Wrap contents, but not the container +__WRAP_SEQUENCES__ = ( tuple, list, ) +__WRAP_SETS__ = ( set, frozenset, ) +__WRAP_MAPPINGS__ = ( dict, UserDict, ) + + +# Define the set of characters that are not sanitized, and define a set of mappings for those that are. +# characters that are valid +VALID_CHARACTERS = set( string.letters + string.digits + " -=_.()/+*^,:?!@" ) + +# characters that are allowed but need to be escaped +CHARACTER_MAP = { '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__'} + +INVALID_CHARACTER = "X" + +def sanitize_lists_to_string( values, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP, invalid_character=INVALID_CHARACTER ): + return _sanitize_lists_to_string( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + + +def wrap_with_safe_string( value, no_wrap_classes = None ): + """ + Recursively wrap values that should be wrapped. + """ + + def __do_wrap( value ): + if isinstance( value, SafeStringWrapper ): + # Only ever wrap one-layer + return value + if callable( value ): + safe_class = CallableSafeStringWrapper + else: + safe_class = SafeStringWrapper + if isinstance( value, no_wrap_classes ): + return value + if isinstance( value, __DONT_WRAP_TYPES__ ): + return sanitize_lists_to_string( value, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + if isinstance( value, __WRAP_NO_SUBCLASS__ ): + return safe_class( value, safe_string_wrapper_function = __do_wrap ) + for this_type in __WRAP_SEQUENCES__ + __WRAP_SETS__: + if isinstance( value, this_type ): + return this_type( map( __do_wrap, value ) ) + for this_type in __WRAP_MAPPINGS__: + if isinstance( value, this_type ): + # Wrap both key and value + return this_type( map( lambda x: ( __do_wrap( x[0] ), __do_wrap( x[1] ) ), value.items() ) ) + # Create a dynamic class that joins SafeStringWrapper with the object being wrapped. + # This allows e.g. isinstance to continue to work. + try: + wrapped_class_name = value.__name__ + wrapped_class = value + except: + wrapped_class_name = value.__class__.__name__ + wrapped_class = value.__class__ + value_mod = inspect.getmodule( value ) + if value_mod: + wrapped_class_name = "%s.%s" % ( value_mod.__name__, wrapped_class_name ) + wrapped_class_name = "SafeStringWrapper(%s:%s)" % ( wrapped_class_name, ",".join( sorted( map( str, no_wrap_classes ) ) ) ) + do_wrap_func_name = "__do_wrap_%s" % ( wrapped_class_name ) + do_wrap_func = __do_wrap + global_dict = globals() + if wrapped_class_name in global_dict: + # Check to see if we have created a wrapper for this class yet, if so, reuse + wrapped_class = global_dict.get( wrapped_class_name ) + do_wrap_func = global_dict.get( do_wrap_func_name, __do_wrap ) + else: + try: + wrapped_class = type( wrapped_class_name, ( safe_class, wrapped_class, ), {} ) + except TypeError, e: + # Fail-safe for when a class cannot be dynamically subclassed. + log.warning( "Unable to create dynamic subclass for %s, %s: %s", type( value), value, e ) + wrapped_class = type( wrapped_class_name, ( safe_class, ), {} ) + if wrapped_class not in ( SafeStringWrapper, CallableSafeStringWrapper ): + # Save this wrapper for reuse and pickling/copying + global_dict[ wrapped_class_name ] = wrapped_class + do_wrap_func.__name__ = do_wrap_func_name + global_dict[ do_wrap_func_name ] = do_wrap_func + def pickle_safe_object( safe_object ): + return ( wrapped_class, ( safe_object.unsanitized, do_wrap_func, ) ) + # Set pickle and copy properties + copy_reg.pickle( wrapped_class, pickle_safe_object, do_wrap_func ) + return wrapped_class( value, safe_string_wrapper_function = do_wrap_func ) + # Determine classes not to wrap + if no_wrap_classes: + if not isinstance( no_wrap_classes, ( tuple, list ) ): + no_wrap_classes = [ no_wrap_classes ] + no_wrap_classes = list( no_wrap_classes ) + list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + else: + no_wrap_classes = list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + no_wrap_classes = tuple( set( sorted( no_wrap_classes, key=str ) ) ) + return __do_wrap( value ) + + +# N.B. refer to e.g. https://docs.python.org/2/reference/datamodel.html for information on Python's Data Model. + + +class SafeStringWrapper( object ): + """ + Class that wraps and sanitizes any provided value's attributes + that will attempt to be cast into a string. + + Attempts to mimic behavior of original class, including operands. + + To ensure proper handling of e.g. subclass checks, the *wrap_with_safe_string()* + method should be used. + + This wrapping occurs in a recursive/parasitic fashion, as all called attributes of + the originally wrapped object will also be wrapped and sanitized, unless the attribute + is of a type found in __DONT_SANITIZE_TYPES__ + __DONT_WRAP_TYPES__, where e.g. ~(strings + will still be sanitized, but not wrapped), and e.g. integers will have neither. + """ + __UNSANITIZED_ATTRIBUTE_NAME__ = 'unsanitized' + __NO_WRAP_NAMES__ = [ '__safe_string_wrapper_function__', __UNSANITIZED_ATTRIBUTE_NAME__] + + + def __new__( cls, *arg, **kwd ): + # We need to define a __new__ since, we are subclassing from e.g. immutable str, which internally sets data + # that will be used when other + this (this + other is handled by __add__) + safe_string_wrapper_function = kwd.get( 'safe_string_wrapper_function', None) or wrap_with_safe_string + try: + return super( SafeStringWrapper, cls ).__new__( cls, sanitize_lists_to_string( arg[0], valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + except Exception, e: + log.warning( "Could not provide an argument to %s.__new__: %s; will try without arguments.", cls, e ) + return super( SafeStringWrapper, cls ).__new__( cls ) + + def __init__( self, value, safe_string_wrapper_function = wrap_with_safe_string ): + self.unsanitized = value + self.__safe_string_wrapper_function__ = safe_string_wrapper_function + + def __str__( self ): + return sanitize_lists_to_string( self.unsanitized, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + + def __repr__( self ): + return "%s object at %x on: %s" % ( sanitize_lists_to_string( self.__class__.__name__, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ), id( self ), sanitize_lists_to_string( repr( self.unsanitized ), valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __le__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized <= other + + def __eq__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized == other + + def __ne__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized != other + + def __gt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized > other + + def __ge__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized >= other + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __cmp__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return cmp( self.unsanitized, other ) + + # Do not implement __rcmp__, python 2.2 < 2.6 + + def __hash__( self ): + return hash( self.unsanitized ) + + def __nonzero__( self ): + return bool( self.unsanitized ) + + # Do not implement __unicode__, we will rely on __str__ + + def __getattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + #FIXME: is this ever reached? + return object.__getattr__( self, name ) + return self.__safe_string_wrapper_function__( getattr( self.unsanitized, name ) ) + + def __setattr__( self, name, value ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__setattr__( self, name, value ) + return setattr( self.unsanitized, name, value ) + + def __delattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__delattr__( self, name ) + return delattr( self.unsanitized, name ) + + def __getattribute__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__getattribute__( self, name ) + return self.__safe_string_wrapper_function__( getattr( object.__getattribute__( self, 'unsanitized' ), name ) ) + + # Skip Descriptors + + # Skip __slots__ + + # Don't need __metaclass__, we'll use the helper function to handle with subclassing for e.g. isinstance() + + # Revisit: + # __instancecheck__ + # __subclasscheck__ + # We are using a helper class to create dynamic subclasses to handle class checks + + # We address __call__ as needed based upon unsanitized, through the use of a CallableSafeStringWrapper class + + def __len__( self ): + original_value = self.unsanitized + while isinstance( original_value, SafeStringWrapper ): + original_value = self.unsanitized + return len( self.unsanitized ) + + def __getitem__( self, key ): + return self.__safe_string_wrapper_function__( self.unsanitized[ key ] ) + + def __setitem__( self, key, value ): + while isinstance( value, SafeStringWrapper ): + value = value.unsanitized + self.unsanitized[ key ] = value + + def __delitem__( self, key ): + del self.unsanitized[ key ] + + def __iter__( self ): + return iter( map( self.__safe_string_wrapper_function__, iter( self.unsanitized ) ) ) + + # Do not implement __reversed__ + + def __contains__( self, item ): + # FIXME: Do we need to consider if item is/isn't or does/doesn't contain SafeStringWrapper? + # When considering e.g. nested lists/dicts/etc, this gets complicated + while isinstance( item, SafeStringWrapper ): + item = item.unsanitized + return item in self.unsanitized + + # Not sure that we need these slice methods, but will provide anyway + def __getslice__( self, i, j ): + return self.__safe_string_wrapper_function__( self.unsanitized[ i:j ] ) + + def __setslice__( self, i, j, value ): + self.unsanitized[ i:j ] = value + + def __delslice__( self, i, j ): + del self.unsanitized[ i:j ] + + def __add__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized + other ) + + def __sub__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized - other ) + + def __mul__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized * other ) + + def __floordiv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized // other ) + + def __mod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized % other ) + + def __divmod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( divmod( self.unsanitized, other ) ) + + def __pow__( self, *other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( self.unsanitized, *other ) ) + + def __lshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized << other ) + + def __rshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized >> other ) + + def __and__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized & other ) + + def __xor__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized ^ other ) + + def __or__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized | other ) + + def __div__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + def __truediv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + # The only reflected operand that we will define is __rpow__, due to coercion rules complications as per docs + def __rpow__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( other, self.unsanitized ) ) + + # Do not implement in-place operands + + def __neg__( self ): + return __safe_string_wrapper_function__( -self.unsanitized ) + + def __pos__( self ): + return __safe_string_wrapper_function__( +self.unsanitized ) + + def __abs__( self ): + return __safe_string_wrapper_function__( abs( self.unsanitized ) ) + + def __invert__( self ): + return __safe_string_wrapper_function__( ~self.unsanitized ) + + def __complex__( self ): + return __safe_string_wrapper_function__( complex( self.unsanitized ) ) + + def __int__( self ): + return int( self.unsanitized ) + + def __float__( self ): + return float( self.unsanitized ) + + def __oct__( self ): + return oct( self.unsanitized ) + + def __hex__( self ): + return hex( self.unsanitized ) + + def __index__( self ): + return self.unsanitized.index() + + def __coerce__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return coerce( self.unsanitized, other ) + + def __enter__( self ): + return self.unsanitized.__enter__() + + def __exit__( self, *args ): + return self.unsanitized.__exit__( *args ) + +class CallableSafeStringWrapper( SafeStringWrapper ): + + def __call__( self, *args, **kwds ): + return self.__safe_string_wrapper_function__( self.unsanitized( *args, **kwds ) ) + + +# Enable pickling/deepcopy +def pickle_SafeStringWrapper( safe_object ): + args = ( safe_object.unsanitized, ) + cls = SafeStringWrapper + if isinstance( safe_object, CallableSafeStringWrapper ): + cls = CallableSafeStringWrapper + return ( cls, args ) +copy_reg.pickle( SafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) +copy_reg.pickle( CallableSafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) + https://bitbucket.org/galaxy/galaxy-central/commits/b979070e064a/ Changeset: b979070e064a Branch: stable User: natefoo Date: 2015-01-13 15:26:32+00:00 Summary: Update tag latest_2013.04.01 for changeset dec9431d66b8 Affected #: 1 file diff -r 7c4a6bf151a279cbc9b639e1804f0701d3b11ccf -r b979070e064ace7b70ece868977e72712f647a80 .hgtags --- a/.hgtags +++ b/.hgtags @@ -12,7 +12,7 @@ 9661b9d5d5b330483ae3ad2236410e0efaa7c500 latest_2014.04.14 9c323aad4ffdd65a3deb06a4a36f6b2c5115a60f latest_2013.01.13 b986c184be88947b5d1d90be7f36cfd2627dd938 latest_2013.02.08 -425009b3ff4d8b67d2812253b221f3c4f4a8d1e3 latest_2013.04.01 +dec9431d66b837a208e2f060d90afd913c721227 latest_2013.04.01 9713d86392ef985ffcdc39ff0c8ddf51a1f9ce47 latest_2013.06.03 9ed84cd208e07e8985ec917cb025fcbbb09edcfb latest_2013.08.12 81fbe25bd02edcd53065e8e4476dd1dfb5a72cf2 latest_2013.11.04 https://bitbucket.org/galaxy/galaxy-central/commits/29a6bfeb4f46/ Changeset: 29a6bfeb4f46 Branch: stable User: natefoo Date: 2015-01-13 15:26:33+00:00 Summary: Merge head created for security fix on latest_2013.04.01 Affected #: 3 files https://bitbucket.org/galaxy/galaxy-central/commits/19e56e66b0b3/ Changeset: 19e56e66b0b3 Branch: stable User: dan Date: 2015-01-13 15:26:40+00:00 Summary: Fix a critical security vulnerability where unsanitized user-modifiable values could be included in a command line template. Affected #: 5 files diff -r 9713d86392ef985ffcdc39ff0c8ddf51a1f9ce47 -r 19e56e66b0b344c6e2afa4541f6988e4fdb9af29 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -20,6 +20,7 @@ import galaxy.model from galaxy.util import listify, stringify_dictionary_keys, string_as_bool +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.util.odict import odict from galaxy.web import form_builder from sqlalchemy.orm import object_session @@ -187,7 +188,10 @@ def to_string( self, value ): return str( value ) - + + def to_safe_string( self, value ): + return sanitize_lists_to_string( self.to_string( value ) ) + def make_copy( self, value, target_context = None, source_context = None ): return copy.deepcopy( value ) @@ -405,6 +409,10 @@ def to_string( self, value ): return simplejson.dumps( value ) + def to_safe_string( self, value ): + # We do not sanitize json dicts + return simplejson.dumps( value ) + class PythonObjectParameter( MetadataParameter ): def to_string( self, value ): @@ -428,7 +436,11 @@ if not value: return str( self.spec.no_value ) return value.file_name - + + def to_safe_string( self, value ): + # We do not sanitize file names + return self.to_string( value ) + def get_html_field( self, value=None, context={}, other_values={}, **kwd ): return form_builder.TextField( self.spec.name, value=str( value.id ) ) diff -r 9713d86392ef985ffcdc39ff0c8ddf51a1f9ce47 -r 19e56e66b0b344c6e2afa4541f6988e4fdb9af29 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -60,6 +60,8 @@ from galaxy.web.form_builder import SelectField from tool_shed.util import shed_util_common from .loader import load_tool, template_macro_params +from galaxy.util.object_wrapper import wrap_with_safe_string +from galaxy import exceptions log = logging.getLogger( __name__ ) @@ -2543,7 +2545,9 @@ return self.app.tool_data_tables[ table_name ].get_entry( query_attr, query_val, return_attr ) param_dict['__get_data_table_entry__'] = get_data_table_entry - + # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames. + self.__sanitize_param_dict( param_dict ) + # Parameters added after this line are not sanitized # We add access to app here, this allows access to app.config, etc param_dict['__app__'] = RawObjectWrapper( self.app ) # More convienent access to app.config.new_file_path; we don't need to @@ -2562,6 +2566,23 @@ param_dict['__user__'] = RawObjectWrapper( param_dict.get( '__user__', None ) ) # Return the dictionary of parameters return param_dict + def __sanitize_param_dict( self, param_dict ): + """ + Sanitize all values that will be substituted on the command line, with the exception of ToolParameterValueWrappers, + which already have their own specific sanitization rules and also exclude special-cased named values. + We will only examine the first level for values to skip; the wrapping function will recurse as necessary. + + Note: this method follows the style of the similar populate calls, in that param_dict is modified in-place. + """ + # chromInfo is a filename, do not sanitize it. + skip = [ 'chromInfo' ] + if not self.options or self.options.sanitize: + for key, value in param_dict.items(): + if key not in skip: + # Remove key so that new wrapped object will occupy key slot + del param_dict[key] + # And replace with new wrapped key + param_dict[ wrap_with_safe_string( key, no_wrap_classes=ToolParameterValueWrapper ) ] = wrap_with_safe_string( value, no_wrap_classes=ToolParameterValueWrapper ) def build_param_file( self, param_dict, directory=None ): """ Build temporary file for file based parameter transfer if needed @@ -3357,10 +3378,13 @@ if name in self.metadata.spec: if rval is None: rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) + rval = self.metadata.spec[ name ].param.to_safe_string( rval ) # Store this value, so we don't need to recalculate if needed # again setattr( self, name, rval ) + else: + #escape string value of non-defined metadata value + rval = wrap_with_safe_string( rval ) return rval def __nonzero__( self ): return self.metadata.__nonzero__() @@ -3381,9 +3405,13 @@ ext = tool.inputs[name].extensions[0] except: ext = 'data' - self.dataset = NoneDataset( datatypes_registry = datatypes_registry, ext = ext ) + self.dataset = wrap_with_safe_string( NoneDataset( datatypes_registry=datatypes_registry, ext=ext ), no_wrap_classes=ToolParameterValueWrapper ) else: - self.dataset = dataset + # Tool wrappers should not normally be accessing .dataset directly, + # so we will wrap it and keep the original around for file paths + # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper? + self.unsanitized = dataset + self.dataset = wrap_with_safe_string( dataset, no_wrap_classes=ToolParameterValueWrapper ) self.metadata = self.MetadataWrapper( dataset.metadata ) self.false_path = false_path @@ -3391,10 +3419,28 @@ if self.false_path is not None: return self.false_path else: - return self.dataset.file_name + return self.unsanitized.file_name def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': return self.false_path + #does not implement support for false_extra_files_path + elif key == 'extra_files_path': + try: + # Assume it is an output and that this wrapper + # will be set with correct "files_path" for this + # job. + return self.files_path + except AttributeError: + # Otherwise, we have an input - delegate to model and + # object store to find the static location of this + # directory. + try: + return self.unsanitized.extra_files_path + except exceptions.ObjectNotFound: + # NestedObjectstore raises an error here + # instead of just returning a non-existent + # path like DiskObjectStore. + raise else: return getattr( self.dataset, key ) def __nonzero__( self ): diff -r 9713d86392ef985ffcdc39ff0c8ddf51a1f9ce47 -r 19e56e66b0b344c6e2afa4541f6988e4fdb9af29 lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -6,6 +6,7 @@ from galaxy.tools.parameters.grouping import * from galaxy.util.template import fill_template from galaxy.util.none_like import NoneDataset +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.web import url_for from galaxy.exceptions import ObjectInvalid import galaxy.tools @@ -214,7 +215,7 @@ if not chrom_info: # Default to built-in build. - chrom_info = os.path.join( trans.app.config.len_file_path, "%s.len" % input_dbkey ) + chrom_info = os.path.join( trans.app.config.len_file_path, "%s.len" % ( sanitize_lists_to_string( input_dbkey ) ) ) incoming[ "chromInfo" ] = chrom_info inp_data.update( db_datasets ) diff -r 9713d86392ef985ffcdc39ff0c8ddf51a1f9ce47 -r 19e56e66b0b344c6e2afa4541f6988e4fdb9af29 lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -249,48 +249,60 @@ '#' : '__pd__' } -def restore_text(text): +def restore_text( text, character_map=mapped_chars ): """Restores sanitized text""" if not text: return text - for key, value in mapped_chars.items(): + for key, value in character_map.items(): text = text.replace(value, key) return text -def sanitize_text(text): + +def sanitize_text( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """ Restricts the characters that are allowed in text; accepts both strings - and lists of strings. + and lists of strings; non-string entities will be cast to strings. """ - if isinstance( text, basestring ): - return _sanitize_text_helper(text) - elif isinstance( text, list ): - return [ _sanitize_text_helper(t) for t in text ] + if isinstance( text, list ): + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), text ) + if not isinstance( text, basestring ): + text = smart_str( text ) + return _sanitize_text_helper( text, valid_characters=valid_characters, character_map=character_map ) -def _sanitize_text_helper(text): +def _sanitize_text_helper( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Restricts the characters that are allowed in a string""" out = [] for c in text: - if c in valid_chars: + if c in valid_characters: out.append(c) - elif c in mapped_chars: - out.append(mapped_chars[c]) + elif c in character_map: + out.append( character_map[c] ) else: - out.append('X') # makes debugging easier + out.append( invalid_character ) # makes debugging easier return ''.join(out) -def sanitize_param(value): + +def sanitize_lists_to_string( values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + if isinstance( values, list ): + rval = [] + for value in values: + rval.append( sanitize_lists_to_string( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) ) + values = ",".join( rval ) + else: + values = sanitize_text( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + return values + + +def sanitize_param( value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Clean incoming parameters (strings or lists)""" if isinstance( value, basestring ): - return sanitize_text(value) + return sanitize_text( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) elif isinstance( value, list ): - return map(sanitize_text, value) + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), value ) else: - print value - raise Exception, 'Unknown parameter type (%s)' % ( type( value ) ) + raise Exception('Unknown parameter type (%s)' % ( type( value ) )) -valid_filename_chars = set( string.ascii_letters + string.digits + '_.' ) invalid_filenames = [ '', '.', '..' ] def sanitize_for_filename( text, default=None ): """ @@ -488,6 +500,28 @@ except: return default +def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'): + """ + Returns a bytestring version of 's', encoded as specified in 'encoding'. + + If strings_only is True, don't convert (some) non-string-like objects. + + Adapted from an older, simpler version of django.utils.encoding.smart_str. + """ + if strings_only and isinstance(s, (type(None), int)): + return s + if not isinstance(s, basestring): + try: + return str(s) + except UnicodeEncodeError: + return unicode(s).encode(encoding, errors) + elif isinstance(s, unicode): + return s.encode(encoding, errors) + elif s and encoding != 'utf-8': + return s.decode('utf-8', errors).encode(encoding, errors) + else: + return s + def object_to_string( obj ): return binascii.hexlify( obj ) diff -r 9713d86392ef985ffcdc39ff0c8ddf51a1f9ce47 -r 19e56e66b0b344c6e2afa4541f6988e4fdb9af29 lib/galaxy/util/object_wrapper.py --- /dev/null +++ b/lib/galaxy/util/object_wrapper.py @@ -0,0 +1,436 @@ +""" +Classes for wrapping Objects and Sanitizing string output. +""" + +import inspect +import copy_reg +import logging +import string +from numbers import Number +from types import ( NoneType, NotImplementedType, EllipsisType, FunctionType, MethodType, GeneratorType, CodeType, + BuiltinFunctionType, BuiltinMethodType, ModuleType, XRangeType, SliceType, TracebackType, FrameType, + BufferType, DictProxyType, GetSetDescriptorType, MemberDescriptorType ) +from UserDict import UserDict + +from galaxy.util import sanitize_lists_to_string as _sanitize_lists_to_string + +log = logging.getLogger( __name__ ) + +# Define different behaviors for different types, see also: https://docs.python.org/2/library/types.html + +# Known Callable types +__CALLABLE_TYPES__ = ( FunctionType, MethodType, GeneratorType, CodeType, BuiltinFunctionType, BuiltinMethodType, ) + +# Always wrap these types without attempting to subclass +__WRAP_NO_SUBCLASS__ = ( ModuleType, XRangeType, SliceType, BufferType, TracebackType, FrameType, DictProxyType, + GetSetDescriptorType, MemberDescriptorType ) + __CALLABLE_TYPES__ + +# Don't wrap or sanitize. +__DONT_SANITIZE_TYPES__ = ( Number, bool, NoneType, NotImplementedType, EllipsisType, bytearray, ) + +# Don't wrap, but do sanitize. +__DONT_WRAP_TYPES__ = tuple() #( basestring, ) so that we can get the unsanitized string, we will now wrap basestring instances + +# Wrap contents, but not the container +__WRAP_SEQUENCES__ = ( tuple, list, ) +__WRAP_SETS__ = ( set, frozenset, ) +__WRAP_MAPPINGS__ = ( dict, UserDict, ) + + +# Define the set of characters that are not sanitized, and define a set of mappings for those that are. +# characters that are valid +VALID_CHARACTERS = set( string.letters + string.digits + " -=_.()/+*^,:?!@" ) + +# characters that are allowed but need to be escaped +CHARACTER_MAP = { '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__'} + +INVALID_CHARACTER = "X" + +def sanitize_lists_to_string( values, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP, invalid_character=INVALID_CHARACTER ): + return _sanitize_lists_to_string( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + + +def wrap_with_safe_string( value, no_wrap_classes = None ): + """ + Recursively wrap values that should be wrapped. + """ + + def __do_wrap( value ): + if isinstance( value, SafeStringWrapper ): + # Only ever wrap one-layer + return value + if callable( value ): + safe_class = CallableSafeStringWrapper + else: + safe_class = SafeStringWrapper + if isinstance( value, no_wrap_classes ): + return value + if isinstance( value, __DONT_WRAP_TYPES__ ): + return sanitize_lists_to_string( value, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + if isinstance( value, __WRAP_NO_SUBCLASS__ ): + return safe_class( value, safe_string_wrapper_function = __do_wrap ) + for this_type in __WRAP_SEQUENCES__ + __WRAP_SETS__: + if isinstance( value, this_type ): + return this_type( map( __do_wrap, value ) ) + for this_type in __WRAP_MAPPINGS__: + if isinstance( value, this_type ): + # Wrap both key and value + return this_type( map( lambda x: ( __do_wrap( x[0] ), __do_wrap( x[1] ) ), value.items() ) ) + # Create a dynamic class that joins SafeStringWrapper with the object being wrapped. + # This allows e.g. isinstance to continue to work. + try: + wrapped_class_name = value.__name__ + wrapped_class = value + except: + wrapped_class_name = value.__class__.__name__ + wrapped_class = value.__class__ + value_mod = inspect.getmodule( value ) + if value_mod: + wrapped_class_name = "%s.%s" % ( value_mod.__name__, wrapped_class_name ) + wrapped_class_name = "SafeStringWrapper(%s:%s)" % ( wrapped_class_name, ",".join( sorted( map( str, no_wrap_classes ) ) ) ) + do_wrap_func_name = "__do_wrap_%s" % ( wrapped_class_name ) + do_wrap_func = __do_wrap + global_dict = globals() + if wrapped_class_name in global_dict: + # Check to see if we have created a wrapper for this class yet, if so, reuse + wrapped_class = global_dict.get( wrapped_class_name ) + do_wrap_func = global_dict.get( do_wrap_func_name, __do_wrap ) + else: + try: + wrapped_class = type( wrapped_class_name, ( safe_class, wrapped_class, ), {} ) + except TypeError, e: + # Fail-safe for when a class cannot be dynamically subclassed. + log.warning( "Unable to create dynamic subclass for %s, %s: %s", type( value), value, e ) + wrapped_class = type( wrapped_class_name, ( safe_class, ), {} ) + if wrapped_class not in ( SafeStringWrapper, CallableSafeStringWrapper ): + # Save this wrapper for reuse and pickling/copying + global_dict[ wrapped_class_name ] = wrapped_class + do_wrap_func.__name__ = do_wrap_func_name + global_dict[ do_wrap_func_name ] = do_wrap_func + def pickle_safe_object( safe_object ): + return ( wrapped_class, ( safe_object.unsanitized, do_wrap_func, ) ) + # Set pickle and copy properties + copy_reg.pickle( wrapped_class, pickle_safe_object, do_wrap_func ) + return wrapped_class( value, safe_string_wrapper_function = do_wrap_func ) + # Determine classes not to wrap + if no_wrap_classes: + if not isinstance( no_wrap_classes, ( tuple, list ) ): + no_wrap_classes = [ no_wrap_classes ] + no_wrap_classes = list( no_wrap_classes ) + list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + else: + no_wrap_classes = list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + no_wrap_classes = tuple( set( sorted( no_wrap_classes, key=str ) ) ) + return __do_wrap( value ) + + +# N.B. refer to e.g. https://docs.python.org/2/reference/datamodel.html for information on Python's Data Model. + + +class SafeStringWrapper( object ): + """ + Class that wraps and sanitizes any provided value's attributes + that will attempt to be cast into a string. + + Attempts to mimic behavior of original class, including operands. + + To ensure proper handling of e.g. subclass checks, the *wrap_with_safe_string()* + method should be used. + + This wrapping occurs in a recursive/parasitic fashion, as all called attributes of + the originally wrapped object will also be wrapped and sanitized, unless the attribute + is of a type found in __DONT_SANITIZE_TYPES__ + __DONT_WRAP_TYPES__, where e.g. ~(strings + will still be sanitized, but not wrapped), and e.g. integers will have neither. + """ + __UNSANITIZED_ATTRIBUTE_NAME__ = 'unsanitized' + __NO_WRAP_NAMES__ = [ '__safe_string_wrapper_function__', __UNSANITIZED_ATTRIBUTE_NAME__] + + + def __new__( cls, *arg, **kwd ): + # We need to define a __new__ since, we are subclassing from e.g. immutable str, which internally sets data + # that will be used when other + this (this + other is handled by __add__) + safe_string_wrapper_function = kwd.get( 'safe_string_wrapper_function', None) or wrap_with_safe_string + try: + return super( SafeStringWrapper, cls ).__new__( cls, sanitize_lists_to_string( arg[0], valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + except Exception, e: + log.warning( "Could not provide an argument to %s.__new__: %s; will try without arguments.", cls, e ) + return super( SafeStringWrapper, cls ).__new__( cls ) + + def __init__( self, value, safe_string_wrapper_function = wrap_with_safe_string ): + self.unsanitized = value + self.__safe_string_wrapper_function__ = safe_string_wrapper_function + + def __str__( self ): + return sanitize_lists_to_string( self.unsanitized, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + + def __repr__( self ): + return "%s object at %x on: %s" % ( sanitize_lists_to_string( self.__class__.__name__, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ), id( self ), sanitize_lists_to_string( repr( self.unsanitized ), valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __le__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized <= other + + def __eq__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized == other + + def __ne__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized != other + + def __gt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized > other + + def __ge__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized >= other + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __cmp__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return cmp( self.unsanitized, other ) + + # Do not implement __rcmp__, python 2.2 < 2.6 + + def __hash__( self ): + return hash( self.unsanitized ) + + def __nonzero__( self ): + return bool( self.unsanitized ) + + # Do not implement __unicode__, we will rely on __str__ + + def __getattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + #FIXME: is this ever reached? + return object.__getattr__( self, name ) + return self.__safe_string_wrapper_function__( getattr( self.unsanitized, name ) ) + + def __setattr__( self, name, value ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__setattr__( self, name, value ) + return setattr( self.unsanitized, name, value ) + + def __delattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__delattr__( self, name ) + return delattr( self.unsanitized, name ) + + def __getattribute__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__getattribute__( self, name ) + return self.__safe_string_wrapper_function__( getattr( object.__getattribute__( self, 'unsanitized' ), name ) ) + + # Skip Descriptors + + # Skip __slots__ + + # Don't need __metaclass__, we'll use the helper function to handle with subclassing for e.g. isinstance() + + # Revisit: + # __instancecheck__ + # __subclasscheck__ + # We are using a helper class to create dynamic subclasses to handle class checks + + # We address __call__ as needed based upon unsanitized, through the use of a CallableSafeStringWrapper class + + def __len__( self ): + original_value = self.unsanitized + while isinstance( original_value, SafeStringWrapper ): + original_value = self.unsanitized + return len( self.unsanitized ) + + def __getitem__( self, key ): + return self.__safe_string_wrapper_function__( self.unsanitized[ key ] ) + + def __setitem__( self, key, value ): + while isinstance( value, SafeStringWrapper ): + value = value.unsanitized + self.unsanitized[ key ] = value + + def __delitem__( self, key ): + del self.unsanitized[ key ] + + def __iter__( self ): + return iter( map( self.__safe_string_wrapper_function__, iter( self.unsanitized ) ) ) + + # Do not implement __reversed__ + + def __contains__( self, item ): + # FIXME: Do we need to consider if item is/isn't or does/doesn't contain SafeStringWrapper? + # When considering e.g. nested lists/dicts/etc, this gets complicated + while isinstance( item, SafeStringWrapper ): + item = item.unsanitized + return item in self.unsanitized + + # Not sure that we need these slice methods, but will provide anyway + def __getslice__( self, i, j ): + return self.__safe_string_wrapper_function__( self.unsanitized[ i:j ] ) + + def __setslice__( self, i, j, value ): + self.unsanitized[ i:j ] = value + + def __delslice__( self, i, j ): + del self.unsanitized[ i:j ] + + def __add__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized + other ) + + def __sub__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized - other ) + + def __mul__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized * other ) + + def __floordiv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized // other ) + + def __mod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized % other ) + + def __divmod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( divmod( self.unsanitized, other ) ) + + def __pow__( self, *other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( self.unsanitized, *other ) ) + + def __lshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized << other ) + + def __rshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized >> other ) + + def __and__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized & other ) + + def __xor__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized ^ other ) + + def __or__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized | other ) + + def __div__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + def __truediv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + # The only reflected operand that we will define is __rpow__, due to coercion rules complications as per docs + def __rpow__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( other, self.unsanitized ) ) + + # Do not implement in-place operands + + def __neg__( self ): + return __safe_string_wrapper_function__( -self.unsanitized ) + + def __pos__( self ): + return __safe_string_wrapper_function__( +self.unsanitized ) + + def __abs__( self ): + return __safe_string_wrapper_function__( abs( self.unsanitized ) ) + + def __invert__( self ): + return __safe_string_wrapper_function__( ~self.unsanitized ) + + def __complex__( self ): + return __safe_string_wrapper_function__( complex( self.unsanitized ) ) + + def __int__( self ): + return int( self.unsanitized ) + + def __float__( self ): + return float( self.unsanitized ) + + def __oct__( self ): + return oct( self.unsanitized ) + + def __hex__( self ): + return hex( self.unsanitized ) + + def __index__( self ): + return self.unsanitized.index() + + def __coerce__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return coerce( self.unsanitized, other ) + + def __enter__( self ): + return self.unsanitized.__enter__() + + def __exit__( self, *args ): + return self.unsanitized.__exit__( *args ) + +class CallableSafeStringWrapper( SafeStringWrapper ): + + def __call__( self, *args, **kwds ): + return self.__safe_string_wrapper_function__( self.unsanitized( *args, **kwds ) ) + + +# Enable pickling/deepcopy +def pickle_SafeStringWrapper( safe_object ): + args = ( safe_object.unsanitized, ) + cls = SafeStringWrapper + if isinstance( safe_object, CallableSafeStringWrapper ): + cls = CallableSafeStringWrapper + return ( cls, args ) +copy_reg.pickle( SafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) +copy_reg.pickle( CallableSafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) + https://bitbucket.org/galaxy/galaxy-central/commits/4b481505104e/ Changeset: 4b481505104e Branch: stable User: natefoo Date: 2015-01-13 15:26:45+00:00 Summary: Update tag latest_2013.06.03 for changeset 19e56e66b0b3 Affected #: 1 file diff -r 29a6bfeb4f468e76c84bea8bc5d445ebf08fdfee -r 4b481505104efe8a711532ad0878e7f5cb6786c0 .hgtags --- a/.hgtags +++ b/.hgtags @@ -13,7 +13,7 @@ 9c323aad4ffdd65a3deb06a4a36f6b2c5115a60f latest_2013.01.13 b986c184be88947b5d1d90be7f36cfd2627dd938 latest_2013.02.08 dec9431d66b837a208e2f060d90afd913c721227 latest_2013.04.01 -9713d86392ef985ffcdc39ff0c8ddf51a1f9ce47 latest_2013.06.03 +19e56e66b0b344c6e2afa4541f6988e4fdb9af29 latest_2013.06.03 9ed84cd208e07e8985ec917cb025fcbbb09edcfb latest_2013.08.12 81fbe25bd02edcd53065e8e4476dd1dfb5a72cf2 latest_2013.11.04 2a756ca2cb1826db7796018e77d12e2dd7b67603 latest_2014.02.10 https://bitbucket.org/galaxy/galaxy-central/commits/91ca514f61ec/ Changeset: 91ca514f61ec Branch: stable User: natefoo Date: 2015-01-13 15:26:46+00:00 Summary: Merge head created for security fix on latest_2013.06.03 Affected #: 4 files https://bitbucket.org/galaxy/galaxy-central/commits/cee903b8b3ee/ Changeset: cee903b8b3ee Branch: stable User: dan Date: 2015-01-13 15:26:56+00:00 Summary: Fix a critical security vulnerability where unsanitized user-modifiable values could be included in a command line template. Affected #: 5 files diff -r 9ed84cd208e07e8985ec917cb025fcbbb09edcfb -r cee903b8b3eee9145627ee89742555dac581791e lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -20,6 +20,7 @@ import galaxy.model from galaxy.util import listify, stringify_dictionary_keys, string_as_bool +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.util.odict import odict from galaxy.web import form_builder from sqlalchemy.orm import object_session @@ -187,7 +188,10 @@ def to_string( self, value ): return str( value ) - + + def to_safe_string( self, value ): + return sanitize_lists_to_string( self.to_string( value ) ) + def make_copy( self, value, target_context = None, source_context = None ): return copy.deepcopy( value ) @@ -405,6 +409,10 @@ def to_string( self, value ): return simplejson.dumps( value ) + def to_safe_string( self, value ): + # We do not sanitize json dicts + return simplejson.dumps( value ) + class PythonObjectParameter( MetadataParameter ): def to_string( self, value ): @@ -428,7 +436,11 @@ if not value: return str( self.spec.no_value ) return value.file_name - + + def to_safe_string( self, value ): + # We do not sanitize file names + return self.to_string( value ) + def get_html_field( self, value=None, context={}, other_values={}, **kwd ): return form_builder.TextField( self.spec.name, value=str( value.id ) ) diff -r 9ed84cd208e07e8985ec917cb025fcbbb09edcfb -r cee903b8b3eee9145627ee89742555dac581791e lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -61,6 +61,8 @@ from galaxy.web.form_builder import SelectField from tool_shed.util import shed_util_common from .loader import load_tool, template_macro_params +from galaxy.util.object_wrapper import wrap_with_safe_string +from galaxy import exceptions log = logging.getLogger( __name__ ) @@ -2553,7 +2555,9 @@ return self.app.tool_data_tables[ table_name ].get_entry( query_attr, query_val, return_attr ) param_dict['__get_data_table_entry__'] = get_data_table_entry - + # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames. + self.__sanitize_param_dict( param_dict ) + # Parameters added after this line are not sanitized # We add access to app here, this allows access to app.config, etc param_dict['__app__'] = RawObjectWrapper( self.app ) # More convienent access to app.config.new_file_path; we don't need to @@ -2572,6 +2576,23 @@ param_dict['__user__'] = RawObjectWrapper( param_dict.get( '__user__', None ) ) # Return the dictionary of parameters return param_dict + def __sanitize_param_dict( self, param_dict ): + """ + Sanitize all values that will be substituted on the command line, with the exception of ToolParameterValueWrappers, + which already have their own specific sanitization rules and also exclude special-cased named values. + We will only examine the first level for values to skip; the wrapping function will recurse as necessary. + + Note: this method follows the style of the similar populate calls, in that param_dict is modified in-place. + """ + # chromInfo is a filename, do not sanitize it. + skip = [ 'chromInfo' ] + if not self.options or self.options.sanitize: + for key, value in param_dict.items(): + if key not in skip: + # Remove key so that new wrapped object will occupy key slot + del param_dict[key] + # And replace with new wrapped key + param_dict[ wrap_with_safe_string( key, no_wrap_classes=ToolParameterValueWrapper ) ] = wrap_with_safe_string( value, no_wrap_classes=ToolParameterValueWrapper ) def build_param_file( self, param_dict, directory=None ): """ Build temporary file for file based parameter transfer if needed @@ -3369,10 +3390,13 @@ if name in self.metadata.spec: if rval is None: rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) + rval = self.metadata.spec[ name ].param.to_safe_string( rval ) # Store this value, so we don't need to recalculate if needed # again setattr( self, name, rval ) + else: + #escape string value of non-defined metadata value + rval = wrap_with_safe_string( rval ) return rval def __nonzero__( self ): return self.metadata.__nonzero__() @@ -3393,9 +3417,13 @@ ext = tool.inputs[name].extensions[0] except: ext = 'data' - self.dataset = NoneDataset( datatypes_registry = datatypes_registry, ext = ext ) + self.dataset = wrap_with_safe_string( NoneDataset( datatypes_registry=datatypes_registry, ext=ext ), no_wrap_classes=ToolParameterValueWrapper ) else: - self.dataset = dataset + # Tool wrappers should not normally be accessing .dataset directly, + # so we will wrap it and keep the original around for file paths + # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper? + self.unsanitized = dataset + self.dataset = wrap_with_safe_string( dataset, no_wrap_classes=ToolParameterValueWrapper ) self.metadata = self.MetadataWrapper( dataset.metadata ) self.false_path = false_path @@ -3403,10 +3431,28 @@ if self.false_path is not None: return self.false_path else: - return self.dataset.file_name + return self.unsanitized.file_name def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': return self.false_path + #does not implement support for false_extra_files_path + elif key == 'extra_files_path': + try: + # Assume it is an output and that this wrapper + # will be set with correct "files_path" for this + # job. + return self.files_path + except AttributeError: + # Otherwise, we have an input - delegate to model and + # object store to find the static location of this + # directory. + try: + return self.unsanitized.extra_files_path + except exceptions.ObjectNotFound: + # NestedObjectstore raises an error here + # instead of just returning a non-existent + # path like DiskObjectStore. + raise else: return getattr( self.dataset, key ) def __nonzero__( self ): diff -r 9ed84cd208e07e8985ec917cb025fcbbb09edcfb -r cee903b8b3eee9145627ee89742555dac581791e lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -10,6 +10,7 @@ from galaxy.util.none_like import NoneDataset from galaxy.util.odict import odict from galaxy.util.template import fill_template +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.web import url_for import logging @@ -217,7 +218,7 @@ if not chrom_info: # Default to built-in build. - chrom_info = os.path.join( trans.app.config.len_file_path, "%s.len" % input_dbkey ) + chrom_info = os.path.join( trans.app.config.len_file_path, "%s.len" % ( sanitize_lists_to_string( input_dbkey ) ) ) incoming[ "chromInfo" ] = chrom_info inp_data.update( db_datasets ) diff -r 9ed84cd208e07e8985ec917cb025fcbbb09edcfb -r cee903b8b3eee9145627ee89742555dac581791e lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -301,47 +301,60 @@ '#' : '__pd__' } -def restore_text(text): +def restore_text( text, character_map=mapped_chars ): """Restores sanitized text""" if not text: return text - for key, value in mapped_chars.items(): + for key, value in character_map.items(): text = text.replace(value, key) return text -def sanitize_text(text): + +def sanitize_text( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """ Restricts the characters that are allowed in text; accepts both strings - and lists of strings. + and lists of strings; non-string entities will be cast to strings. """ - if isinstance( text, basestring ): - return _sanitize_text_helper(text) - elif isinstance( text, list ): - return [ _sanitize_text_helper(t) for t in text ] + if isinstance( text, list ): + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), text ) + if not isinstance( text, basestring ): + text = smart_str( text ) + return _sanitize_text_helper( text, valid_characters=valid_characters, character_map=character_map ) -def _sanitize_text_helper(text): +def _sanitize_text_helper( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Restricts the characters that are allowed in a string""" out = [] for c in text: - if c in valid_chars: + if c in valid_characters: out.append(c) - elif c in mapped_chars: - out.append(mapped_chars[c]) + elif c in character_map: + out.append( character_map[c] ) else: - out.append('X') # makes debugging easier + out.append( invalid_character ) # makes debugging easier return ''.join(out) -def sanitize_param(value): + +def sanitize_lists_to_string( values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + if isinstance( values, list ): + rval = [] + for value in values: + rval.append( sanitize_lists_to_string( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) ) + values = ",".join( rval ) + else: + values = sanitize_text( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + return values + + +def sanitize_param( value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Clean incoming parameters (strings or lists)""" if isinstance( value, basestring ): - return sanitize_text(value) + return sanitize_text( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) elif isinstance( value, list ): - return map(sanitize_text, value) + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), value ) else: - raise Exception, 'Unknown parameter type (%s)' % ( type( value ) ) + raise Exception('Unknown parameter type (%s)' % ( type( value ) )) -valid_filename_chars = set( string.ascii_letters + string.digits + '_.' ) invalid_filenames = [ '', '.', '..' ] def sanitize_for_filename( text, default=None ): """ @@ -539,6 +552,28 @@ except: return default +def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'): + """ + Returns a bytestring version of 's', encoded as specified in 'encoding'. + + If strings_only is True, don't convert (some) non-string-like objects. + + Adapted from an older, simpler version of django.utils.encoding.smart_str. + """ + if strings_only and isinstance(s, (type(None), int)): + return s + if not isinstance(s, basestring): + try: + return str(s) + except UnicodeEncodeError: + return unicode(s).encode(encoding, errors) + elif isinstance(s, unicode): + return s.encode(encoding, errors) + elif s and encoding != 'utf-8': + return s.decode('utf-8', errors).encode(encoding, errors) + else: + return s + def object_to_string( obj ): return binascii.hexlify( obj ) diff -r 9ed84cd208e07e8985ec917cb025fcbbb09edcfb -r cee903b8b3eee9145627ee89742555dac581791e lib/galaxy/util/object_wrapper.py --- /dev/null +++ b/lib/galaxy/util/object_wrapper.py @@ -0,0 +1,436 @@ +""" +Classes for wrapping Objects and Sanitizing string output. +""" + +import inspect +import copy_reg +import logging +import string +from numbers import Number +from types import ( NoneType, NotImplementedType, EllipsisType, FunctionType, MethodType, GeneratorType, CodeType, + BuiltinFunctionType, BuiltinMethodType, ModuleType, XRangeType, SliceType, TracebackType, FrameType, + BufferType, DictProxyType, GetSetDescriptorType, MemberDescriptorType ) +from UserDict import UserDict + +from galaxy.util import sanitize_lists_to_string as _sanitize_lists_to_string + +log = logging.getLogger( __name__ ) + +# Define different behaviors for different types, see also: https://docs.python.org/2/library/types.html + +# Known Callable types +__CALLABLE_TYPES__ = ( FunctionType, MethodType, GeneratorType, CodeType, BuiltinFunctionType, BuiltinMethodType, ) + +# Always wrap these types without attempting to subclass +__WRAP_NO_SUBCLASS__ = ( ModuleType, XRangeType, SliceType, BufferType, TracebackType, FrameType, DictProxyType, + GetSetDescriptorType, MemberDescriptorType ) + __CALLABLE_TYPES__ + +# Don't wrap or sanitize. +__DONT_SANITIZE_TYPES__ = ( Number, bool, NoneType, NotImplementedType, EllipsisType, bytearray, ) + +# Don't wrap, but do sanitize. +__DONT_WRAP_TYPES__ = tuple() #( basestring, ) so that we can get the unsanitized string, we will now wrap basestring instances + +# Wrap contents, but not the container +__WRAP_SEQUENCES__ = ( tuple, list, ) +__WRAP_SETS__ = ( set, frozenset, ) +__WRAP_MAPPINGS__ = ( dict, UserDict, ) + + +# Define the set of characters that are not sanitized, and define a set of mappings for those that are. +# characters that are valid +VALID_CHARACTERS = set( string.letters + string.digits + " -=_.()/+*^,:?!@" ) + +# characters that are allowed but need to be escaped +CHARACTER_MAP = { '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__'} + +INVALID_CHARACTER = "X" + +def sanitize_lists_to_string( values, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP, invalid_character=INVALID_CHARACTER ): + return _sanitize_lists_to_string( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + + +def wrap_with_safe_string( value, no_wrap_classes = None ): + """ + Recursively wrap values that should be wrapped. + """ + + def __do_wrap( value ): + if isinstance( value, SafeStringWrapper ): + # Only ever wrap one-layer + return value + if callable( value ): + safe_class = CallableSafeStringWrapper + else: + safe_class = SafeStringWrapper + if isinstance( value, no_wrap_classes ): + return value + if isinstance( value, __DONT_WRAP_TYPES__ ): + return sanitize_lists_to_string( value, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + if isinstance( value, __WRAP_NO_SUBCLASS__ ): + return safe_class( value, safe_string_wrapper_function = __do_wrap ) + for this_type in __WRAP_SEQUENCES__ + __WRAP_SETS__: + if isinstance( value, this_type ): + return this_type( map( __do_wrap, value ) ) + for this_type in __WRAP_MAPPINGS__: + if isinstance( value, this_type ): + # Wrap both key and value + return this_type( map( lambda x: ( __do_wrap( x[0] ), __do_wrap( x[1] ) ), value.items() ) ) + # Create a dynamic class that joins SafeStringWrapper with the object being wrapped. + # This allows e.g. isinstance to continue to work. + try: + wrapped_class_name = value.__name__ + wrapped_class = value + except: + wrapped_class_name = value.__class__.__name__ + wrapped_class = value.__class__ + value_mod = inspect.getmodule( value ) + if value_mod: + wrapped_class_name = "%s.%s" % ( value_mod.__name__, wrapped_class_name ) + wrapped_class_name = "SafeStringWrapper(%s:%s)" % ( wrapped_class_name, ",".join( sorted( map( str, no_wrap_classes ) ) ) ) + do_wrap_func_name = "__do_wrap_%s" % ( wrapped_class_name ) + do_wrap_func = __do_wrap + global_dict = globals() + if wrapped_class_name in global_dict: + # Check to see if we have created a wrapper for this class yet, if so, reuse + wrapped_class = global_dict.get( wrapped_class_name ) + do_wrap_func = global_dict.get( do_wrap_func_name, __do_wrap ) + else: + try: + wrapped_class = type( wrapped_class_name, ( safe_class, wrapped_class, ), {} ) + except TypeError, e: + # Fail-safe for when a class cannot be dynamically subclassed. + log.warning( "Unable to create dynamic subclass for %s, %s: %s", type( value), value, e ) + wrapped_class = type( wrapped_class_name, ( safe_class, ), {} ) + if wrapped_class not in ( SafeStringWrapper, CallableSafeStringWrapper ): + # Save this wrapper for reuse and pickling/copying + global_dict[ wrapped_class_name ] = wrapped_class + do_wrap_func.__name__ = do_wrap_func_name + global_dict[ do_wrap_func_name ] = do_wrap_func + def pickle_safe_object( safe_object ): + return ( wrapped_class, ( safe_object.unsanitized, do_wrap_func, ) ) + # Set pickle and copy properties + copy_reg.pickle( wrapped_class, pickle_safe_object, do_wrap_func ) + return wrapped_class( value, safe_string_wrapper_function = do_wrap_func ) + # Determine classes not to wrap + if no_wrap_classes: + if not isinstance( no_wrap_classes, ( tuple, list ) ): + no_wrap_classes = [ no_wrap_classes ] + no_wrap_classes = list( no_wrap_classes ) + list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + else: + no_wrap_classes = list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + no_wrap_classes = tuple( set( sorted( no_wrap_classes, key=str ) ) ) + return __do_wrap( value ) + + +# N.B. refer to e.g. https://docs.python.org/2/reference/datamodel.html for information on Python's Data Model. + + +class SafeStringWrapper( object ): + """ + Class that wraps and sanitizes any provided value's attributes + that will attempt to be cast into a string. + + Attempts to mimic behavior of original class, including operands. + + To ensure proper handling of e.g. subclass checks, the *wrap_with_safe_string()* + method should be used. + + This wrapping occurs in a recursive/parasitic fashion, as all called attributes of + the originally wrapped object will also be wrapped and sanitized, unless the attribute + is of a type found in __DONT_SANITIZE_TYPES__ + __DONT_WRAP_TYPES__, where e.g. ~(strings + will still be sanitized, but not wrapped), and e.g. integers will have neither. + """ + __UNSANITIZED_ATTRIBUTE_NAME__ = 'unsanitized' + __NO_WRAP_NAMES__ = [ '__safe_string_wrapper_function__', __UNSANITIZED_ATTRIBUTE_NAME__] + + + def __new__( cls, *arg, **kwd ): + # We need to define a __new__ since, we are subclassing from e.g. immutable str, which internally sets data + # that will be used when other + this (this + other is handled by __add__) + safe_string_wrapper_function = kwd.get( 'safe_string_wrapper_function', None) or wrap_with_safe_string + try: + return super( SafeStringWrapper, cls ).__new__( cls, sanitize_lists_to_string( arg[0], valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + except Exception, e: + log.warning( "Could not provide an argument to %s.__new__: %s; will try without arguments.", cls, e ) + return super( SafeStringWrapper, cls ).__new__( cls ) + + def __init__( self, value, safe_string_wrapper_function = wrap_with_safe_string ): + self.unsanitized = value + self.__safe_string_wrapper_function__ = safe_string_wrapper_function + + def __str__( self ): + return sanitize_lists_to_string( self.unsanitized, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + + def __repr__( self ): + return "%s object at %x on: %s" % ( sanitize_lists_to_string( self.__class__.__name__, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ), id( self ), sanitize_lists_to_string( repr( self.unsanitized ), valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __le__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized <= other + + def __eq__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized == other + + def __ne__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized != other + + def __gt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized > other + + def __ge__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized >= other + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __cmp__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return cmp( self.unsanitized, other ) + + # Do not implement __rcmp__, python 2.2 < 2.6 + + def __hash__( self ): + return hash( self.unsanitized ) + + def __nonzero__( self ): + return bool( self.unsanitized ) + + # Do not implement __unicode__, we will rely on __str__ + + def __getattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + #FIXME: is this ever reached? + return object.__getattr__( self, name ) + return self.__safe_string_wrapper_function__( getattr( self.unsanitized, name ) ) + + def __setattr__( self, name, value ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__setattr__( self, name, value ) + return setattr( self.unsanitized, name, value ) + + def __delattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__delattr__( self, name ) + return delattr( self.unsanitized, name ) + + def __getattribute__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__getattribute__( self, name ) + return self.__safe_string_wrapper_function__( getattr( object.__getattribute__( self, 'unsanitized' ), name ) ) + + # Skip Descriptors + + # Skip __slots__ + + # Don't need __metaclass__, we'll use the helper function to handle with subclassing for e.g. isinstance() + + # Revisit: + # __instancecheck__ + # __subclasscheck__ + # We are using a helper class to create dynamic subclasses to handle class checks + + # We address __call__ as needed based upon unsanitized, through the use of a CallableSafeStringWrapper class + + def __len__( self ): + original_value = self.unsanitized + while isinstance( original_value, SafeStringWrapper ): + original_value = self.unsanitized + return len( self.unsanitized ) + + def __getitem__( self, key ): + return self.__safe_string_wrapper_function__( self.unsanitized[ key ] ) + + def __setitem__( self, key, value ): + while isinstance( value, SafeStringWrapper ): + value = value.unsanitized + self.unsanitized[ key ] = value + + def __delitem__( self, key ): + del self.unsanitized[ key ] + + def __iter__( self ): + return iter( map( self.__safe_string_wrapper_function__, iter( self.unsanitized ) ) ) + + # Do not implement __reversed__ + + def __contains__( self, item ): + # FIXME: Do we need to consider if item is/isn't or does/doesn't contain SafeStringWrapper? + # When considering e.g. nested lists/dicts/etc, this gets complicated + while isinstance( item, SafeStringWrapper ): + item = item.unsanitized + return item in self.unsanitized + + # Not sure that we need these slice methods, but will provide anyway + def __getslice__( self, i, j ): + return self.__safe_string_wrapper_function__( self.unsanitized[ i:j ] ) + + def __setslice__( self, i, j, value ): + self.unsanitized[ i:j ] = value + + def __delslice__( self, i, j ): + del self.unsanitized[ i:j ] + + def __add__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized + other ) + + def __sub__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized - other ) + + def __mul__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized * other ) + + def __floordiv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized // other ) + + def __mod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized % other ) + + def __divmod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( divmod( self.unsanitized, other ) ) + + def __pow__( self, *other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( self.unsanitized, *other ) ) + + def __lshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized << other ) + + def __rshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized >> other ) + + def __and__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized & other ) + + def __xor__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized ^ other ) + + def __or__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized | other ) + + def __div__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + def __truediv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + # The only reflected operand that we will define is __rpow__, due to coercion rules complications as per docs + def __rpow__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( other, self.unsanitized ) ) + + # Do not implement in-place operands + + def __neg__( self ): + return __safe_string_wrapper_function__( -self.unsanitized ) + + def __pos__( self ): + return __safe_string_wrapper_function__( +self.unsanitized ) + + def __abs__( self ): + return __safe_string_wrapper_function__( abs( self.unsanitized ) ) + + def __invert__( self ): + return __safe_string_wrapper_function__( ~self.unsanitized ) + + def __complex__( self ): + return __safe_string_wrapper_function__( complex( self.unsanitized ) ) + + def __int__( self ): + return int( self.unsanitized ) + + def __float__( self ): + return float( self.unsanitized ) + + def __oct__( self ): + return oct( self.unsanitized ) + + def __hex__( self ): + return hex( self.unsanitized ) + + def __index__( self ): + return self.unsanitized.index() + + def __coerce__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return coerce( self.unsanitized, other ) + + def __enter__( self ): + return self.unsanitized.__enter__() + + def __exit__( self, *args ): + return self.unsanitized.__exit__( *args ) + +class CallableSafeStringWrapper( SafeStringWrapper ): + + def __call__( self, *args, **kwds ): + return self.__safe_string_wrapper_function__( self.unsanitized( *args, **kwds ) ) + + +# Enable pickling/deepcopy +def pickle_SafeStringWrapper( safe_object ): + args = ( safe_object.unsanitized, ) + cls = SafeStringWrapper + if isinstance( safe_object, CallableSafeStringWrapper ): + cls = CallableSafeStringWrapper + return ( cls, args ) +copy_reg.pickle( SafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) +copy_reg.pickle( CallableSafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) + https://bitbucket.org/galaxy/galaxy-central/commits/f87251ad7e02/ Changeset: f87251ad7e02 Branch: stable User: natefoo Date: 2015-01-13 15:27:02+00:00 Summary: Update tag latest_2013.08.12 for changeset cee903b8b3ee Affected #: 1 file diff -r 91ca514f61ec74cecd7ef93a1f34b4fc618e0694 -r f87251ad7e0262908cb4f6e37e235610c16eaeb9 .hgtags --- a/.hgtags +++ b/.hgtags @@ -14,7 +14,7 @@ b986c184be88947b5d1d90be7f36cfd2627dd938 latest_2013.02.08 dec9431d66b837a208e2f060d90afd913c721227 latest_2013.04.01 19e56e66b0b344c6e2afa4541f6988e4fdb9af29 latest_2013.06.03 -9ed84cd208e07e8985ec917cb025fcbbb09edcfb latest_2013.08.12 +cee903b8b3eee9145627ee89742555dac581791e latest_2013.08.12 81fbe25bd02edcd53065e8e4476dd1dfb5a72cf2 latest_2013.11.04 2a756ca2cb1826db7796018e77d12e2dd7b67603 latest_2014.02.10 ca45b78adb4152fc6e7395514d46eba6b7d0b838 release_2014.08.11 https://bitbucket.org/galaxy/galaxy-central/commits/0face4c7b1c9/ Changeset: 0face4c7b1c9 Branch: stable User: natefoo Date: 2015-01-13 15:27:03+00:00 Summary: Merge head created for security fix on latest_2013.08.12 Affected #: 3 files https://bitbucket.org/galaxy/galaxy-central/commits/7d5aa19a166c/ Changeset: 7d5aa19a166c Branch: stable User: dan Date: 2015-01-13 15:27:09+00:00 Summary: Fix a critical security vulnerability where unsanitized user-modifiable values could be included in a command line template. Affected #: 5 files diff -r 81fbe25bd02edcd53065e8e4476dd1dfb5a72cf2 -r 7d5aa19a166cba9039e15f338a1e3fc924c43d3a lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -20,6 +20,7 @@ import galaxy.model from galaxy.util import listify, stringify_dictionary_keys, string_as_bool +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.util.odict import odict from galaxy.web import form_builder from sqlalchemy.orm import object_session @@ -188,6 +189,9 @@ def to_string( self, value ): return str( value ) + def to_safe_string( self, value ): + return sanitize_lists_to_string( self.to_string( value ) ) + def make_copy( self, value, target_context = None, source_context = None ): return copy.deepcopy( value ) @@ -405,6 +409,10 @@ def to_string( self, value ): return simplejson.dumps( value ) + def to_safe_string( self, value ): + # We do not sanitize json dicts + return simplejson.dumps( value ) + class PythonObjectParameter( MetadataParameter ): def to_string( self, value ): @@ -429,6 +437,10 @@ return str( self.spec.no_value ) return value.file_name + def to_safe_string( self, value ): + # We do not sanitize file names + return self.to_string( value ) + def get_html_field( self, value=None, context={}, other_values={}, **kwd ): return form_builder.TextField( self.spec.name, value=str( value.id ) ) diff -r 81fbe25bd02edcd53065e8e4476dd1dfb5a72cf2 -r 7d5aa19a166cba9039e15f338a1e3fc924c43d3a lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -65,6 +65,8 @@ from galaxy.model.item_attrs import Dictifiable from tool_shed.util import shed_util_common from .loader import load_tool, template_macro_params +from galaxy.util.object_wrapper import wrap_with_safe_string +from galaxy import exceptions log = logging.getLogger( __name__ ) @@ -2560,7 +2562,9 @@ return self.app.tool_data_tables[ table_name ].get_entry( query_attr, query_val, return_attr ) param_dict['__get_data_table_entry__'] = get_data_table_entry - + # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames. + self.__sanitize_param_dict( param_dict ) + # Parameters added after this line are not sanitized # We add access to app here, this allows access to app.config, etc param_dict['__app__'] = RawObjectWrapper( self.app ) # More convienent access to app.config.new_file_path; we don't need to @@ -2579,6 +2583,23 @@ param_dict['__user__'] = RawObjectWrapper( param_dict.get( '__user__', None ) ) # Return the dictionary of parameters return param_dict + def __sanitize_param_dict( self, param_dict ): + """ + Sanitize all values that will be substituted on the command line, with the exception of ToolParameterValueWrappers, + which already have their own specific sanitization rules and also exclude special-cased named values. + We will only examine the first level for values to skip; the wrapping function will recurse as necessary. + + Note: this method follows the style of the similar populate calls, in that param_dict is modified in-place. + """ + # chromInfo is a filename, do not sanitize it. + skip = [ 'chromInfo' ] + if not self.options or self.options.sanitize: + for key, value in param_dict.items(): + if key not in skip: + # Remove key so that new wrapped object will occupy key slot + del param_dict[key] + # And replace with new wrapped key + param_dict[ wrap_with_safe_string( key, no_wrap_classes=ToolParameterValueWrapper ) ] = wrap_with_safe_string( value, no_wrap_classes=ToolParameterValueWrapper ) def build_param_file( self, param_dict, directory=None ): """ Build temporary file for file based parameter transfer if needed @@ -3352,10 +3373,13 @@ if name in self.metadata.spec: if rval is None: rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) + rval = self.metadata.spec[ name ].param.to_safe_string( rval ) # Store this value, so we don't need to recalculate if needed # again setattr( self, name, rval ) + else: + #escape string value of non-defined metadata value + rval = wrap_with_safe_string( rval ) return rval def __nonzero__( self ): return self.metadata.__nonzero__() @@ -3376,9 +3400,13 @@ ext = tool.inputs[name].extensions[0] except: ext = 'data' - self.dataset = NoneDataset( datatypes_registry = datatypes_registry, ext = ext ) + self.dataset = wrap_with_safe_string( NoneDataset( datatypes_registry=datatypes_registry, ext=ext ), no_wrap_classes=ToolParameterValueWrapper ) else: - self.dataset = dataset + # Tool wrappers should not normally be accessing .dataset directly, + # so we will wrap it and keep the original around for file paths + # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper? + self.unsanitized = dataset + self.dataset = wrap_with_safe_string( dataset, no_wrap_classes=ToolParameterValueWrapper ) self.metadata = self.MetadataWrapper( dataset.metadata ) self.false_path = false_path @@ -3386,10 +3414,28 @@ if self.false_path is not None: return self.false_path else: - return self.dataset.file_name + return self.unsanitized.file_name def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': return self.false_path + #does not implement support for false_extra_files_path + elif key == 'extra_files_path': + try: + # Assume it is an output and that this wrapper + # will be set with correct "files_path" for this + # job. + return self.files_path + except AttributeError: + # Otherwise, we have an input - delegate to model and + # object store to find the static location of this + # directory. + try: + return self.unsanitized.extra_files_path + except exceptions.ObjectNotFound: + # NestedObjectstore raises an error here + # instead of just returning a non-existent + # path like DiskObjectStore. + raise else: return getattr( self.dataset, key ) def __nonzero__( self ): diff -r 81fbe25bd02edcd53065e8e4476dd1dfb5a72cf2 -r 7d5aa19a166cba9039e15f338a1e3fc924c43d3a lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -10,6 +10,7 @@ from galaxy.util.none_like import NoneDataset from galaxy.util.odict import odict from galaxy.util.template import fill_template +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.web import url_for import logging @@ -224,7 +225,7 @@ if not chrom_info: # Default to built-in build. - chrom_info = os.path.join( trans.app.config.len_file_path, "%s.len" % input_dbkey ) + chrom_info = os.path.join( trans.app.config.len_file_path, "%s.len" % ( sanitize_lists_to_string( input_dbkey ) ) ) incoming[ "chromInfo" ] = chrom_info inp_data.update( db_datasets ) diff -r 81fbe25bd02edcd53065e8e4476dd1dfb5a72cf2 -r 7d5aa19a166cba9039e15f338a1e3fc924c43d3a lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -309,47 +309,60 @@ '#' : '__pd__' } -def restore_text(text): +def restore_text( text, character_map=mapped_chars ): """Restores sanitized text""" if not text: return text - for key, value in mapped_chars.items(): + for key, value in character_map.items(): text = text.replace(value, key) return text -def sanitize_text(text): + +def sanitize_text( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """ Restricts the characters that are allowed in text; accepts both strings - and lists of strings. + and lists of strings; non-string entities will be cast to strings. """ - if isinstance( text, basestring ): - return _sanitize_text_helper(text) - elif isinstance( text, list ): - return [ _sanitize_text_helper(t) for t in text ] + if isinstance( text, list ): + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), text ) + if not isinstance( text, basestring ): + text = smart_str( text ) + return _sanitize_text_helper( text, valid_characters=valid_characters, character_map=character_map ) -def _sanitize_text_helper(text): +def _sanitize_text_helper( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Restricts the characters that are allowed in a string""" out = [] for c in text: - if c in valid_chars: + if c in valid_characters: out.append(c) - elif c in mapped_chars: - out.append(mapped_chars[c]) + elif c in character_map: + out.append( character_map[c] ) else: - out.append('X') # makes debugging easier + out.append( invalid_character ) # makes debugging easier return ''.join(out) -def sanitize_param(value): + +def sanitize_lists_to_string( values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + if isinstance( values, list ): + rval = [] + for value in values: + rval.append( sanitize_lists_to_string( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) ) + values = ",".join( rval ) + else: + values = sanitize_text( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + return values + + +def sanitize_param( value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Clean incoming parameters (strings or lists)""" if isinstance( value, basestring ): - return sanitize_text(value) + return sanitize_text( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) elif isinstance( value, list ): - return map(sanitize_text, value) + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), value ) else: - raise Exception, 'Unknown parameter type (%s)' % ( type( value ) ) + raise Exception('Unknown parameter type (%s)' % ( type( value ) )) -valid_filename_chars = set( string.ascii_letters + string.digits + '_.' ) invalid_filenames = [ '', '.', '..' ] def sanitize_for_filename( text, default=None ): """ @@ -574,6 +587,28 @@ return s +def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'): + """ + Returns a bytestring version of 's', encoded as specified in 'encoding'. + + If strings_only is True, don't convert (some) non-string-like objects. + + Adapted from an older, simpler version of django.utils.encoding.smart_str. + """ + if strings_only and isinstance(s, (type(None), int)): + return s + if not isinstance(s, basestring): + try: + return str(s) + except UnicodeEncodeError: + return unicode(s).encode(encoding, errors) + elif isinstance(s, unicode): + return s.encode(encoding, errors) + elif s and encoding != 'utf-8': + return s.decode('utf-8', errors).encode(encoding, errors) + else: + return s + def object_to_string( obj ): return binascii.hexlify( obj ) diff -r 81fbe25bd02edcd53065e8e4476dd1dfb5a72cf2 -r 7d5aa19a166cba9039e15f338a1e3fc924c43d3a lib/galaxy/util/object_wrapper.py --- /dev/null +++ b/lib/galaxy/util/object_wrapper.py @@ -0,0 +1,436 @@ +""" +Classes for wrapping Objects and Sanitizing string output. +""" + +import inspect +import copy_reg +import logging +import string +from numbers import Number +from types import ( NoneType, NotImplementedType, EllipsisType, FunctionType, MethodType, GeneratorType, CodeType, + BuiltinFunctionType, BuiltinMethodType, ModuleType, XRangeType, SliceType, TracebackType, FrameType, + BufferType, DictProxyType, GetSetDescriptorType, MemberDescriptorType ) +from UserDict import UserDict + +from galaxy.util import sanitize_lists_to_string as _sanitize_lists_to_string + +log = logging.getLogger( __name__ ) + +# Define different behaviors for different types, see also: https://docs.python.org/2/library/types.html + +# Known Callable types +__CALLABLE_TYPES__ = ( FunctionType, MethodType, GeneratorType, CodeType, BuiltinFunctionType, BuiltinMethodType, ) + +# Always wrap these types without attempting to subclass +__WRAP_NO_SUBCLASS__ = ( ModuleType, XRangeType, SliceType, BufferType, TracebackType, FrameType, DictProxyType, + GetSetDescriptorType, MemberDescriptorType ) + __CALLABLE_TYPES__ + +# Don't wrap or sanitize. +__DONT_SANITIZE_TYPES__ = ( Number, bool, NoneType, NotImplementedType, EllipsisType, bytearray, ) + +# Don't wrap, but do sanitize. +__DONT_WRAP_TYPES__ = tuple() #( basestring, ) so that we can get the unsanitized string, we will now wrap basestring instances + +# Wrap contents, but not the container +__WRAP_SEQUENCES__ = ( tuple, list, ) +__WRAP_SETS__ = ( set, frozenset, ) +__WRAP_MAPPINGS__ = ( dict, UserDict, ) + + +# Define the set of characters that are not sanitized, and define a set of mappings for those that are. +# characters that are valid +VALID_CHARACTERS = set( string.letters + string.digits + " -=_.()/+*^,:?!@" ) + +# characters that are allowed but need to be escaped +CHARACTER_MAP = { '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__'} + +INVALID_CHARACTER = "X" + +def sanitize_lists_to_string( values, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP, invalid_character=INVALID_CHARACTER ): + return _sanitize_lists_to_string( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + + +def wrap_with_safe_string( value, no_wrap_classes = None ): + """ + Recursively wrap values that should be wrapped. + """ + + def __do_wrap( value ): + if isinstance( value, SafeStringWrapper ): + # Only ever wrap one-layer + return value + if callable( value ): + safe_class = CallableSafeStringWrapper + else: + safe_class = SafeStringWrapper + if isinstance( value, no_wrap_classes ): + return value + if isinstance( value, __DONT_WRAP_TYPES__ ): + return sanitize_lists_to_string( value, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + if isinstance( value, __WRAP_NO_SUBCLASS__ ): + return safe_class( value, safe_string_wrapper_function = __do_wrap ) + for this_type in __WRAP_SEQUENCES__ + __WRAP_SETS__: + if isinstance( value, this_type ): + return this_type( map( __do_wrap, value ) ) + for this_type in __WRAP_MAPPINGS__: + if isinstance( value, this_type ): + # Wrap both key and value + return this_type( map( lambda x: ( __do_wrap( x[0] ), __do_wrap( x[1] ) ), value.items() ) ) + # Create a dynamic class that joins SafeStringWrapper with the object being wrapped. + # This allows e.g. isinstance to continue to work. + try: + wrapped_class_name = value.__name__ + wrapped_class = value + except: + wrapped_class_name = value.__class__.__name__ + wrapped_class = value.__class__ + value_mod = inspect.getmodule( value ) + if value_mod: + wrapped_class_name = "%s.%s" % ( value_mod.__name__, wrapped_class_name ) + wrapped_class_name = "SafeStringWrapper(%s:%s)" % ( wrapped_class_name, ",".join( sorted( map( str, no_wrap_classes ) ) ) ) + do_wrap_func_name = "__do_wrap_%s" % ( wrapped_class_name ) + do_wrap_func = __do_wrap + global_dict = globals() + if wrapped_class_name in global_dict: + # Check to see if we have created a wrapper for this class yet, if so, reuse + wrapped_class = global_dict.get( wrapped_class_name ) + do_wrap_func = global_dict.get( do_wrap_func_name, __do_wrap ) + else: + try: + wrapped_class = type( wrapped_class_name, ( safe_class, wrapped_class, ), {} ) + except TypeError, e: + # Fail-safe for when a class cannot be dynamically subclassed. + log.warning( "Unable to create dynamic subclass for %s, %s: %s", type( value), value, e ) + wrapped_class = type( wrapped_class_name, ( safe_class, ), {} ) + if wrapped_class not in ( SafeStringWrapper, CallableSafeStringWrapper ): + # Save this wrapper for reuse and pickling/copying + global_dict[ wrapped_class_name ] = wrapped_class + do_wrap_func.__name__ = do_wrap_func_name + global_dict[ do_wrap_func_name ] = do_wrap_func + def pickle_safe_object( safe_object ): + return ( wrapped_class, ( safe_object.unsanitized, do_wrap_func, ) ) + # Set pickle and copy properties + copy_reg.pickle( wrapped_class, pickle_safe_object, do_wrap_func ) + return wrapped_class( value, safe_string_wrapper_function = do_wrap_func ) + # Determine classes not to wrap + if no_wrap_classes: + if not isinstance( no_wrap_classes, ( tuple, list ) ): + no_wrap_classes = [ no_wrap_classes ] + no_wrap_classes = list( no_wrap_classes ) + list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + else: + no_wrap_classes = list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + no_wrap_classes = tuple( set( sorted( no_wrap_classes, key=str ) ) ) + return __do_wrap( value ) + + +# N.B. refer to e.g. https://docs.python.org/2/reference/datamodel.html for information on Python's Data Model. + + +class SafeStringWrapper( object ): + """ + Class that wraps and sanitizes any provided value's attributes + that will attempt to be cast into a string. + + Attempts to mimic behavior of original class, including operands. + + To ensure proper handling of e.g. subclass checks, the *wrap_with_safe_string()* + method should be used. + + This wrapping occurs in a recursive/parasitic fashion, as all called attributes of + the originally wrapped object will also be wrapped and sanitized, unless the attribute + is of a type found in __DONT_SANITIZE_TYPES__ + __DONT_WRAP_TYPES__, where e.g. ~(strings + will still be sanitized, but not wrapped), and e.g. integers will have neither. + """ + __UNSANITIZED_ATTRIBUTE_NAME__ = 'unsanitized' + __NO_WRAP_NAMES__ = [ '__safe_string_wrapper_function__', __UNSANITIZED_ATTRIBUTE_NAME__] + + + def __new__( cls, *arg, **kwd ): + # We need to define a __new__ since, we are subclassing from e.g. immutable str, which internally sets data + # that will be used when other + this (this + other is handled by __add__) + safe_string_wrapper_function = kwd.get( 'safe_string_wrapper_function', None) or wrap_with_safe_string + try: + return super( SafeStringWrapper, cls ).__new__( cls, sanitize_lists_to_string( arg[0], valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + except Exception, e: + log.warning( "Could not provide an argument to %s.__new__: %s; will try without arguments.", cls, e ) + return super( SafeStringWrapper, cls ).__new__( cls ) + + def __init__( self, value, safe_string_wrapper_function = wrap_with_safe_string ): + self.unsanitized = value + self.__safe_string_wrapper_function__ = safe_string_wrapper_function + + def __str__( self ): + return sanitize_lists_to_string( self.unsanitized, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + + def __repr__( self ): + return "%s object at %x on: %s" % ( sanitize_lists_to_string( self.__class__.__name__, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ), id( self ), sanitize_lists_to_string( repr( self.unsanitized ), valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __le__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized <= other + + def __eq__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized == other + + def __ne__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized != other + + def __gt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized > other + + def __ge__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized >= other + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __cmp__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return cmp( self.unsanitized, other ) + + # Do not implement __rcmp__, python 2.2 < 2.6 + + def __hash__( self ): + return hash( self.unsanitized ) + + def __nonzero__( self ): + return bool( self.unsanitized ) + + # Do not implement __unicode__, we will rely on __str__ + + def __getattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + #FIXME: is this ever reached? + return object.__getattr__( self, name ) + return self.__safe_string_wrapper_function__( getattr( self.unsanitized, name ) ) + + def __setattr__( self, name, value ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__setattr__( self, name, value ) + return setattr( self.unsanitized, name, value ) + + def __delattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__delattr__( self, name ) + return delattr( self.unsanitized, name ) + + def __getattribute__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__getattribute__( self, name ) + return self.__safe_string_wrapper_function__( getattr( object.__getattribute__( self, 'unsanitized' ), name ) ) + + # Skip Descriptors + + # Skip __slots__ + + # Don't need __metaclass__, we'll use the helper function to handle with subclassing for e.g. isinstance() + + # Revisit: + # __instancecheck__ + # __subclasscheck__ + # We are using a helper class to create dynamic subclasses to handle class checks + + # We address __call__ as needed based upon unsanitized, through the use of a CallableSafeStringWrapper class + + def __len__( self ): + original_value = self.unsanitized + while isinstance( original_value, SafeStringWrapper ): + original_value = self.unsanitized + return len( self.unsanitized ) + + def __getitem__( self, key ): + return self.__safe_string_wrapper_function__( self.unsanitized[ key ] ) + + def __setitem__( self, key, value ): + while isinstance( value, SafeStringWrapper ): + value = value.unsanitized + self.unsanitized[ key ] = value + + def __delitem__( self, key ): + del self.unsanitized[ key ] + + def __iter__( self ): + return iter( map( self.__safe_string_wrapper_function__, iter( self.unsanitized ) ) ) + + # Do not implement __reversed__ + + def __contains__( self, item ): + # FIXME: Do we need to consider if item is/isn't or does/doesn't contain SafeStringWrapper? + # When considering e.g. nested lists/dicts/etc, this gets complicated + while isinstance( item, SafeStringWrapper ): + item = item.unsanitized + return item in self.unsanitized + + # Not sure that we need these slice methods, but will provide anyway + def __getslice__( self, i, j ): + return self.__safe_string_wrapper_function__( self.unsanitized[ i:j ] ) + + def __setslice__( self, i, j, value ): + self.unsanitized[ i:j ] = value + + def __delslice__( self, i, j ): + del self.unsanitized[ i:j ] + + def __add__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized + other ) + + def __sub__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized - other ) + + def __mul__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized * other ) + + def __floordiv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized // other ) + + def __mod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized % other ) + + def __divmod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( divmod( self.unsanitized, other ) ) + + def __pow__( self, *other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( self.unsanitized, *other ) ) + + def __lshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized << other ) + + def __rshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized >> other ) + + def __and__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized & other ) + + def __xor__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized ^ other ) + + def __or__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized | other ) + + def __div__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + def __truediv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + # The only reflected operand that we will define is __rpow__, due to coercion rules complications as per docs + def __rpow__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( other, self.unsanitized ) ) + + # Do not implement in-place operands + + def __neg__( self ): + return __safe_string_wrapper_function__( -self.unsanitized ) + + def __pos__( self ): + return __safe_string_wrapper_function__( +self.unsanitized ) + + def __abs__( self ): + return __safe_string_wrapper_function__( abs( self.unsanitized ) ) + + def __invert__( self ): + return __safe_string_wrapper_function__( ~self.unsanitized ) + + def __complex__( self ): + return __safe_string_wrapper_function__( complex( self.unsanitized ) ) + + def __int__( self ): + return int( self.unsanitized ) + + def __float__( self ): + return float( self.unsanitized ) + + def __oct__( self ): + return oct( self.unsanitized ) + + def __hex__( self ): + return hex( self.unsanitized ) + + def __index__( self ): + return self.unsanitized.index() + + def __coerce__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return coerce( self.unsanitized, other ) + + def __enter__( self ): + return self.unsanitized.__enter__() + + def __exit__( self, *args ): + return self.unsanitized.__exit__( *args ) + +class CallableSafeStringWrapper( SafeStringWrapper ): + + def __call__( self, *args, **kwds ): + return self.__safe_string_wrapper_function__( self.unsanitized( *args, **kwds ) ) + + +# Enable pickling/deepcopy +def pickle_SafeStringWrapper( safe_object ): + args = ( safe_object.unsanitized, ) + cls = SafeStringWrapper + if isinstance( safe_object, CallableSafeStringWrapper ): + cls = CallableSafeStringWrapper + return ( cls, args ) +copy_reg.pickle( SafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) +copy_reg.pickle( CallableSafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) + https://bitbucket.org/galaxy/galaxy-central/commits/48635612a356/ Changeset: 48635612a356 Branch: stable User: natefoo Date: 2015-01-13 15:27:13+00:00 Summary: Update tag latest_2013.11.04 for changeset 7d5aa19a166c Affected #: 1 file diff -r 0face4c7b1c9c1991fb7ab93abca5e0f47eece96 -r 48635612a3561e83aa77d604e44d0cea80969c2a .hgtags --- a/.hgtags +++ b/.hgtags @@ -15,7 +15,7 @@ dec9431d66b837a208e2f060d90afd913c721227 latest_2013.04.01 19e56e66b0b344c6e2afa4541f6988e4fdb9af29 latest_2013.06.03 cee903b8b3eee9145627ee89742555dac581791e latest_2013.08.12 -81fbe25bd02edcd53065e8e4476dd1dfb5a72cf2 latest_2013.11.04 +7d5aa19a166cba9039e15f338a1e3fc924c43d3a latest_2013.11.04 2a756ca2cb1826db7796018e77d12e2dd7b67603 latest_2014.02.10 ca45b78adb4152fc6e7395514d46eba6b7d0b838 release_2014.08.11 548ab24667d6206780237bd807f7d857a484c461 latest_2014.08.11 https://bitbucket.org/galaxy/galaxy-central/commits/5b0c93bcc791/ Changeset: 5b0c93bcc791 Branch: stable User: natefoo Date: 2015-01-13 15:27:15+00:00 Summary: Merge head created for security fix on latest_2013.11.04 Affected #: 4 files https://bitbucket.org/galaxy/galaxy-central/commits/0c000cc2f9c0/ Changeset: 0c000cc2f9c0 Branch: stable User: dan Date: 2015-01-13 15:27:19+00:00 Summary: Fix a critical security vulnerability where unsanitized user-modifiable values could be included in a command line template. Affected #: 6 files diff -r 2a756ca2cb1826db7796018e77d12e2dd7b67603 -r 0c000cc2f9c05bf4c1c2bc3a10215014fd64e696 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -17,6 +17,7 @@ import galaxy.model from galaxy.util import listify, stringify_dictionary_keys, string_as_bool +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.util.odict import odict from galaxy.util import in_directory from galaxy.web import form_builder @@ -209,6 +210,9 @@ def to_string( self, value ): return str( value ) + def to_safe_string( self, value ): + return sanitize_lists_to_string( self.to_string( value ) ) + def make_copy( self, value, target_context = None, source_context = None ): return copy.deepcopy( value ) @@ -457,6 +461,10 @@ def to_string( self, value ): return json.dumps( value ) + def to_safe_string( self, value ): + # We do not sanitize json dicts + return json.dumps( value ) + class PythonObjectParameter( MetadataParameter ): @@ -487,6 +495,10 @@ return str( self.spec.no_value ) return value.file_name + def to_safe_string( self, value ): + # We do not sanitize file names + return self.to_string( value ) + def get_html_field( self, value=None, context=None, other_values=None, **kwd ): context = context or {} other_values = other_values or {} diff -r 2a756ca2cb1826db7796018e77d12e2dd7b67603 -r 0c000cc2f9c05bf4c1c2bc3a10215014fd64e696 lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -9,6 +9,7 @@ from galaxy.util.none_like import NoneDataset from galaxy.util.odict import odict from galaxy.util.template import fill_template +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.web import url_for import logging @@ -176,7 +177,7 @@ if not chrom_info: # Default to built-in build. - chrom_info = os.path.join( trans.app.config.len_file_path, "%s.len" % input_dbkey ) + chrom_info = os.path.join( trans.app.config.len_file_path, "%s.len" % ( sanitize_lists_to_string( input_dbkey ) ) ) incoming[ "chromInfo" ] = os.path.abspath( chrom_info ) inp_data.update( db_datasets ) diff -r 2a756ca2cb1826db7796018e77d12e2dd7b67603 -r 0c000cc2f9c05bf4c1c2bc3a10215014fd64e696 lib/galaxy/tools/evaluation.py --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -2,10 +2,12 @@ import tempfile from galaxy import model +from galaxy.util.object_wrapper import wrap_with_safe_string from galaxy.util.bunch import Bunch from galaxy.util.none_like import NoneDataset from galaxy.util.template import fill_template from galaxy.tools.wrappers import ( + ToolParameterValueWrapper, DatasetFilenameWrapper, DatasetListWrapper, LibraryDatasetValueWrapper, @@ -109,6 +111,9 @@ self.__populate_input_dataset_wrappers(param_dict, input_datasets, input_dataset_paths) self.__populate_output_dataset_wrappers(param_dict, output_datasets, output_paths, job_working_directory) self.__populate_unstructured_path_rewrites(param_dict) + # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames. + self.__sanitize_param_dict( param_dict ) + # Parameters added after this line are not sanitized self.__populate_non_job_params(param_dict) # Return the dictionary of parameters @@ -309,6 +314,24 @@ #the paths rewritten. self.__walk_inputs( self.tool.inputs, param_dict, rewrite_unstructured_paths ) + def __sanitize_param_dict( self, param_dict ): + """ + Sanitize all values that will be substituted on the command line, with the exception of ToolParameterValueWrappers, + which already have their own specific sanitization rules and also exclude special-cased named values. + We will only examine the first level for values to skip; the wrapping function will recurse as necessary. + + Note: this method follows the style of the similar populate calls, in that param_dict is modified in-place. + """ + # chromInfo is a filename, do not sanitize it. + skip = [ 'chromInfo' ] + if not self.tool or not self.tool.options or self.tool.options.sanitize: + for key, value in param_dict.items(): + if key not in skip: + # Remove key so that new wrapped object will occupy key slot + del param_dict[key] + # And replace with new wrapped key + param_dict[ wrap_with_safe_string( key, no_wrap_classes=ToolParameterValueWrapper ) ] = wrap_with_safe_string( value, no_wrap_classes=ToolParameterValueWrapper ) + def build( self ): """ Build runtime description of job to execute, evaluate command and diff -r 2a756ca2cb1826db7796018e77d12e2dd7b67603 -r 0c000cc2f9c05bf4c1c2bc3a10215014fd64e696 lib/galaxy/tools/wrappers.py --- a/lib/galaxy/tools/wrappers.py +++ b/lib/galaxy/tools/wrappers.py @@ -1,5 +1,7 @@ import pipes +from galaxy import exceptions from galaxy.util.none_like import NoneDataset +from galaxy.util.object_wrapper import wrap_with_safe_string class ToolParameterValueWrapper( object ): @@ -145,10 +147,13 @@ if name in self.metadata.spec: if rval is None: rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) + rval = self.metadata.spec[ name ].param.to_safe_string( rval ) # Store this value, so we don't need to recalculate if needed # again setattr( self, name, rval ) + else: + #escape string value of non-defined metadata value + rval = wrap_with_safe_string( rval ) return rval def __nonzero__( self ): @@ -173,9 +178,13 @@ ext = tool.inputs[name].extensions[0] except: ext = 'data' - self.dataset = NoneDataset( datatypes_registry=datatypes_registry, ext=ext ) + self.dataset = wrap_with_safe_string( NoneDataset( datatypes_registry=datatypes_registry, ext=ext ), no_wrap_classes=ToolParameterValueWrapper ) else: - self.dataset = dataset + # Tool wrappers should not normally be accessing .dataset directly, + # so we will wrap it and keep the original around for file paths + # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper? + self.unsanitized = dataset + self.dataset = wrap_with_safe_string( dataset, no_wrap_classes=ToolParameterValueWrapper ) self.metadata = self.MetadataWrapper( dataset.metadata ) self.false_path = getattr( dataset_path, "false_path", None ) self.false_extra_files_path = getattr( dataset_path, "false_extra_files_path", None ) @@ -184,13 +193,31 @@ if self.false_path is not None: return self.false_path else: - return self.dataset.file_name + return self.unsanitized.file_name def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': return self.false_path elif self.false_extra_files_path is not None and key == 'extra_files_path': + # Path to extra files was rewritten for this job. return self.false_extra_files_path + elif key == 'extra_files_path': + try: + # Assume it is an output and that this wrapper + # will be set with correct "files_path" for this + # job. + return self.files_path + except AttributeError: + # Otherwise, we have an input - delegate to model and + # object store to find the static location of this + # directory. + try: + return self.unsanitized.extra_files_path + except exceptions.ObjectNotFound: + # NestedObjectstore raises an error here + # instead of just returning a non-existent + # path like DiskObjectStore. + raise else: return getattr( self.dataset, key ) diff -r 2a756ca2cb1826db7796018e77d12e2dd7b67603 -r 0c000cc2f9c05bf4c1c2bc3a10215014fd64e696 lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -328,48 +328,62 @@ '\n' : '__cn__', '\r' : '__cr__', '\t' : '__tc__', - '#' : '__pd__' - } + '#': '__pd__'} -def restore_text(text): + +def restore_text( text, character_map=mapped_chars ): """Restores sanitized text""" if not text: return text - for key, value in mapped_chars.items(): + for key, value in character_map.items(): text = text.replace(value, key) return text -def sanitize_text(text): + +def sanitize_text( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """ Restricts the characters that are allowed in text; accepts both strings - and lists of strings. + and lists of strings; non-string entities will be cast to strings. """ - if isinstance( text, basestring ): - return _sanitize_text_helper(text) - elif isinstance( text, list ): - return [ _sanitize_text_helper(t) for t in text ] + if isinstance( text, list ): + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), text ) + if not isinstance( text, basestring ): + text = smart_str( text ) + return _sanitize_text_helper( text, valid_characters=valid_characters, character_map=character_map ) -def _sanitize_text_helper(text): +def _sanitize_text_helper( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Restricts the characters that are allowed in a string""" out = [] for c in text: - if c in valid_chars: + if c in valid_characters: out.append(c) - elif c in mapped_chars: - out.append(mapped_chars[c]) + elif c in character_map: + out.append( character_map[c] ) else: - out.append('X') # makes debugging easier + out.append( invalid_character ) # makes debugging easier return ''.join(out) -def sanitize_param(value): + +def sanitize_lists_to_string( values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + if isinstance( values, list ): + rval = [] + for value in values: + rval.append( sanitize_lists_to_string( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) ) + values = ",".join( rval ) + else: + values = sanitize_text( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + return values + + +def sanitize_param( value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Clean incoming parameters (strings or lists)""" if isinstance( value, basestring ): - return sanitize_text(value) + return sanitize_text( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) elif isinstance( value, list ): - return map(sanitize_text, value) + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), value ) else: - raise Exception, 'Unknown parameter type (%s)' % ( type( value ) ) + raise Exception('Unknown parameter type (%s)' % ( type( value ) )) valid_filename_chars = set( string.ascii_letters + string.digits + '_.' ) invalid_filenames = [ '', '.', '..' ] diff -r 2a756ca2cb1826db7796018e77d12e2dd7b67603 -r 0c000cc2f9c05bf4c1c2bc3a10215014fd64e696 lib/galaxy/util/object_wrapper.py --- /dev/null +++ b/lib/galaxy/util/object_wrapper.py @@ -0,0 +1,436 @@ +""" +Classes for wrapping Objects and Sanitizing string output. +""" + +import inspect +import copy_reg +import logging +import string +from numbers import Number +from types import ( NoneType, NotImplementedType, EllipsisType, FunctionType, MethodType, GeneratorType, CodeType, + BuiltinFunctionType, BuiltinMethodType, ModuleType, XRangeType, SliceType, TracebackType, FrameType, + BufferType, DictProxyType, GetSetDescriptorType, MemberDescriptorType ) +from UserDict import UserDict + +from galaxy.util import sanitize_lists_to_string as _sanitize_lists_to_string + +log = logging.getLogger( __name__ ) + +# Define different behaviors for different types, see also: https://docs.python.org/2/library/types.html + +# Known Callable types +__CALLABLE_TYPES__ = ( FunctionType, MethodType, GeneratorType, CodeType, BuiltinFunctionType, BuiltinMethodType, ) + +# Always wrap these types without attempting to subclass +__WRAP_NO_SUBCLASS__ = ( ModuleType, XRangeType, SliceType, BufferType, TracebackType, FrameType, DictProxyType, + GetSetDescriptorType, MemberDescriptorType ) + __CALLABLE_TYPES__ + +# Don't wrap or sanitize. +__DONT_SANITIZE_TYPES__ = ( Number, bool, NoneType, NotImplementedType, EllipsisType, bytearray, ) + +# Don't wrap, but do sanitize. +__DONT_WRAP_TYPES__ = tuple() #( basestring, ) so that we can get the unsanitized string, we will now wrap basestring instances + +# Wrap contents, but not the container +__WRAP_SEQUENCES__ = ( tuple, list, ) +__WRAP_SETS__ = ( set, frozenset, ) +__WRAP_MAPPINGS__ = ( dict, UserDict, ) + + +# Define the set of characters that are not sanitized, and define a set of mappings for those that are. +# characters that are valid +VALID_CHARACTERS = set( string.letters + string.digits + " -=_.()/+*^,:?!@" ) + +# characters that are allowed but need to be escaped +CHARACTER_MAP = { '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__'} + +INVALID_CHARACTER = "X" + +def sanitize_lists_to_string( values, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP, invalid_character=INVALID_CHARACTER ): + return _sanitize_lists_to_string( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + + +def wrap_with_safe_string( value, no_wrap_classes = None ): + """ + Recursively wrap values that should be wrapped. + """ + + def __do_wrap( value ): + if isinstance( value, SafeStringWrapper ): + # Only ever wrap one-layer + return value + if callable( value ): + safe_class = CallableSafeStringWrapper + else: + safe_class = SafeStringWrapper + if isinstance( value, no_wrap_classes ): + return value + if isinstance( value, __DONT_WRAP_TYPES__ ): + return sanitize_lists_to_string( value, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + if isinstance( value, __WRAP_NO_SUBCLASS__ ): + return safe_class( value, safe_string_wrapper_function = __do_wrap ) + for this_type in __WRAP_SEQUENCES__ + __WRAP_SETS__: + if isinstance( value, this_type ): + return this_type( map( __do_wrap, value ) ) + for this_type in __WRAP_MAPPINGS__: + if isinstance( value, this_type ): + # Wrap both key and value + return this_type( map( lambda x: ( __do_wrap( x[0] ), __do_wrap( x[1] ) ), value.items() ) ) + # Create a dynamic class that joins SafeStringWrapper with the object being wrapped. + # This allows e.g. isinstance to continue to work. + try: + wrapped_class_name = value.__name__ + wrapped_class = value + except: + wrapped_class_name = value.__class__.__name__ + wrapped_class = value.__class__ + value_mod = inspect.getmodule( value ) + if value_mod: + wrapped_class_name = "%s.%s" % ( value_mod.__name__, wrapped_class_name ) + wrapped_class_name = "SafeStringWrapper(%s:%s)" % ( wrapped_class_name, ",".join( sorted( map( str, no_wrap_classes ) ) ) ) + do_wrap_func_name = "__do_wrap_%s" % ( wrapped_class_name ) + do_wrap_func = __do_wrap + global_dict = globals() + if wrapped_class_name in global_dict: + # Check to see if we have created a wrapper for this class yet, if so, reuse + wrapped_class = global_dict.get( wrapped_class_name ) + do_wrap_func = global_dict.get( do_wrap_func_name, __do_wrap ) + else: + try: + wrapped_class = type( wrapped_class_name, ( safe_class, wrapped_class, ), {} ) + except TypeError, e: + # Fail-safe for when a class cannot be dynamically subclassed. + log.warning( "Unable to create dynamic subclass for %s, %s: %s", type( value), value, e ) + wrapped_class = type( wrapped_class_name, ( safe_class, ), {} ) + if wrapped_class not in ( SafeStringWrapper, CallableSafeStringWrapper ): + # Save this wrapper for reuse and pickling/copying + global_dict[ wrapped_class_name ] = wrapped_class + do_wrap_func.__name__ = do_wrap_func_name + global_dict[ do_wrap_func_name ] = do_wrap_func + def pickle_safe_object( safe_object ): + return ( wrapped_class, ( safe_object.unsanitized, do_wrap_func, ) ) + # Set pickle and copy properties + copy_reg.pickle( wrapped_class, pickle_safe_object, do_wrap_func ) + return wrapped_class( value, safe_string_wrapper_function = do_wrap_func ) + # Determine classes not to wrap + if no_wrap_classes: + if not isinstance( no_wrap_classes, ( tuple, list ) ): + no_wrap_classes = [ no_wrap_classes ] + no_wrap_classes = list( no_wrap_classes ) + list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + else: + no_wrap_classes = list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + no_wrap_classes = tuple( set( sorted( no_wrap_classes, key=str ) ) ) + return __do_wrap( value ) + + +# N.B. refer to e.g. https://docs.python.org/2/reference/datamodel.html for information on Python's Data Model. + + +class SafeStringWrapper( object ): + """ + Class that wraps and sanitizes any provided value's attributes + that will attempt to be cast into a string. + + Attempts to mimic behavior of original class, including operands. + + To ensure proper handling of e.g. subclass checks, the *wrap_with_safe_string()* + method should be used. + + This wrapping occurs in a recursive/parasitic fashion, as all called attributes of + the originally wrapped object will also be wrapped and sanitized, unless the attribute + is of a type found in __DONT_SANITIZE_TYPES__ + __DONT_WRAP_TYPES__, where e.g. ~(strings + will still be sanitized, but not wrapped), and e.g. integers will have neither. + """ + __UNSANITIZED_ATTRIBUTE_NAME__ = 'unsanitized' + __NO_WRAP_NAMES__ = [ '__safe_string_wrapper_function__', __UNSANITIZED_ATTRIBUTE_NAME__] + + + def __new__( cls, *arg, **kwd ): + # We need to define a __new__ since, we are subclassing from e.g. immutable str, which internally sets data + # that will be used when other + this (this + other is handled by __add__) + safe_string_wrapper_function = kwd.get( 'safe_string_wrapper_function', None) or wrap_with_safe_string + try: + return super( SafeStringWrapper, cls ).__new__( cls, sanitize_lists_to_string( arg[0], valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + except Exception, e: + log.warning( "Could not provide an argument to %s.__new__: %s; will try without arguments.", cls, e ) + return super( SafeStringWrapper, cls ).__new__( cls ) + + def __init__( self, value, safe_string_wrapper_function = wrap_with_safe_string ): + self.unsanitized = value + self.__safe_string_wrapper_function__ = safe_string_wrapper_function + + def __str__( self ): + return sanitize_lists_to_string( self.unsanitized, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + + def __repr__( self ): + return "%s object at %x on: %s" % ( sanitize_lists_to_string( self.__class__.__name__, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ), id( self ), sanitize_lists_to_string( repr( self.unsanitized ), valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __le__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized <= other + + def __eq__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized == other + + def __ne__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized != other + + def __gt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized > other + + def __ge__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized >= other + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __cmp__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return cmp( self.unsanitized, other ) + + # Do not implement __rcmp__, python 2.2 < 2.6 + + def __hash__( self ): + return hash( self.unsanitized ) + + def __nonzero__( self ): + return bool( self.unsanitized ) + + # Do not implement __unicode__, we will rely on __str__ + + def __getattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + #FIXME: is this ever reached? + return object.__getattr__( self, name ) + return self.__safe_string_wrapper_function__( getattr( self.unsanitized, name ) ) + + def __setattr__( self, name, value ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__setattr__( self, name, value ) + return setattr( self.unsanitized, name, value ) + + def __delattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__delattr__( self, name ) + return delattr( self.unsanitized, name ) + + def __getattribute__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__getattribute__( self, name ) + return self.__safe_string_wrapper_function__( getattr( object.__getattribute__( self, 'unsanitized' ), name ) ) + + # Skip Descriptors + + # Skip __slots__ + + # Don't need __metaclass__, we'll use the helper function to handle with subclassing for e.g. isinstance() + + # Revisit: + # __instancecheck__ + # __subclasscheck__ + # We are using a helper class to create dynamic subclasses to handle class checks + + # We address __call__ as needed based upon unsanitized, through the use of a CallableSafeStringWrapper class + + def __len__( self ): + original_value = self.unsanitized + while isinstance( original_value, SafeStringWrapper ): + original_value = self.unsanitized + return len( self.unsanitized ) + + def __getitem__( self, key ): + return self.__safe_string_wrapper_function__( self.unsanitized[ key ] ) + + def __setitem__( self, key, value ): + while isinstance( value, SafeStringWrapper ): + value = value.unsanitized + self.unsanitized[ key ] = value + + def __delitem__( self, key ): + del self.unsanitized[ key ] + + def __iter__( self ): + return iter( map( self.__safe_string_wrapper_function__, iter( self.unsanitized ) ) ) + + # Do not implement __reversed__ + + def __contains__( self, item ): + # FIXME: Do we need to consider if item is/isn't or does/doesn't contain SafeStringWrapper? + # When considering e.g. nested lists/dicts/etc, this gets complicated + while isinstance( item, SafeStringWrapper ): + item = item.unsanitized + return item in self.unsanitized + + # Not sure that we need these slice methods, but will provide anyway + def __getslice__( self, i, j ): + return self.__safe_string_wrapper_function__( self.unsanitized[ i:j ] ) + + def __setslice__( self, i, j, value ): + self.unsanitized[ i:j ] = value + + def __delslice__( self, i, j ): + del self.unsanitized[ i:j ] + + def __add__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized + other ) + + def __sub__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized - other ) + + def __mul__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized * other ) + + def __floordiv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized // other ) + + def __mod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized % other ) + + def __divmod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( divmod( self.unsanitized, other ) ) + + def __pow__( self, *other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( self.unsanitized, *other ) ) + + def __lshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized << other ) + + def __rshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized >> other ) + + def __and__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized & other ) + + def __xor__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized ^ other ) + + def __or__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized | other ) + + def __div__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + def __truediv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + # The only reflected operand that we will define is __rpow__, due to coercion rules complications as per docs + def __rpow__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( other, self.unsanitized ) ) + + # Do not implement in-place operands + + def __neg__( self ): + return __safe_string_wrapper_function__( -self.unsanitized ) + + def __pos__( self ): + return __safe_string_wrapper_function__( +self.unsanitized ) + + def __abs__( self ): + return __safe_string_wrapper_function__( abs( self.unsanitized ) ) + + def __invert__( self ): + return __safe_string_wrapper_function__( ~self.unsanitized ) + + def __complex__( self ): + return __safe_string_wrapper_function__( complex( self.unsanitized ) ) + + def __int__( self ): + return int( self.unsanitized ) + + def __float__( self ): + return float( self.unsanitized ) + + def __oct__( self ): + return oct( self.unsanitized ) + + def __hex__( self ): + return hex( self.unsanitized ) + + def __index__( self ): + return self.unsanitized.index() + + def __coerce__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return coerce( self.unsanitized, other ) + + def __enter__( self ): + return self.unsanitized.__enter__() + + def __exit__( self, *args ): + return self.unsanitized.__exit__( *args ) + +class CallableSafeStringWrapper( SafeStringWrapper ): + + def __call__( self, *args, **kwds ): + return self.__safe_string_wrapper_function__( self.unsanitized( *args, **kwds ) ) + + +# Enable pickling/deepcopy +def pickle_SafeStringWrapper( safe_object ): + args = ( safe_object.unsanitized, ) + cls = SafeStringWrapper + if isinstance( safe_object, CallableSafeStringWrapper ): + cls = CallableSafeStringWrapper + return ( cls, args ) +copy_reg.pickle( SafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) +copy_reg.pickle( CallableSafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) + https://bitbucket.org/galaxy/galaxy-central/commits/bf8b6aeb7fd7/ Changeset: bf8b6aeb7fd7 Branch: stable User: natefoo Date: 2015-01-13 15:27:22+00:00 Summary: Update tag latest_2014.02.10 for changeset 0c000cc2f9c0 Affected #: 1 file diff -r 5b0c93bcc791747b315038a66fdb785f9db4ea47 -r bf8b6aeb7fd7d6b7ae8f64b97f7d585df22817b6 .hgtags --- a/.hgtags +++ b/.hgtags @@ -16,7 +16,7 @@ 19e56e66b0b344c6e2afa4541f6988e4fdb9af29 latest_2013.06.03 cee903b8b3eee9145627ee89742555dac581791e latest_2013.08.12 7d5aa19a166cba9039e15f338a1e3fc924c43d3a latest_2013.11.04 -2a756ca2cb1826db7796018e77d12e2dd7b67603 latest_2014.02.10 +0c000cc2f9c05bf4c1c2bc3a10215014fd64e696 latest_2014.02.10 ca45b78adb4152fc6e7395514d46eba6b7d0b838 release_2014.08.11 548ab24667d6206780237bd807f7d857a484c461 latest_2014.08.11 2092948937ac30ef82f71463a235c66d34987088 release_2014.10.06 https://bitbucket.org/galaxy/galaxy-central/commits/50867216bdc8/ Changeset: 50867216bdc8 Branch: stable User: natefoo Date: 2015-01-13 15:27:23+00:00 Summary: Merge head created for security fix on latest_2014.02.10 Affected #: 5 files https://bitbucket.org/galaxy/galaxy-central/commits/8f9dcac03369/ Changeset: 8f9dcac03369 Branch: stable User: dan Date: 2015-01-13 15:27:27+00:00 Summary: Fix a critical security vulnerability where unsanitized user-modifiable values could be included in a command line template. Affected #: 6 files diff -r 9661b9d5d5b330483ae3ad2236410e0efaa7c500 -r 8f9dcac033694e4cabcf5daae5cca1cfefbe967f lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -17,6 +17,7 @@ import galaxy.model from galaxy.util import listify, stringify_dictionary_keys, string_as_bool +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.util.odict import odict from galaxy.util import in_directory from galaxy.web import form_builder @@ -209,6 +210,9 @@ def to_string( self, value ): return str( value ) + def to_safe_string( self, value ): + return sanitize_lists_to_string( self.to_string( value ) ) + def make_copy( self, value, target_context = None, source_context = None ): return copy.deepcopy( value ) @@ -457,6 +461,10 @@ def to_string( self, value ): return json.dumps( value ) + def to_safe_string( self, value ): + # We do not sanitize json dicts + return json.dumps( value ) + class PythonObjectParameter( MetadataParameter ): @@ -487,6 +495,10 @@ return str( self.spec.no_value ) return value.file_name + def to_safe_string( self, value ): + # We do not sanitize file names + return self.to_string( value ) + def get_html_field( self, value=None, context=None, other_values=None, **kwd ): context = context or {} other_values = other_values or {} diff -r 9661b9d5d5b330483ae3ad2236410e0efaa7c500 -r 8f9dcac033694e4cabcf5daae5cca1cfefbe967f lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -9,6 +9,7 @@ from galaxy.util.none_like import NoneDataset from galaxy.util.odict import odict from galaxy.util.template import fill_template +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.web import url_for import logging @@ -176,7 +177,7 @@ if not chrom_info: # Default to built-in build. - chrom_info = os.path.join( trans.app.config.len_file_path, "%s.len" % input_dbkey ) + chrom_info = os.path.join( trans.app.config.len_file_path, "%s.len" % ( sanitize_lists_to_string( input_dbkey ) ) ) incoming[ "chromInfo" ] = os.path.abspath( chrom_info ) inp_data.update( db_datasets ) diff -r 9661b9d5d5b330483ae3ad2236410e0efaa7c500 -r 8f9dcac033694e4cabcf5daae5cca1cfefbe967f lib/galaxy/tools/evaluation.py --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -2,10 +2,12 @@ import tempfile from galaxy import model +from galaxy.util.object_wrapper import wrap_with_safe_string from galaxy.util.bunch import Bunch from galaxy.util.none_like import NoneDataset from galaxy.util.template import fill_template from galaxy.tools.wrappers import ( + ToolParameterValueWrapper, DatasetFilenameWrapper, DatasetListWrapper, LibraryDatasetValueWrapper, @@ -109,6 +111,9 @@ self.__populate_input_dataset_wrappers(param_dict, input_datasets, input_dataset_paths) self.__populate_output_dataset_wrappers(param_dict, output_datasets, output_paths, job_working_directory) self.__populate_unstructured_path_rewrites(param_dict) + # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames. + self.__sanitize_param_dict( param_dict ) + # Parameters added after this line are not sanitized self.__populate_non_job_params(param_dict) # Return the dictionary of parameters @@ -309,6 +314,24 @@ #the paths rewritten. self.__walk_inputs( self.tool.inputs, param_dict, rewrite_unstructured_paths ) + def __sanitize_param_dict( self, param_dict ): + """ + Sanitize all values that will be substituted on the command line, with the exception of ToolParameterValueWrappers, + which already have their own specific sanitization rules and also exclude special-cased named values. + We will only examine the first level for values to skip; the wrapping function will recurse as necessary. + + Note: this method follows the style of the similar populate calls, in that param_dict is modified in-place. + """ + # chromInfo is a filename, do not sanitize it. + skip = [ 'chromInfo' ] + if not self.tool or not self.tool.options or self.tool.options.sanitize: + for key, value in param_dict.items(): + if key not in skip: + # Remove key so that new wrapped object will occupy key slot + del param_dict[key] + # And replace with new wrapped key + param_dict[ wrap_with_safe_string( key, no_wrap_classes=ToolParameterValueWrapper ) ] = wrap_with_safe_string( value, no_wrap_classes=ToolParameterValueWrapper ) + def build( self ): """ Build runtime description of job to execute, evaluate command and diff -r 9661b9d5d5b330483ae3ad2236410e0efaa7c500 -r 8f9dcac033694e4cabcf5daae5cca1cfefbe967f lib/galaxy/tools/wrappers.py --- a/lib/galaxy/tools/wrappers.py +++ b/lib/galaxy/tools/wrappers.py @@ -1,5 +1,7 @@ import pipes +from galaxy import exceptions from galaxy.util.none_like import NoneDataset +from galaxy.util.object_wrapper import wrap_with_safe_string class ToolParameterValueWrapper( object ): @@ -145,10 +147,13 @@ if name in self.metadata.spec: if rval is None: rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) + rval = self.metadata.spec[ name ].param.to_safe_string( rval ) # Store this value, so we don't need to recalculate if needed # again setattr( self, name, rval ) + else: + #escape string value of non-defined metadata value + rval = wrap_with_safe_string( rval ) return rval def __nonzero__( self ): @@ -173,9 +178,13 @@ ext = tool.inputs[name].extensions[0] except: ext = 'data' - self.dataset = NoneDataset( datatypes_registry=datatypes_registry, ext=ext ) + self.dataset = wrap_with_safe_string( NoneDataset( datatypes_registry=datatypes_registry, ext=ext ), no_wrap_classes=ToolParameterValueWrapper ) else: - self.dataset = dataset + # Tool wrappers should not normally be accessing .dataset directly, + # so we will wrap it and keep the original around for file paths + # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper? + self.unsanitized = dataset + self.dataset = wrap_with_safe_string( dataset, no_wrap_classes=ToolParameterValueWrapper ) self.metadata = self.MetadataWrapper( dataset.metadata ) self.false_path = getattr( dataset_path, "false_path", None ) self.false_extra_files_path = getattr( dataset_path, "false_extra_files_path", None ) @@ -184,13 +193,31 @@ if self.false_path is not None: return self.false_path else: - return self.dataset.file_name + return self.unsanitized.file_name def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': return self.false_path elif self.false_extra_files_path is not None and key == 'extra_files_path': + # Path to extra files was rewritten for this job. return self.false_extra_files_path + elif key == 'extra_files_path': + try: + # Assume it is an output and that this wrapper + # will be set with correct "files_path" for this + # job. + return self.files_path + except AttributeError: + # Otherwise, we have an input - delegate to model and + # object store to find the static location of this + # directory. + try: + return self.unsanitized.extra_files_path + except exceptions.ObjectNotFound: + # NestedObjectstore raises an error here + # instead of just returning a non-existent + # path like DiskObjectStore. + raise else: return getattr( self.dataset, key ) diff -r 9661b9d5d5b330483ae3ad2236410e0efaa7c500 -r 8f9dcac033694e4cabcf5daae5cca1cfefbe967f lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -330,48 +330,62 @@ '\n' : '__cn__', '\r' : '__cr__', '\t' : '__tc__', - '#' : '__pd__' - } + '#': '__pd__'} -def restore_text(text): + +def restore_text( text, character_map=mapped_chars ): """Restores sanitized text""" if not text: return text - for key, value in mapped_chars.items(): + for key, value in character_map.items(): text = text.replace(value, key) return text -def sanitize_text(text): + +def sanitize_text( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """ Restricts the characters that are allowed in text; accepts both strings - and lists of strings. + and lists of strings; non-string entities will be cast to strings. """ - if isinstance( text, basestring ): - return _sanitize_text_helper(text) - elif isinstance( text, list ): - return [ _sanitize_text_helper(t) for t in text ] + if isinstance( text, list ): + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), text ) + if not isinstance( text, basestring ): + text = smart_str( text ) + return _sanitize_text_helper( text, valid_characters=valid_characters, character_map=character_map ) -def _sanitize_text_helper(text): +def _sanitize_text_helper( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Restricts the characters that are allowed in a string""" out = [] for c in text: - if c in valid_chars: + if c in valid_characters: out.append(c) - elif c in mapped_chars: - out.append(mapped_chars[c]) + elif c in character_map: + out.append( character_map[c] ) else: - out.append('X') # makes debugging easier + out.append( invalid_character ) # makes debugging easier return ''.join(out) -def sanitize_param(value): + +def sanitize_lists_to_string( values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + if isinstance( values, list ): + rval = [] + for value in values: + rval.append( sanitize_lists_to_string( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) ) + values = ",".join( rval ) + else: + values = sanitize_text( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + return values + + +def sanitize_param( value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Clean incoming parameters (strings or lists)""" if isinstance( value, basestring ): - return sanitize_text(value) + return sanitize_text( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) elif isinstance( value, list ): - return map(sanitize_text, value) + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), value ) else: - raise Exception, 'Unknown parameter type (%s)' % ( type( value ) ) + raise Exception('Unknown parameter type (%s)' % ( type( value ) )) valid_filename_chars = set( string.ascii_letters + string.digits + '_.' ) invalid_filenames = [ '', '.', '..' ] diff -r 9661b9d5d5b330483ae3ad2236410e0efaa7c500 -r 8f9dcac033694e4cabcf5daae5cca1cfefbe967f lib/galaxy/util/object_wrapper.py --- /dev/null +++ b/lib/galaxy/util/object_wrapper.py @@ -0,0 +1,436 @@ +""" +Classes for wrapping Objects and Sanitizing string output. +""" + +import inspect +import copy_reg +import logging +import string +from numbers import Number +from types import ( NoneType, NotImplementedType, EllipsisType, FunctionType, MethodType, GeneratorType, CodeType, + BuiltinFunctionType, BuiltinMethodType, ModuleType, XRangeType, SliceType, TracebackType, FrameType, + BufferType, DictProxyType, GetSetDescriptorType, MemberDescriptorType ) +from UserDict import UserDict + +from galaxy.util import sanitize_lists_to_string as _sanitize_lists_to_string + +log = logging.getLogger( __name__ ) + +# Define different behaviors for different types, see also: https://docs.python.org/2/library/types.html + +# Known Callable types +__CALLABLE_TYPES__ = ( FunctionType, MethodType, GeneratorType, CodeType, BuiltinFunctionType, BuiltinMethodType, ) + +# Always wrap these types without attempting to subclass +__WRAP_NO_SUBCLASS__ = ( ModuleType, XRangeType, SliceType, BufferType, TracebackType, FrameType, DictProxyType, + GetSetDescriptorType, MemberDescriptorType ) + __CALLABLE_TYPES__ + +# Don't wrap or sanitize. +__DONT_SANITIZE_TYPES__ = ( Number, bool, NoneType, NotImplementedType, EllipsisType, bytearray, ) + +# Don't wrap, but do sanitize. +__DONT_WRAP_TYPES__ = tuple() #( basestring, ) so that we can get the unsanitized string, we will now wrap basestring instances + +# Wrap contents, but not the container +__WRAP_SEQUENCES__ = ( tuple, list, ) +__WRAP_SETS__ = ( set, frozenset, ) +__WRAP_MAPPINGS__ = ( dict, UserDict, ) + + +# Define the set of characters that are not sanitized, and define a set of mappings for those that are. +# characters that are valid +VALID_CHARACTERS = set( string.letters + string.digits + " -=_.()/+*^,:?!@" ) + +# characters that are allowed but need to be escaped +CHARACTER_MAP = { '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__'} + +INVALID_CHARACTER = "X" + +def sanitize_lists_to_string( values, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP, invalid_character=INVALID_CHARACTER ): + return _sanitize_lists_to_string( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + + +def wrap_with_safe_string( value, no_wrap_classes = None ): + """ + Recursively wrap values that should be wrapped. + """ + + def __do_wrap( value ): + if isinstance( value, SafeStringWrapper ): + # Only ever wrap one-layer + return value + if callable( value ): + safe_class = CallableSafeStringWrapper + else: + safe_class = SafeStringWrapper + if isinstance( value, no_wrap_classes ): + return value + if isinstance( value, __DONT_WRAP_TYPES__ ): + return sanitize_lists_to_string( value, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + if isinstance( value, __WRAP_NO_SUBCLASS__ ): + return safe_class( value, safe_string_wrapper_function = __do_wrap ) + for this_type in __WRAP_SEQUENCES__ + __WRAP_SETS__: + if isinstance( value, this_type ): + return this_type( map( __do_wrap, value ) ) + for this_type in __WRAP_MAPPINGS__: + if isinstance( value, this_type ): + # Wrap both key and value + return this_type( map( lambda x: ( __do_wrap( x[0] ), __do_wrap( x[1] ) ), value.items() ) ) + # Create a dynamic class that joins SafeStringWrapper with the object being wrapped. + # This allows e.g. isinstance to continue to work. + try: + wrapped_class_name = value.__name__ + wrapped_class = value + except: + wrapped_class_name = value.__class__.__name__ + wrapped_class = value.__class__ + value_mod = inspect.getmodule( value ) + if value_mod: + wrapped_class_name = "%s.%s" % ( value_mod.__name__, wrapped_class_name ) + wrapped_class_name = "SafeStringWrapper(%s:%s)" % ( wrapped_class_name, ",".join( sorted( map( str, no_wrap_classes ) ) ) ) + do_wrap_func_name = "__do_wrap_%s" % ( wrapped_class_name ) + do_wrap_func = __do_wrap + global_dict = globals() + if wrapped_class_name in global_dict: + # Check to see if we have created a wrapper for this class yet, if so, reuse + wrapped_class = global_dict.get( wrapped_class_name ) + do_wrap_func = global_dict.get( do_wrap_func_name, __do_wrap ) + else: + try: + wrapped_class = type( wrapped_class_name, ( safe_class, wrapped_class, ), {} ) + except TypeError, e: + # Fail-safe for when a class cannot be dynamically subclassed. + log.warning( "Unable to create dynamic subclass for %s, %s: %s", type( value), value, e ) + wrapped_class = type( wrapped_class_name, ( safe_class, ), {} ) + if wrapped_class not in ( SafeStringWrapper, CallableSafeStringWrapper ): + # Save this wrapper for reuse and pickling/copying + global_dict[ wrapped_class_name ] = wrapped_class + do_wrap_func.__name__ = do_wrap_func_name + global_dict[ do_wrap_func_name ] = do_wrap_func + def pickle_safe_object( safe_object ): + return ( wrapped_class, ( safe_object.unsanitized, do_wrap_func, ) ) + # Set pickle and copy properties + copy_reg.pickle( wrapped_class, pickle_safe_object, do_wrap_func ) + return wrapped_class( value, safe_string_wrapper_function = do_wrap_func ) + # Determine classes not to wrap + if no_wrap_classes: + if not isinstance( no_wrap_classes, ( tuple, list ) ): + no_wrap_classes = [ no_wrap_classes ] + no_wrap_classes = list( no_wrap_classes ) + list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + else: + no_wrap_classes = list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + no_wrap_classes = tuple( set( sorted( no_wrap_classes, key=str ) ) ) + return __do_wrap( value ) + + +# N.B. refer to e.g. https://docs.python.org/2/reference/datamodel.html for information on Python's Data Model. + + +class SafeStringWrapper( object ): + """ + Class that wraps and sanitizes any provided value's attributes + that will attempt to be cast into a string. + + Attempts to mimic behavior of original class, including operands. + + To ensure proper handling of e.g. subclass checks, the *wrap_with_safe_string()* + method should be used. + + This wrapping occurs in a recursive/parasitic fashion, as all called attributes of + the originally wrapped object will also be wrapped and sanitized, unless the attribute + is of a type found in __DONT_SANITIZE_TYPES__ + __DONT_WRAP_TYPES__, where e.g. ~(strings + will still be sanitized, but not wrapped), and e.g. integers will have neither. + """ + __UNSANITIZED_ATTRIBUTE_NAME__ = 'unsanitized' + __NO_WRAP_NAMES__ = [ '__safe_string_wrapper_function__', __UNSANITIZED_ATTRIBUTE_NAME__] + + + def __new__( cls, *arg, **kwd ): + # We need to define a __new__ since, we are subclassing from e.g. immutable str, which internally sets data + # that will be used when other + this (this + other is handled by __add__) + safe_string_wrapper_function = kwd.get( 'safe_string_wrapper_function', None) or wrap_with_safe_string + try: + return super( SafeStringWrapper, cls ).__new__( cls, sanitize_lists_to_string( arg[0], valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + except Exception, e: + log.warning( "Could not provide an argument to %s.__new__: %s; will try without arguments.", cls, e ) + return super( SafeStringWrapper, cls ).__new__( cls ) + + def __init__( self, value, safe_string_wrapper_function = wrap_with_safe_string ): + self.unsanitized = value + self.__safe_string_wrapper_function__ = safe_string_wrapper_function + + def __str__( self ): + return sanitize_lists_to_string( self.unsanitized, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + + def __repr__( self ): + return "%s object at %x on: %s" % ( sanitize_lists_to_string( self.__class__.__name__, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ), id( self ), sanitize_lists_to_string( repr( self.unsanitized ), valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __le__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized <= other + + def __eq__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized == other + + def __ne__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized != other + + def __gt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized > other + + def __ge__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized >= other + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __cmp__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return cmp( self.unsanitized, other ) + + # Do not implement __rcmp__, python 2.2 < 2.6 + + def __hash__( self ): + return hash( self.unsanitized ) + + def __nonzero__( self ): + return bool( self.unsanitized ) + + # Do not implement __unicode__, we will rely on __str__ + + def __getattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + #FIXME: is this ever reached? + return object.__getattr__( self, name ) + return self.__safe_string_wrapper_function__( getattr( self.unsanitized, name ) ) + + def __setattr__( self, name, value ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__setattr__( self, name, value ) + return setattr( self.unsanitized, name, value ) + + def __delattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__delattr__( self, name ) + return delattr( self.unsanitized, name ) + + def __getattribute__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__getattribute__( self, name ) + return self.__safe_string_wrapper_function__( getattr( object.__getattribute__( self, 'unsanitized' ), name ) ) + + # Skip Descriptors + + # Skip __slots__ + + # Don't need __metaclass__, we'll use the helper function to handle with subclassing for e.g. isinstance() + + # Revisit: + # __instancecheck__ + # __subclasscheck__ + # We are using a helper class to create dynamic subclasses to handle class checks + + # We address __call__ as needed based upon unsanitized, through the use of a CallableSafeStringWrapper class + + def __len__( self ): + original_value = self.unsanitized + while isinstance( original_value, SafeStringWrapper ): + original_value = self.unsanitized + return len( self.unsanitized ) + + def __getitem__( self, key ): + return self.__safe_string_wrapper_function__( self.unsanitized[ key ] ) + + def __setitem__( self, key, value ): + while isinstance( value, SafeStringWrapper ): + value = value.unsanitized + self.unsanitized[ key ] = value + + def __delitem__( self, key ): + del self.unsanitized[ key ] + + def __iter__( self ): + return iter( map( self.__safe_string_wrapper_function__, iter( self.unsanitized ) ) ) + + # Do not implement __reversed__ + + def __contains__( self, item ): + # FIXME: Do we need to consider if item is/isn't or does/doesn't contain SafeStringWrapper? + # When considering e.g. nested lists/dicts/etc, this gets complicated + while isinstance( item, SafeStringWrapper ): + item = item.unsanitized + return item in self.unsanitized + + # Not sure that we need these slice methods, but will provide anyway + def __getslice__( self, i, j ): + return self.__safe_string_wrapper_function__( self.unsanitized[ i:j ] ) + + def __setslice__( self, i, j, value ): + self.unsanitized[ i:j ] = value + + def __delslice__( self, i, j ): + del self.unsanitized[ i:j ] + + def __add__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized + other ) + + def __sub__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized - other ) + + def __mul__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized * other ) + + def __floordiv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized // other ) + + def __mod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized % other ) + + def __divmod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( divmod( self.unsanitized, other ) ) + + def __pow__( self, *other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( self.unsanitized, *other ) ) + + def __lshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized << other ) + + def __rshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized >> other ) + + def __and__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized & other ) + + def __xor__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized ^ other ) + + def __or__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized | other ) + + def __div__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + def __truediv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + # The only reflected operand that we will define is __rpow__, due to coercion rules complications as per docs + def __rpow__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( other, self.unsanitized ) ) + + # Do not implement in-place operands + + def __neg__( self ): + return __safe_string_wrapper_function__( -self.unsanitized ) + + def __pos__( self ): + return __safe_string_wrapper_function__( +self.unsanitized ) + + def __abs__( self ): + return __safe_string_wrapper_function__( abs( self.unsanitized ) ) + + def __invert__( self ): + return __safe_string_wrapper_function__( ~self.unsanitized ) + + def __complex__( self ): + return __safe_string_wrapper_function__( complex( self.unsanitized ) ) + + def __int__( self ): + return int( self.unsanitized ) + + def __float__( self ): + return float( self.unsanitized ) + + def __oct__( self ): + return oct( self.unsanitized ) + + def __hex__( self ): + return hex( self.unsanitized ) + + def __index__( self ): + return self.unsanitized.index() + + def __coerce__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return coerce( self.unsanitized, other ) + + def __enter__( self ): + return self.unsanitized.__enter__() + + def __exit__( self, *args ): + return self.unsanitized.__exit__( *args ) + +class CallableSafeStringWrapper( SafeStringWrapper ): + + def __call__( self, *args, **kwds ): + return self.__safe_string_wrapper_function__( self.unsanitized( *args, **kwds ) ) + + +# Enable pickling/deepcopy +def pickle_SafeStringWrapper( safe_object ): + args = ( safe_object.unsanitized, ) + cls = SafeStringWrapper + if isinstance( safe_object, CallableSafeStringWrapper ): + cls = CallableSafeStringWrapper + return ( cls, args ) +copy_reg.pickle( SafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) +copy_reg.pickle( CallableSafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) + https://bitbucket.org/galaxy/galaxy-central/commits/1cc98e8f6782/ Changeset: 1cc98e8f6782 Branch: stable User: natefoo Date: 2015-01-13 15:27:31+00:00 Summary: Update tag latest_2014.04.14 for changeset 8f9dcac03369 Affected #: 1 file diff -r 50867216bdc89348afbce913b8371eb3463ce576 -r 1cc98e8f678283d05dbd5693bc0f48ce49c340bf .hgtags --- a/.hgtags +++ b/.hgtags @@ -9,7 +9,7 @@ 9e53251b0b7e93b9563008a2b112f2e815a04bbc release_2014.04.14 7e257c7b10badb65772b1528cb61d58175a42e47 release_2014.06.02 7a4d321c0e38fa263ea83d29a35a608c3181fcba latest_2014.06.02 -9661b9d5d5b330483ae3ad2236410e0efaa7c500 latest_2014.04.14 +8f9dcac033694e4cabcf5daae5cca1cfefbe967f latest_2014.04.14 9c323aad4ffdd65a3deb06a4a36f6b2c5115a60f latest_2013.01.13 b986c184be88947b5d1d90be7f36cfd2627dd938 latest_2013.02.08 dec9431d66b837a208e2f060d90afd913c721227 latest_2013.04.01 https://bitbucket.org/galaxy/galaxy-central/commits/c46ef9a4aa9c/ Changeset: c46ef9a4aa9c Branch: stable User: natefoo Date: 2015-01-13 15:27:32+00:00 Summary: Merge head created for security fix on latest_2014.04.14 Affected #: 1 file https://bitbucket.org/galaxy/galaxy-central/commits/4145417a6e1c/ Changeset: 4145417a6e1c Branch: stable User: dan Date: 2015-01-13 15:27:36+00:00 Summary: Fix a critical security vulnerability where unsanitized user-modifiable values could be included in a command line template. Affected #: 6 files diff -r 7a4d321c0e38fa263ea83d29a35a608c3181fcba -r 4145417a6e1c13f82a3de365aadef0fb3ed7ab14 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -17,6 +17,7 @@ import galaxy.model from galaxy.util import listify, stringify_dictionary_keys, string_as_bool +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.util.odict import odict from galaxy.util import in_directory from galaxy.web import form_builder @@ -209,6 +210,9 @@ def to_string( self, value ): return str( value ) + def to_safe_string( self, value ): + return sanitize_lists_to_string( self.to_string( value ) ) + def make_copy( self, value, target_context = None, source_context = None ): return copy.deepcopy( value ) @@ -457,6 +461,10 @@ def to_string( self, value ): return json.dumps( value ) + def to_safe_string( self, value ): + # We do not sanitize json dicts + return json.dumps( value ) + class PythonObjectParameter( MetadataParameter ): @@ -487,6 +495,10 @@ return str( self.spec.no_value ) return value.file_name + def to_safe_string( self, value ): + # We do not sanitize file names + return self.to_string( value ) + def get_html_field( self, value=None, context=None, other_values=None, **kwd ): context = context or {} other_values = other_values or {} diff -r 7a4d321c0e38fa263ea83d29a35a608c3181fcba -r 4145417a6e1c13f82a3de365aadef0fb3ed7ab14 lib/galaxy/tools/evaluation.py --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -2,10 +2,12 @@ import tempfile from galaxy import model +from galaxy.util.object_wrapper import wrap_with_safe_string from galaxy.util.bunch import Bunch from galaxy.util.none_like import NoneDataset from galaxy.util.template import fill_template from galaxy.tools.wrappers import ( + ToolParameterValueWrapper, DatasetFilenameWrapper, DatasetListWrapper, DatasetCollectionWrapper, @@ -114,6 +116,9 @@ self.__populate_input_dataset_wrappers(param_dict, input_datasets, input_dataset_paths) self.__populate_output_dataset_wrappers(param_dict, output_datasets, output_paths, job_working_directory) self.__populate_unstructured_path_rewrites(param_dict) + # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames. + self.__sanitize_param_dict( param_dict ) + # Parameters added after this line are not sanitized self.__populate_non_job_params(param_dict) # Return the dictionary of parameters @@ -334,6 +339,24 @@ #the paths rewritten. self.__walk_inputs( self.tool.inputs, param_dict, rewrite_unstructured_paths ) + def __sanitize_param_dict( self, param_dict ): + """ + Sanitize all values that will be substituted on the command line, with the exception of ToolParameterValueWrappers, + which already have their own specific sanitization rules and also exclude special-cased named values. + We will only examine the first level for values to skip; the wrapping function will recurse as necessary. + + Note: this method follows the style of the similar populate calls, in that param_dict is modified in-place. + """ + # chromInfo is a filename, do not sanitize it. + skip = [ 'chromInfo' ] + if not self.tool or not self.tool.options or self.tool.options.sanitize: + for key, value in param_dict.items(): + if key not in skip: + # Remove key so that new wrapped object will occupy key slot + del param_dict[key] + # And replace with new wrapped key + param_dict[ wrap_with_safe_string( key, no_wrap_classes=ToolParameterValueWrapper ) ] = wrap_with_safe_string( value, no_wrap_classes=ToolParameterValueWrapper ) + def build( self ): """ Build runtime description of job to execute, evaluate command and diff -r 7a4d321c0e38fa263ea83d29a35a608c3181fcba -r 4145417a6e1c13f82a3de365aadef0fb3ed7ab14 lib/galaxy/tools/wrappers.py --- a/lib/galaxy/tools/wrappers.py +++ b/lib/galaxy/tools/wrappers.py @@ -1,5 +1,7 @@ import pipes +from galaxy import exceptions from galaxy.util.none_like import NoneDataset +from galaxy.util.object_wrapper import wrap_with_safe_string from galaxy.util import odict from logging import getLogger @@ -149,10 +151,13 @@ if name in self.metadata.spec: if rval is None: rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) + rval = self.metadata.spec[ name ].param.to_safe_string( rval ) # Store this value, so we don't need to recalculate if needed # again setattr( self, name, rval ) + else: + #escape string value of non-defined metadata value + rval = wrap_with_safe_string( rval ) return rval def __nonzero__( self ): @@ -177,9 +182,13 @@ ext = tool.inputs[name].extensions[0] except: ext = 'data' - self.dataset = NoneDataset( datatypes_registry=datatypes_registry, ext=ext ) + self.dataset = wrap_with_safe_string( NoneDataset( datatypes_registry=datatypes_registry, ext=ext ), no_wrap_classes=ToolParameterValueWrapper ) else: - self.dataset = dataset + # Tool wrappers should not normally be accessing .dataset directly, + # so we will wrap it and keep the original around for file paths + # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper? + self.unsanitized = dataset + self.dataset = wrap_with_safe_string( dataset, no_wrap_classes=ToolParameterValueWrapper ) self.metadata = self.MetadataWrapper( dataset.metadata ) self.false_path = getattr( dataset_path, "false_path", None ) self.false_extra_files_path = getattr( dataset_path, "false_extra_files_path", None ) @@ -192,13 +201,31 @@ if self.false_path is not None: return self.false_path else: - return self.dataset.file_name + return self.unsanitized.file_name def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': return self.false_path elif self.false_extra_files_path is not None and key == 'extra_files_path': + # Path to extra files was rewritten for this job. return self.false_extra_files_path + elif key == 'extra_files_path': + try: + # Assume it is an output and that this wrapper + # will be set with correct "files_path" for this + # job. + return self.files_path + except AttributeError: + # Otherwise, we have an input - delegate to model and + # object store to find the static location of this + # directory. + try: + return self.unsanitized.extra_files_path + except exceptions.ObjectNotFound: + # NestedObjectstore raises an error here + # instead of just returning a non-existent + # path like DiskObjectStore. + raise else: return getattr( self.dataset, key ) diff -r 7a4d321c0e38fa263ea83d29a35a608c3181fcba -r 4145417a6e1c13f82a3de365aadef0fb3ed7ab14 lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -330,48 +330,62 @@ '\n' : '__cn__', '\r' : '__cr__', '\t' : '__tc__', - '#' : '__pd__' - } + '#': '__pd__'} -def restore_text(text): + +def restore_text( text, character_map=mapped_chars ): """Restores sanitized text""" if not text: return text - for key, value in mapped_chars.items(): + for key, value in character_map.items(): text = text.replace(value, key) return text -def sanitize_text(text): + +def sanitize_text( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """ Restricts the characters that are allowed in text; accepts both strings - and lists of strings. + and lists of strings; non-string entities will be cast to strings. """ - if isinstance( text, basestring ): - return _sanitize_text_helper(text) - elif isinstance( text, list ): - return [ _sanitize_text_helper(t) for t in text ] + if isinstance( text, list ): + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), text ) + if not isinstance( text, basestring ): + text = smart_str( text ) + return _sanitize_text_helper( text, valid_characters=valid_characters, character_map=character_map ) -def _sanitize_text_helper(text): +def _sanitize_text_helper( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Restricts the characters that are allowed in a string""" out = [] for c in text: - if c in valid_chars: + if c in valid_characters: out.append(c) - elif c in mapped_chars: - out.append(mapped_chars[c]) + elif c in character_map: + out.append( character_map[c] ) else: - out.append('X') # makes debugging easier + out.append( invalid_character ) # makes debugging easier return ''.join(out) -def sanitize_param(value): + +def sanitize_lists_to_string( values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + if isinstance( values, list ): + rval = [] + for value in values: + rval.append( sanitize_lists_to_string( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) ) + values = ",".join( rval ) + else: + values = sanitize_text( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + return values + + +def sanitize_param( value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Clean incoming parameters (strings or lists)""" if isinstance( value, basestring ): - return sanitize_text(value) + return sanitize_text( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) elif isinstance( value, list ): - return map(sanitize_text, value) + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), value ) else: - raise Exception, 'Unknown parameter type (%s)' % ( type( value ) ) + raise Exception('Unknown parameter type (%s)' % ( type( value ) )) valid_filename_chars = set( string.ascii_letters + string.digits + '_.' ) invalid_filenames = [ '', '.', '..' ] diff -r 7a4d321c0e38fa263ea83d29a35a608c3181fcba -r 4145417a6e1c13f82a3de365aadef0fb3ed7ab14 lib/galaxy/util/dbkeys.py --- a/lib/galaxy/util/dbkeys.py +++ b/lib/galaxy/util/dbkeys.py @@ -4,6 +4,7 @@ #dbkeys read from disk using builds.txt from galaxy.util import dbnames from galaxy.util.json import from_json_string +from galaxy.util.object_wrapper import sanitize_lists_to_string import os.path @@ -84,6 +85,7 @@ # use configured server len path if not chrom_info: # Default to built-in build. - chrom_info = os.path.join( self._static_chrom_info_path, "%s.len" % dbkey ) + # Since we are using an unverified dbkey, we will sanitize the dbkey before use + chrom_info = os.path.join( self._static_chrom_info_path, "%s.len" % sanitize_lists_to_string( dbkey ) ) chrom_info = os.path.abspath( chrom_info ) return ( chrom_info, db_dataset ) diff -r 7a4d321c0e38fa263ea83d29a35a608c3181fcba -r 4145417a6e1c13f82a3de365aadef0fb3ed7ab14 lib/galaxy/util/object_wrapper.py --- /dev/null +++ b/lib/galaxy/util/object_wrapper.py @@ -0,0 +1,436 @@ +""" +Classes for wrapping Objects and Sanitizing string output. +""" + +import inspect +import copy_reg +import logging +import string +from numbers import Number +from types import ( NoneType, NotImplementedType, EllipsisType, FunctionType, MethodType, GeneratorType, CodeType, + BuiltinFunctionType, BuiltinMethodType, ModuleType, XRangeType, SliceType, TracebackType, FrameType, + BufferType, DictProxyType, GetSetDescriptorType, MemberDescriptorType ) +from UserDict import UserDict + +from galaxy.util import sanitize_lists_to_string as _sanitize_lists_to_string + +log = logging.getLogger( __name__ ) + +# Define different behaviors for different types, see also: https://docs.python.org/2/library/types.html + +# Known Callable types +__CALLABLE_TYPES__ = ( FunctionType, MethodType, GeneratorType, CodeType, BuiltinFunctionType, BuiltinMethodType, ) + +# Always wrap these types without attempting to subclass +__WRAP_NO_SUBCLASS__ = ( ModuleType, XRangeType, SliceType, BufferType, TracebackType, FrameType, DictProxyType, + GetSetDescriptorType, MemberDescriptorType ) + __CALLABLE_TYPES__ + +# Don't wrap or sanitize. +__DONT_SANITIZE_TYPES__ = ( Number, bool, NoneType, NotImplementedType, EllipsisType, bytearray, ) + +# Don't wrap, but do sanitize. +__DONT_WRAP_TYPES__ = tuple() #( basestring, ) so that we can get the unsanitized string, we will now wrap basestring instances + +# Wrap contents, but not the container +__WRAP_SEQUENCES__ = ( tuple, list, ) +__WRAP_SETS__ = ( set, frozenset, ) +__WRAP_MAPPINGS__ = ( dict, UserDict, ) + + +# Define the set of characters that are not sanitized, and define a set of mappings for those that are. +# characters that are valid +VALID_CHARACTERS = set( string.letters + string.digits + " -=_.()/+*^,:?!@" ) + +# characters that are allowed but need to be escaped +CHARACTER_MAP = { '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__'} + +INVALID_CHARACTER = "X" + +def sanitize_lists_to_string( values, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP, invalid_character=INVALID_CHARACTER ): + return _sanitize_lists_to_string( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + + +def wrap_with_safe_string( value, no_wrap_classes = None ): + """ + Recursively wrap values that should be wrapped. + """ + + def __do_wrap( value ): + if isinstance( value, SafeStringWrapper ): + # Only ever wrap one-layer + return value + if callable( value ): + safe_class = CallableSafeStringWrapper + else: + safe_class = SafeStringWrapper + if isinstance( value, no_wrap_classes ): + return value + if isinstance( value, __DONT_WRAP_TYPES__ ): + return sanitize_lists_to_string( value, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + if isinstance( value, __WRAP_NO_SUBCLASS__ ): + return safe_class( value, safe_string_wrapper_function = __do_wrap ) + for this_type in __WRAP_SEQUENCES__ + __WRAP_SETS__: + if isinstance( value, this_type ): + return this_type( map( __do_wrap, value ) ) + for this_type in __WRAP_MAPPINGS__: + if isinstance( value, this_type ): + # Wrap both key and value + return this_type( map( lambda x: ( __do_wrap( x[0] ), __do_wrap( x[1] ) ), value.items() ) ) + # Create a dynamic class that joins SafeStringWrapper with the object being wrapped. + # This allows e.g. isinstance to continue to work. + try: + wrapped_class_name = value.__name__ + wrapped_class = value + except: + wrapped_class_name = value.__class__.__name__ + wrapped_class = value.__class__ + value_mod = inspect.getmodule( value ) + if value_mod: + wrapped_class_name = "%s.%s" % ( value_mod.__name__, wrapped_class_name ) + wrapped_class_name = "SafeStringWrapper(%s:%s)" % ( wrapped_class_name, ",".join( sorted( map( str, no_wrap_classes ) ) ) ) + do_wrap_func_name = "__do_wrap_%s" % ( wrapped_class_name ) + do_wrap_func = __do_wrap + global_dict = globals() + if wrapped_class_name in global_dict: + # Check to see if we have created a wrapper for this class yet, if so, reuse + wrapped_class = global_dict.get( wrapped_class_name ) + do_wrap_func = global_dict.get( do_wrap_func_name, __do_wrap ) + else: + try: + wrapped_class = type( wrapped_class_name, ( safe_class, wrapped_class, ), {} ) + except TypeError, e: + # Fail-safe for when a class cannot be dynamically subclassed. + log.warning( "Unable to create dynamic subclass for %s, %s: %s", type( value), value, e ) + wrapped_class = type( wrapped_class_name, ( safe_class, ), {} ) + if wrapped_class not in ( SafeStringWrapper, CallableSafeStringWrapper ): + # Save this wrapper for reuse and pickling/copying + global_dict[ wrapped_class_name ] = wrapped_class + do_wrap_func.__name__ = do_wrap_func_name + global_dict[ do_wrap_func_name ] = do_wrap_func + def pickle_safe_object( safe_object ): + return ( wrapped_class, ( safe_object.unsanitized, do_wrap_func, ) ) + # Set pickle and copy properties + copy_reg.pickle( wrapped_class, pickle_safe_object, do_wrap_func ) + return wrapped_class( value, safe_string_wrapper_function = do_wrap_func ) + # Determine classes not to wrap + if no_wrap_classes: + if not isinstance( no_wrap_classes, ( tuple, list ) ): + no_wrap_classes = [ no_wrap_classes ] + no_wrap_classes = list( no_wrap_classes ) + list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + else: + no_wrap_classes = list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + no_wrap_classes = tuple( set( sorted( no_wrap_classes, key=str ) ) ) + return __do_wrap( value ) + + +# N.B. refer to e.g. https://docs.python.org/2/reference/datamodel.html for information on Python's Data Model. + + +class SafeStringWrapper( object ): + """ + Class that wraps and sanitizes any provided value's attributes + that will attempt to be cast into a string. + + Attempts to mimic behavior of original class, including operands. + + To ensure proper handling of e.g. subclass checks, the *wrap_with_safe_string()* + method should be used. + + This wrapping occurs in a recursive/parasitic fashion, as all called attributes of + the originally wrapped object will also be wrapped and sanitized, unless the attribute + is of a type found in __DONT_SANITIZE_TYPES__ + __DONT_WRAP_TYPES__, where e.g. ~(strings + will still be sanitized, but not wrapped), and e.g. integers will have neither. + """ + __UNSANITIZED_ATTRIBUTE_NAME__ = 'unsanitized' + __NO_WRAP_NAMES__ = [ '__safe_string_wrapper_function__', __UNSANITIZED_ATTRIBUTE_NAME__] + + + def __new__( cls, *arg, **kwd ): + # We need to define a __new__ since, we are subclassing from e.g. immutable str, which internally sets data + # that will be used when other + this (this + other is handled by __add__) + safe_string_wrapper_function = kwd.get( 'safe_string_wrapper_function', None) or wrap_with_safe_string + try: + return super( SafeStringWrapper, cls ).__new__( cls, sanitize_lists_to_string( arg[0], valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + except Exception, e: + log.warning( "Could not provide an argument to %s.__new__: %s; will try without arguments.", cls, e ) + return super( SafeStringWrapper, cls ).__new__( cls ) + + def __init__( self, value, safe_string_wrapper_function = wrap_with_safe_string ): + self.unsanitized = value + self.__safe_string_wrapper_function__ = safe_string_wrapper_function + + def __str__( self ): + return sanitize_lists_to_string( self.unsanitized, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + + def __repr__( self ): + return "%s object at %x on: %s" % ( sanitize_lists_to_string( self.__class__.__name__, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ), id( self ), sanitize_lists_to_string( repr( self.unsanitized ), valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __le__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized <= other + + def __eq__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized == other + + def __ne__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized != other + + def __gt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized > other + + def __ge__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized >= other + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __cmp__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return cmp( self.unsanitized, other ) + + # Do not implement __rcmp__, python 2.2 < 2.6 + + def __hash__( self ): + return hash( self.unsanitized ) + + def __nonzero__( self ): + return bool( self.unsanitized ) + + # Do not implement __unicode__, we will rely on __str__ + + def __getattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + #FIXME: is this ever reached? + return object.__getattr__( self, name ) + return self.__safe_string_wrapper_function__( getattr( self.unsanitized, name ) ) + + def __setattr__( self, name, value ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__setattr__( self, name, value ) + return setattr( self.unsanitized, name, value ) + + def __delattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__delattr__( self, name ) + return delattr( self.unsanitized, name ) + + def __getattribute__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__getattribute__( self, name ) + return self.__safe_string_wrapper_function__( getattr( object.__getattribute__( self, 'unsanitized' ), name ) ) + + # Skip Descriptors + + # Skip __slots__ + + # Don't need __metaclass__, we'll use the helper function to handle with subclassing for e.g. isinstance() + + # Revisit: + # __instancecheck__ + # __subclasscheck__ + # We are using a helper class to create dynamic subclasses to handle class checks + + # We address __call__ as needed based upon unsanitized, through the use of a CallableSafeStringWrapper class + + def __len__( self ): + original_value = self.unsanitized + while isinstance( original_value, SafeStringWrapper ): + original_value = self.unsanitized + return len( self.unsanitized ) + + def __getitem__( self, key ): + return self.__safe_string_wrapper_function__( self.unsanitized[ key ] ) + + def __setitem__( self, key, value ): + while isinstance( value, SafeStringWrapper ): + value = value.unsanitized + self.unsanitized[ key ] = value + + def __delitem__( self, key ): + del self.unsanitized[ key ] + + def __iter__( self ): + return iter( map( self.__safe_string_wrapper_function__, iter( self.unsanitized ) ) ) + + # Do not implement __reversed__ + + def __contains__( self, item ): + # FIXME: Do we need to consider if item is/isn't or does/doesn't contain SafeStringWrapper? + # When considering e.g. nested lists/dicts/etc, this gets complicated + while isinstance( item, SafeStringWrapper ): + item = item.unsanitized + return item in self.unsanitized + + # Not sure that we need these slice methods, but will provide anyway + def __getslice__( self, i, j ): + return self.__safe_string_wrapper_function__( self.unsanitized[ i:j ] ) + + def __setslice__( self, i, j, value ): + self.unsanitized[ i:j ] = value + + def __delslice__( self, i, j ): + del self.unsanitized[ i:j ] + + def __add__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized + other ) + + def __sub__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized - other ) + + def __mul__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized * other ) + + def __floordiv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized // other ) + + def __mod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized % other ) + + def __divmod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( divmod( self.unsanitized, other ) ) + + def __pow__( self, *other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( self.unsanitized, *other ) ) + + def __lshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized << other ) + + def __rshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized >> other ) + + def __and__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized & other ) + + def __xor__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized ^ other ) + + def __or__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized | other ) + + def __div__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + def __truediv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + # The only reflected operand that we will define is __rpow__, due to coercion rules complications as per docs + def __rpow__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( other, self.unsanitized ) ) + + # Do not implement in-place operands + + def __neg__( self ): + return __safe_string_wrapper_function__( -self.unsanitized ) + + def __pos__( self ): + return __safe_string_wrapper_function__( +self.unsanitized ) + + def __abs__( self ): + return __safe_string_wrapper_function__( abs( self.unsanitized ) ) + + def __invert__( self ): + return __safe_string_wrapper_function__( ~self.unsanitized ) + + def __complex__( self ): + return __safe_string_wrapper_function__( complex( self.unsanitized ) ) + + def __int__( self ): + return int( self.unsanitized ) + + def __float__( self ): + return float( self.unsanitized ) + + def __oct__( self ): + return oct( self.unsanitized ) + + def __hex__( self ): + return hex( self.unsanitized ) + + def __index__( self ): + return self.unsanitized.index() + + def __coerce__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return coerce( self.unsanitized, other ) + + def __enter__( self ): + return self.unsanitized.__enter__() + + def __exit__( self, *args ): + return self.unsanitized.__exit__( *args ) + +class CallableSafeStringWrapper( SafeStringWrapper ): + + def __call__( self, *args, **kwds ): + return self.__safe_string_wrapper_function__( self.unsanitized( *args, **kwds ) ) + + +# Enable pickling/deepcopy +def pickle_SafeStringWrapper( safe_object ): + args = ( safe_object.unsanitized, ) + cls = SafeStringWrapper + if isinstance( safe_object, CallableSafeStringWrapper ): + cls = CallableSafeStringWrapper + return ( cls, args ) +copy_reg.pickle( SafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) +copy_reg.pickle( CallableSafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) + https://bitbucket.org/galaxy/galaxy-central/commits/5d390ca2627a/ Changeset: 5d390ca2627a Branch: stable User: natefoo Date: 2015-01-13 15:27:39+00:00 Summary: Update tag latest_2014.06.02 for changeset 4145417a6e1c Affected #: 1 file diff -r c46ef9a4aa9cb7eca5951d82bf9aeba408a48496 -r 5d390ca2627a4e9b7ab004da072f21b15dc25501 .hgtags --- a/.hgtags +++ b/.hgtags @@ -8,7 +8,7 @@ 5e605ed6069fe4c5ca9875e95e91b2713499e8ca release_2014.02.10 9e53251b0b7e93b9563008a2b112f2e815a04bbc release_2014.04.14 7e257c7b10badb65772b1528cb61d58175a42e47 release_2014.06.02 -7a4d321c0e38fa263ea83d29a35a608c3181fcba latest_2014.06.02 +4145417a6e1c13f82a3de365aadef0fb3ed7ab14 latest_2014.06.02 8f9dcac033694e4cabcf5daae5cca1cfefbe967f latest_2014.04.14 9c323aad4ffdd65a3deb06a4a36f6b2c5115a60f latest_2013.01.13 b986c184be88947b5d1d90be7f36cfd2627dd938 latest_2013.02.08 https://bitbucket.org/galaxy/galaxy-central/commits/ffacc6553f1a/ Changeset: ffacc6553f1a Branch: stable User: natefoo Date: 2015-01-13 15:27:40+00:00 Summary: Merge head created for security fix on latest_2014.06.02 Affected #: 4 files https://bitbucket.org/galaxy/galaxy-central/commits/8150024c0e6f/ Changeset: 8150024c0e6f Branch: stable User: dan Date: 2015-01-13 15:27:43+00:00 Summary: Fix a critical security vulnerability where unsanitized user-modifiable values could be included in a command line template. Affected #: 6 files diff -r 548ab24667d6206780237bd807f7d857a484c461 -r 8150024c0e6fc5aef3033cf8aaa574896f6b5d0d lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -17,6 +17,7 @@ import galaxy.model from galaxy.util import listify, stringify_dictionary_keys, string_as_bool +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.util.odict import odict from galaxy.util import in_directory from galaxy.web import form_builder @@ -227,6 +228,9 @@ def to_string( self, value ): return str( value ) + def to_safe_string( self, value ): + return sanitize_lists_to_string( self.to_string( value ) ) + def make_copy( self, value, target_context = None, source_context = None ): return copy.deepcopy( value ) @@ -475,6 +479,10 @@ def to_string( self, value ): return json.dumps( value ) + def to_safe_string( self, value ): + # We do not sanitize json dicts + return json.dumps( value ) + class PythonObjectParameter( MetadataParameter ): @@ -505,6 +513,10 @@ return str( self.spec.no_value ) return value.file_name + def to_safe_string( self, value ): + # We do not sanitize file names + return self.to_string( value ) + def get_html_field( self, value=None, context=None, other_values=None, **kwd ): context = context or {} other_values = other_values or {} diff -r 548ab24667d6206780237bd807f7d857a484c461 -r 8150024c0e6fc5aef3033cf8aaa574896f6b5d0d lib/galaxy/tools/evaluation.py --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -2,10 +2,12 @@ import tempfile from galaxy import model +from galaxy.util.object_wrapper import wrap_with_safe_string from galaxy.util.bunch import Bunch from galaxy.util.none_like import NoneDataset from galaxy.util.template import fill_template from galaxy.tools.wrappers import ( + ToolParameterValueWrapper, DatasetFilenameWrapper, DatasetListWrapper, DatasetCollectionWrapper, @@ -114,6 +116,9 @@ self.__populate_input_dataset_wrappers(param_dict, input_datasets, input_dataset_paths) self.__populate_output_dataset_wrappers(param_dict, output_datasets, output_paths, job_working_directory) self.__populate_unstructured_path_rewrites(param_dict) + # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames. + self.__sanitize_param_dict( param_dict ) + # Parameters added after this line are not sanitized self.__populate_non_job_params(param_dict) # Return the dictionary of parameters @@ -334,6 +339,24 @@ #the paths rewritten. self.__walk_inputs( self.tool.inputs, param_dict, rewrite_unstructured_paths ) + def __sanitize_param_dict( self, param_dict ): + """ + Sanitize all values that will be substituted on the command line, with the exception of ToolParameterValueWrappers, + which already have their own specific sanitization rules and also exclude special-cased named values. + We will only examine the first level for values to skip; the wrapping function will recurse as necessary. + + Note: this method follows the style of the similar populate calls, in that param_dict is modified in-place. + """ + # chromInfo is a filename, do not sanitize it. + skip = [ 'chromInfo' ] + if not self.tool or not self.tool.options or self.tool.options.sanitize: + for key, value in param_dict.items(): + if key not in skip: + # Remove key so that new wrapped object will occupy key slot + del param_dict[key] + # And replace with new wrapped key + param_dict[ wrap_with_safe_string( key, no_wrap_classes=ToolParameterValueWrapper ) ] = wrap_with_safe_string( value, no_wrap_classes=ToolParameterValueWrapper ) + def build( self ): """ Build runtime description of job to execute, evaluate command and diff -r 548ab24667d6206780237bd807f7d857a484c461 -r 8150024c0e6fc5aef3033cf8aaa574896f6b5d0d lib/galaxy/tools/wrappers.py --- a/lib/galaxy/tools/wrappers.py +++ b/lib/galaxy/tools/wrappers.py @@ -1,6 +1,7 @@ import pipes from galaxy import exceptions from galaxy.util.none_like import NoneDataset +from galaxy.util.object_wrapper import wrap_with_safe_string from galaxy.util import odict from logging import getLogger @@ -162,10 +163,13 @@ if name in self.metadata.spec: if rval is None: rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) + rval = self.metadata.spec[ name ].param.to_safe_string( rval ) # Store this value, so we don't need to recalculate if needed # again setattr( self, name, rval ) + else: + #escape string value of non-defined metadata value + rval = wrap_with_safe_string( rval ) return rval def __nonzero__( self ): @@ -190,9 +194,13 @@ ext = tool.inputs[name].extensions[0] except: ext = 'data' - self.dataset = NoneDataset( datatypes_registry=datatypes_registry, ext=ext ) + self.dataset = wrap_with_safe_string( NoneDataset( datatypes_registry=datatypes_registry, ext=ext ), no_wrap_classes=ToolParameterValueWrapper ) else: - self.dataset = dataset + # Tool wrappers should not normally be accessing .dataset directly, + # so we will wrap it and keep the original around for file paths + # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper? + self.unsanitized = dataset + self.dataset = wrap_with_safe_string( dataset, no_wrap_classes=ToolParameterValueWrapper ) self.metadata = self.MetadataWrapper( dataset.metadata ) self.false_path = getattr( dataset_path, "false_path", None ) self.false_extra_files_path = getattr( dataset_path, "false_extra_files_path", None ) @@ -205,7 +213,7 @@ if self.false_path is not None: return self.false_path else: - return self.dataset.file_name + return self.unsanitized.file_name def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': @@ -225,7 +233,7 @@ # object store to find the static location of this # directory. try: - return self.dataset.extra_files_path + return self.unsanitized.extra_files_path except exceptions.ObjectNotFound: # NestedObjectstore raises an error here # instead of just returning a non-existent diff -r 548ab24667d6206780237bd807f7d857a484c461 -r 8150024c0e6fc5aef3033cf8aaa574896f6b5d0d lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -345,46 +345,57 @@ '#': '__pd__'} -def restore_text(text): +def restore_text( text, character_map=mapped_chars ): """Restores sanitized text""" if not text: return text - for key, value in mapped_chars.items(): + for key, value in character_map.items(): text = text.replace(value, key) return text -def sanitize_text(text): +def sanitize_text( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """ Restricts the characters that are allowed in text; accepts both strings - and lists of strings. + and lists of strings; non-string entities will be cast to strings. """ - if isinstance( text, basestring ): - return _sanitize_text_helper(text) - elif isinstance( text, list ): - return [ _sanitize_text_helper(t) for t in text ] + if isinstance( text, list ): + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), text ) + if not isinstance( text, basestring ): + text = smart_str( text ) + return _sanitize_text_helper( text, valid_characters=valid_characters, character_map=character_map ) - -def _sanitize_text_helper(text): +def _sanitize_text_helper( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Restricts the characters that are allowed in a string""" out = [] for c in text: - if c in valid_chars: + if c in valid_characters: out.append(c) - elif c in mapped_chars: - out.append(mapped_chars[c]) + elif c in character_map: + out.append( character_map[c] ) else: - out.append('X') # makes debugging easier + out.append( invalid_character ) # makes debugging easier return ''.join(out) -def sanitize_param(value): +def sanitize_lists_to_string( values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + if isinstance( values, list ): + rval = [] + for value in values: + rval.append( sanitize_lists_to_string( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) ) + values = ",".join( rval ) + else: + values = sanitize_text( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + return values + + +def sanitize_param( value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Clean incoming parameters (strings or lists)""" if isinstance( value, basestring ): - return sanitize_text(value) + return sanitize_text( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) elif isinstance( value, list ): - return map(sanitize_text, value) + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), value ) else: raise Exception('Unknown parameter type (%s)' % ( type( value ) )) diff -r 548ab24667d6206780237bd807f7d857a484c461 -r 8150024c0e6fc5aef3033cf8aaa574896f6b5d0d lib/galaxy/util/dbkeys.py --- a/lib/galaxy/util/dbkeys.py +++ b/lib/galaxy/util/dbkeys.py @@ -4,6 +4,7 @@ #dbkeys read from disk using builds.txt from galaxy.util import dbnames from galaxy.util.json import from_json_string +from galaxy.util.object_wrapper import sanitize_lists_to_string import os.path @@ -84,6 +85,7 @@ # use configured server len path if not chrom_info: # Default to built-in build. - chrom_info = os.path.join( self._static_chrom_info_path, "%s.len" % dbkey ) + # Since we are using an unverified dbkey, we will sanitize the dbkey before use + chrom_info = os.path.join( self._static_chrom_info_path, "%s.len" % sanitize_lists_to_string( dbkey ) ) chrom_info = os.path.abspath( chrom_info ) return ( chrom_info, db_dataset ) diff -r 548ab24667d6206780237bd807f7d857a484c461 -r 8150024c0e6fc5aef3033cf8aaa574896f6b5d0d lib/galaxy/util/object_wrapper.py --- /dev/null +++ b/lib/galaxy/util/object_wrapper.py @@ -0,0 +1,436 @@ +""" +Classes for wrapping Objects and Sanitizing string output. +""" + +import inspect +import copy_reg +import logging +import string +from numbers import Number +from types import ( NoneType, NotImplementedType, EllipsisType, FunctionType, MethodType, GeneratorType, CodeType, + BuiltinFunctionType, BuiltinMethodType, ModuleType, XRangeType, SliceType, TracebackType, FrameType, + BufferType, DictProxyType, GetSetDescriptorType, MemberDescriptorType ) +from UserDict import UserDict + +from galaxy.util import sanitize_lists_to_string as _sanitize_lists_to_string + +log = logging.getLogger( __name__ ) + +# Define different behaviors for different types, see also: https://docs.python.org/2/library/types.html + +# Known Callable types +__CALLABLE_TYPES__ = ( FunctionType, MethodType, GeneratorType, CodeType, BuiltinFunctionType, BuiltinMethodType, ) + +# Always wrap these types without attempting to subclass +__WRAP_NO_SUBCLASS__ = ( ModuleType, XRangeType, SliceType, BufferType, TracebackType, FrameType, DictProxyType, + GetSetDescriptorType, MemberDescriptorType ) + __CALLABLE_TYPES__ + +# Don't wrap or sanitize. +__DONT_SANITIZE_TYPES__ = ( Number, bool, NoneType, NotImplementedType, EllipsisType, bytearray, ) + +# Don't wrap, but do sanitize. +__DONT_WRAP_TYPES__ = tuple() #( basestring, ) so that we can get the unsanitized string, we will now wrap basestring instances + +# Wrap contents, but not the container +__WRAP_SEQUENCES__ = ( tuple, list, ) +__WRAP_SETS__ = ( set, frozenset, ) +__WRAP_MAPPINGS__ = ( dict, UserDict, ) + + +# Define the set of characters that are not sanitized, and define a set of mappings for those that are. +# characters that are valid +VALID_CHARACTERS = set( string.letters + string.digits + " -=_.()/+*^,:?!@" ) + +# characters that are allowed but need to be escaped +CHARACTER_MAP = { '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__'} + +INVALID_CHARACTER = "X" + +def sanitize_lists_to_string( values, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP, invalid_character=INVALID_CHARACTER ): + return _sanitize_lists_to_string( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + + +def wrap_with_safe_string( value, no_wrap_classes = None ): + """ + Recursively wrap values that should be wrapped. + """ + + def __do_wrap( value ): + if isinstance( value, SafeStringWrapper ): + # Only ever wrap one-layer + return value + if callable( value ): + safe_class = CallableSafeStringWrapper + else: + safe_class = SafeStringWrapper + if isinstance( value, no_wrap_classes ): + return value + if isinstance( value, __DONT_WRAP_TYPES__ ): + return sanitize_lists_to_string( value, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + if isinstance( value, __WRAP_NO_SUBCLASS__ ): + return safe_class( value, safe_string_wrapper_function = __do_wrap ) + for this_type in __WRAP_SEQUENCES__ + __WRAP_SETS__: + if isinstance( value, this_type ): + return this_type( map( __do_wrap, value ) ) + for this_type in __WRAP_MAPPINGS__: + if isinstance( value, this_type ): + # Wrap both key and value + return this_type( map( lambda x: ( __do_wrap( x[0] ), __do_wrap( x[1] ) ), value.items() ) ) + # Create a dynamic class that joins SafeStringWrapper with the object being wrapped. + # This allows e.g. isinstance to continue to work. + try: + wrapped_class_name = value.__name__ + wrapped_class = value + except: + wrapped_class_name = value.__class__.__name__ + wrapped_class = value.__class__ + value_mod = inspect.getmodule( value ) + if value_mod: + wrapped_class_name = "%s.%s" % ( value_mod.__name__, wrapped_class_name ) + wrapped_class_name = "SafeStringWrapper(%s:%s)" % ( wrapped_class_name, ",".join( sorted( map( str, no_wrap_classes ) ) ) ) + do_wrap_func_name = "__do_wrap_%s" % ( wrapped_class_name ) + do_wrap_func = __do_wrap + global_dict = globals() + if wrapped_class_name in global_dict: + # Check to see if we have created a wrapper for this class yet, if so, reuse + wrapped_class = global_dict.get( wrapped_class_name ) + do_wrap_func = global_dict.get( do_wrap_func_name, __do_wrap ) + else: + try: + wrapped_class = type( wrapped_class_name, ( safe_class, wrapped_class, ), {} ) + except TypeError, e: + # Fail-safe for when a class cannot be dynamically subclassed. + log.warning( "Unable to create dynamic subclass for %s, %s: %s", type( value), value, e ) + wrapped_class = type( wrapped_class_name, ( safe_class, ), {} ) + if wrapped_class not in ( SafeStringWrapper, CallableSafeStringWrapper ): + # Save this wrapper for reuse and pickling/copying + global_dict[ wrapped_class_name ] = wrapped_class + do_wrap_func.__name__ = do_wrap_func_name + global_dict[ do_wrap_func_name ] = do_wrap_func + def pickle_safe_object( safe_object ): + return ( wrapped_class, ( safe_object.unsanitized, do_wrap_func, ) ) + # Set pickle and copy properties + copy_reg.pickle( wrapped_class, pickle_safe_object, do_wrap_func ) + return wrapped_class( value, safe_string_wrapper_function = do_wrap_func ) + # Determine classes not to wrap + if no_wrap_classes: + if not isinstance( no_wrap_classes, ( tuple, list ) ): + no_wrap_classes = [ no_wrap_classes ] + no_wrap_classes = list( no_wrap_classes ) + list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + else: + no_wrap_classes = list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + no_wrap_classes = tuple( set( sorted( no_wrap_classes, key=str ) ) ) + return __do_wrap( value ) + + +# N.B. refer to e.g. https://docs.python.org/2/reference/datamodel.html for information on Python's Data Model. + + +class SafeStringWrapper( object ): + """ + Class that wraps and sanitizes any provided value's attributes + that will attempt to be cast into a string. + + Attempts to mimic behavior of original class, including operands. + + To ensure proper handling of e.g. subclass checks, the *wrap_with_safe_string()* + method should be used. + + This wrapping occurs in a recursive/parasitic fashion, as all called attributes of + the originally wrapped object will also be wrapped and sanitized, unless the attribute + is of a type found in __DONT_SANITIZE_TYPES__ + __DONT_WRAP_TYPES__, where e.g. ~(strings + will still be sanitized, but not wrapped), and e.g. integers will have neither. + """ + __UNSANITIZED_ATTRIBUTE_NAME__ = 'unsanitized' + __NO_WRAP_NAMES__ = [ '__safe_string_wrapper_function__', __UNSANITIZED_ATTRIBUTE_NAME__] + + + def __new__( cls, *arg, **kwd ): + # We need to define a __new__ since, we are subclassing from e.g. immutable str, which internally sets data + # that will be used when other + this (this + other is handled by __add__) + safe_string_wrapper_function = kwd.get( 'safe_string_wrapper_function', None) or wrap_with_safe_string + try: + return super( SafeStringWrapper, cls ).__new__( cls, sanitize_lists_to_string( arg[0], valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + except Exception, e: + log.warning( "Could not provide an argument to %s.__new__: %s; will try without arguments.", cls, e ) + return super( SafeStringWrapper, cls ).__new__( cls ) + + def __init__( self, value, safe_string_wrapper_function = wrap_with_safe_string ): + self.unsanitized = value + self.__safe_string_wrapper_function__ = safe_string_wrapper_function + + def __str__( self ): + return sanitize_lists_to_string( self.unsanitized, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + + def __repr__( self ): + return "%s object at %x on: %s" % ( sanitize_lists_to_string( self.__class__.__name__, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ), id( self ), sanitize_lists_to_string( repr( self.unsanitized ), valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __le__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized <= other + + def __eq__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized == other + + def __ne__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized != other + + def __gt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized > other + + def __ge__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized >= other + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __cmp__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return cmp( self.unsanitized, other ) + + # Do not implement __rcmp__, python 2.2 < 2.6 + + def __hash__( self ): + return hash( self.unsanitized ) + + def __nonzero__( self ): + return bool( self.unsanitized ) + + # Do not implement __unicode__, we will rely on __str__ + + def __getattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + #FIXME: is this ever reached? + return object.__getattr__( self, name ) + return self.__safe_string_wrapper_function__( getattr( self.unsanitized, name ) ) + + def __setattr__( self, name, value ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__setattr__( self, name, value ) + return setattr( self.unsanitized, name, value ) + + def __delattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__delattr__( self, name ) + return delattr( self.unsanitized, name ) + + def __getattribute__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__getattribute__( self, name ) + return self.__safe_string_wrapper_function__( getattr( object.__getattribute__( self, 'unsanitized' ), name ) ) + + # Skip Descriptors + + # Skip __slots__ + + # Don't need __metaclass__, we'll use the helper function to handle with subclassing for e.g. isinstance() + + # Revisit: + # __instancecheck__ + # __subclasscheck__ + # We are using a helper class to create dynamic subclasses to handle class checks + + # We address __call__ as needed based upon unsanitized, through the use of a CallableSafeStringWrapper class + + def __len__( self ): + original_value = self.unsanitized + while isinstance( original_value, SafeStringWrapper ): + original_value = self.unsanitized + return len( self.unsanitized ) + + def __getitem__( self, key ): + return self.__safe_string_wrapper_function__( self.unsanitized[ key ] ) + + def __setitem__( self, key, value ): + while isinstance( value, SafeStringWrapper ): + value = value.unsanitized + self.unsanitized[ key ] = value + + def __delitem__( self, key ): + del self.unsanitized[ key ] + + def __iter__( self ): + return iter( map( self.__safe_string_wrapper_function__, iter( self.unsanitized ) ) ) + + # Do not implement __reversed__ + + def __contains__( self, item ): + # FIXME: Do we need to consider if item is/isn't or does/doesn't contain SafeStringWrapper? + # When considering e.g. nested lists/dicts/etc, this gets complicated + while isinstance( item, SafeStringWrapper ): + item = item.unsanitized + return item in self.unsanitized + + # Not sure that we need these slice methods, but will provide anyway + def __getslice__( self, i, j ): + return self.__safe_string_wrapper_function__( self.unsanitized[ i:j ] ) + + def __setslice__( self, i, j, value ): + self.unsanitized[ i:j ] = value + + def __delslice__( self, i, j ): + del self.unsanitized[ i:j ] + + def __add__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized + other ) + + def __sub__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized - other ) + + def __mul__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized * other ) + + def __floordiv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized // other ) + + def __mod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized % other ) + + def __divmod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( divmod( self.unsanitized, other ) ) + + def __pow__( self, *other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( self.unsanitized, *other ) ) + + def __lshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized << other ) + + def __rshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized >> other ) + + def __and__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized & other ) + + def __xor__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized ^ other ) + + def __or__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized | other ) + + def __div__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + def __truediv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + # The only reflected operand that we will define is __rpow__, due to coercion rules complications as per docs + def __rpow__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( other, self.unsanitized ) ) + + # Do not implement in-place operands + + def __neg__( self ): + return __safe_string_wrapper_function__( -self.unsanitized ) + + def __pos__( self ): + return __safe_string_wrapper_function__( +self.unsanitized ) + + def __abs__( self ): + return __safe_string_wrapper_function__( abs( self.unsanitized ) ) + + def __invert__( self ): + return __safe_string_wrapper_function__( ~self.unsanitized ) + + def __complex__( self ): + return __safe_string_wrapper_function__( complex( self.unsanitized ) ) + + def __int__( self ): + return int( self.unsanitized ) + + def __float__( self ): + return float( self.unsanitized ) + + def __oct__( self ): + return oct( self.unsanitized ) + + def __hex__( self ): + return hex( self.unsanitized ) + + def __index__( self ): + return self.unsanitized.index() + + def __coerce__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return coerce( self.unsanitized, other ) + + def __enter__( self ): + return self.unsanitized.__enter__() + + def __exit__( self, *args ): + return self.unsanitized.__exit__( *args ) + +class CallableSafeStringWrapper( SafeStringWrapper ): + + def __call__( self, *args, **kwds ): + return self.__safe_string_wrapper_function__( self.unsanitized( *args, **kwds ) ) + + +# Enable pickling/deepcopy +def pickle_SafeStringWrapper( safe_object ): + args = ( safe_object.unsanitized, ) + cls = SafeStringWrapper + if isinstance( safe_object, CallableSafeStringWrapper ): + cls = CallableSafeStringWrapper + return ( cls, args ) +copy_reg.pickle( SafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) +copy_reg.pickle( CallableSafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) + https://bitbucket.org/galaxy/galaxy-central/commits/c776fba69528/ Changeset: c776fba69528 Branch: stable User: natefoo Date: 2015-01-13 15:27:46+00:00 Summary: Update tag latest_2014.08.11 for changeset 8150024c0e6f Affected #: 1 file diff -r ffacc6553f1ad02470d92e058c15848251e12453 -r c776fba69528715271b416a97928246888180d80 .hgtags --- a/.hgtags +++ b/.hgtags @@ -18,6 +18,6 @@ 7d5aa19a166cba9039e15f338a1e3fc924c43d3a latest_2013.11.04 0c000cc2f9c05bf4c1c2bc3a10215014fd64e696 latest_2014.02.10 ca45b78adb4152fc6e7395514d46eba6b7d0b838 release_2014.08.11 -548ab24667d6206780237bd807f7d857a484c461 latest_2014.08.11 +8150024c0e6fc5aef3033cf8aaa574896f6b5d0d latest_2014.08.11 2092948937ac30ef82f71463a235c66d34987088 release_2014.10.06 ff6e36d7a2388214fe7636c43d30838c07246907 latest_2014.10.06 https://bitbucket.org/galaxy/galaxy-central/commits/37201f6fe299/ Changeset: 37201f6fe299 Branch: stable User: natefoo Date: 2015-01-13 15:27:47+00:00 Summary: Merge head created for security fix on latest_2014.08.11 Affected #: 3 files https://bitbucket.org/galaxy/galaxy-central/commits/c437b28348a9/ Changeset: c437b28348a9 Branch: stable User: dan Date: 2015-01-13 15:27:49+00:00 Summary: Fix a critical security vulnerability where unsanitized user-modifiable values could be included in a command line template. Affected #: 6 files diff -r ff6e36d7a2388214fe7636c43d30838c07246907 -r c437b28348a9345db8433e5b4f0e05ec8fb6c38a lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -20,6 +20,7 @@ import galaxy.model from galaxy.util import listify +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.util import stringify_dictionary_keys from galaxy.util import string_as_bool from galaxy.util import in_directory @@ -232,6 +233,9 @@ def to_string( self, value ): return str( value ) + def to_safe_string( self, value ): + return sanitize_lists_to_string( self.to_string( value ) ) + def make_copy( self, value, target_context = None, source_context = None ): return copy.deepcopy( value ) @@ -480,6 +484,10 @@ def to_string( self, value ): return json.dumps( value ) + def to_safe_string( self, value ): + # We do not sanitize json dicts + return json.safe_dumps( value ) + class PythonObjectParameter( MetadataParameter ): @@ -510,6 +518,10 @@ return str( self.spec.no_value ) return value.file_name + def to_safe_string( self, value ): + # We do not sanitize file names + return self.to_string( value ) + def get_html_field( self, value=None, context=None, other_values=None, **kwd ): context = context or {} other_values = other_values or {} diff -r ff6e36d7a2388214fe7636c43d30838c07246907 -r c437b28348a9345db8433e5b4f0e05ec8fb6c38a lib/galaxy/tools/evaluation.py --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -2,10 +2,12 @@ import tempfile from galaxy import model +from galaxy.util.object_wrapper import wrap_with_safe_string from galaxy.util.bunch import Bunch from galaxy.util.none_like import NoneDataset from galaxy.util.template import fill_template from galaxy.tools.wrappers import ( + ToolParameterValueWrapper, DatasetFilenameWrapper, DatasetListWrapper, DatasetCollectionWrapper, @@ -114,6 +116,9 @@ self.__populate_input_dataset_wrappers(param_dict, input_datasets, input_dataset_paths) self.__populate_output_dataset_wrappers(param_dict, output_datasets, output_paths, job_working_directory) self.__populate_unstructured_path_rewrites(param_dict) + # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames. + self.__sanitize_param_dict( param_dict ) + # Parameters added after this line are not sanitized self.__populate_non_job_params(param_dict) # Return the dictionary of parameters @@ -334,6 +339,24 @@ #the paths rewritten. self.__walk_inputs( self.tool.inputs, param_dict, rewrite_unstructured_paths ) + def __sanitize_param_dict( self, param_dict ): + """ + Sanitize all values that will be substituted on the command line, with the exception of ToolParameterValueWrappers, + which already have their own specific sanitization rules and also exclude special-cased named values. + We will only examine the first level for values to skip; the wrapping function will recurse as necessary. + + Note: this method follows the style of the similar populate calls, in that param_dict is modified in-place. + """ + # chromInfo is a filename, do not sanitize it. + skip = [ 'chromInfo' ] + if not self.tool or not self.tool.options or self.tool.options.sanitize: + for key, value in param_dict.items(): + if key not in skip: + # Remove key so that new wrapped object will occupy key slot + del param_dict[key] + # And replace with new wrapped key + param_dict[ wrap_with_safe_string( key, no_wrap_classes=ToolParameterValueWrapper ) ] = wrap_with_safe_string( value, no_wrap_classes=ToolParameterValueWrapper ) + def build( self ): """ Build runtime description of job to execute, evaluate command and diff -r ff6e36d7a2388214fe7636c43d30838c07246907 -r c437b28348a9345db8433e5b4f0e05ec8fb6c38a lib/galaxy/tools/wrappers.py --- a/lib/galaxy/tools/wrappers.py +++ b/lib/galaxy/tools/wrappers.py @@ -2,6 +2,7 @@ from galaxy import exceptions from galaxy.util.none_like import NoneDataset from galaxy.util import odict +from galaxy.util.object_wrapper import wrap_with_safe_string from logging import getLogger log = getLogger( __name__ ) @@ -162,10 +163,13 @@ if name in self.metadata.spec: if rval is None: rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) + rval = self.metadata.spec[ name ].param.to_safe_string( rval ) # Store this value, so we don't need to recalculate if needed # again setattr( self, name, rval ) + else: + #escape string value of non-defined metadata value + rval = wrap_with_safe_string( rval ) return rval def __nonzero__( self ): @@ -190,9 +194,13 @@ ext = tool.inputs[name].extensions[0] except: ext = 'data' - self.dataset = NoneDataset( datatypes_registry=datatypes_registry, ext=ext ) + self.dataset = wrap_with_safe_string( NoneDataset( datatypes_registry=datatypes_registry, ext=ext ), no_wrap_classes=ToolParameterValueWrapper ) else: - self.dataset = dataset + # Tool wrappers should not normally be accessing .dataset directly, + # so we will wrap it and keep the original around for file paths + # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper? + self.unsanitized = dataset + self.dataset = wrap_with_safe_string( dataset, no_wrap_classes=ToolParameterValueWrapper ) self.metadata = self.MetadataWrapper( dataset.metadata ) self.datatypes_registry = datatypes_registry self.false_path = getattr( dataset_path, "false_path", None ) @@ -210,7 +218,7 @@ if self.false_path is not None: return self.false_path else: - return self.dataset.file_name + return self.unsanitized.file_name def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': @@ -230,7 +238,7 @@ # object store to find the static location of this # directory. try: - return self.dataset.extra_files_path + return self.unsanitized.extra_files_path except exceptions.ObjectNotFound: # NestedObjectstore raises an error here # instead of just returning a non-existent diff -r ff6e36d7a2388214fe7636c43d30838c07246907 -r c437b28348a9345db8433e5b4f0e05ec8fb6c38a lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -360,46 +360,57 @@ '#': '__pd__'} -def restore_text(text): +def restore_text( text, character_map=mapped_chars ): """Restores sanitized text""" if not text: return text - for key, value in mapped_chars.items(): + for key, value in character_map.items(): text = text.replace(value, key) return text -def sanitize_text(text): +def sanitize_text( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """ Restricts the characters that are allowed in text; accepts both strings - and lists of strings. + and lists of strings; non-string entities will be cast to strings. """ - if isinstance( text, basestring ): - return _sanitize_text_helper(text) - elif isinstance( text, list ): - return [ _sanitize_text_helper(t) for t in text ] + if isinstance( text, list ): + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), text ) + if not isinstance( text, basestring ): + text = smart_str( text ) + return _sanitize_text_helper( text, valid_characters=valid_characters, character_map=character_map ) - -def _sanitize_text_helper(text): +def _sanitize_text_helper( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Restricts the characters that are allowed in a string""" out = [] for c in text: - if c in valid_chars: + if c in valid_characters: out.append(c) - elif c in mapped_chars: - out.append(mapped_chars[c]) + elif c in character_map: + out.append( character_map[c] ) else: - out.append('X') # makes debugging easier + out.append( invalid_character ) # makes debugging easier return ''.join(out) -def sanitize_param(value): +def sanitize_lists_to_string( values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + if isinstance( values, list ): + rval = [] + for value in values: + rval.append( sanitize_lists_to_string( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) ) + values = ",".join( rval ) + else: + values = sanitize_text( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + return values + + +def sanitize_param( value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Clean incoming parameters (strings or lists)""" if isinstance( value, basestring ): - return sanitize_text(value) + return sanitize_text( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) elif isinstance( value, list ): - return map(sanitize_text, value) + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), value ) else: raise Exception('Unknown parameter type (%s)' % ( type( value ) )) diff -r ff6e36d7a2388214fe7636c43d30838c07246907 -r c437b28348a9345db8433e5b4f0e05ec8fb6c38a lib/galaxy/util/dbkeys.py --- a/lib/galaxy/util/dbkeys.py +++ b/lib/galaxy/util/dbkeys.py @@ -4,6 +4,7 @@ #dbkeys read from disk using builds.txt from galaxy.util import read_dbnames from galaxy.util.json import loads +from galaxy.util.object_wrapper import sanitize_lists_to_string import os.path @@ -84,6 +85,7 @@ # use configured server len path if not chrom_info: # Default to built-in build. - chrom_info = os.path.join( self._static_chrom_info_path, "%s.len" % dbkey ) + # Since we are using an unverified dbkey, we will sanitize the dbkey before use + chrom_info = os.path.join( self._static_chrom_info_path, "%s.len" % sanitize_lists_to_string( dbkey ) ) chrom_info = os.path.abspath( chrom_info ) return ( chrom_info, db_dataset ) diff -r ff6e36d7a2388214fe7636c43d30838c07246907 -r c437b28348a9345db8433e5b4f0e05ec8fb6c38a lib/galaxy/util/object_wrapper.py --- /dev/null +++ b/lib/galaxy/util/object_wrapper.py @@ -0,0 +1,436 @@ +""" +Classes for wrapping Objects and Sanitizing string output. +""" + +import inspect +import copy_reg +import logging +import string +from numbers import Number +from types import ( NoneType, NotImplementedType, EllipsisType, FunctionType, MethodType, GeneratorType, CodeType, + BuiltinFunctionType, BuiltinMethodType, ModuleType, XRangeType, SliceType, TracebackType, FrameType, + BufferType, DictProxyType, GetSetDescriptorType, MemberDescriptorType ) +from UserDict import UserDict + +from galaxy.util import sanitize_lists_to_string as _sanitize_lists_to_string + +log = logging.getLogger( __name__ ) + +# Define different behaviors for different types, see also: https://docs.python.org/2/library/types.html + +# Known Callable types +__CALLABLE_TYPES__ = ( FunctionType, MethodType, GeneratorType, CodeType, BuiltinFunctionType, BuiltinMethodType, ) + +# Always wrap these types without attempting to subclass +__WRAP_NO_SUBCLASS__ = ( ModuleType, XRangeType, SliceType, BufferType, TracebackType, FrameType, DictProxyType, + GetSetDescriptorType, MemberDescriptorType ) + __CALLABLE_TYPES__ + +# Don't wrap or sanitize. +__DONT_SANITIZE_TYPES__ = ( Number, bool, NoneType, NotImplementedType, EllipsisType, bytearray, ) + +# Don't wrap, but do sanitize. +__DONT_WRAP_TYPES__ = tuple() #( basestring, ) so that we can get the unsanitized string, we will now wrap basestring instances + +# Wrap contents, but not the container +__WRAP_SEQUENCES__ = ( tuple, list, ) +__WRAP_SETS__ = ( set, frozenset, ) +__WRAP_MAPPINGS__ = ( dict, UserDict, ) + + +# Define the set of characters that are not sanitized, and define a set of mappings for those that are. +# characters that are valid +VALID_CHARACTERS = set( string.letters + string.digits + " -=_.()/+*^,:?!@" ) + +# characters that are allowed but need to be escaped +CHARACTER_MAP = { '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__'} + +INVALID_CHARACTER = "X" + +def sanitize_lists_to_string( values, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP, invalid_character=INVALID_CHARACTER ): + return _sanitize_lists_to_string( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + + +def wrap_with_safe_string( value, no_wrap_classes = None ): + """ + Recursively wrap values that should be wrapped. + """ + + def __do_wrap( value ): + if isinstance( value, SafeStringWrapper ): + # Only ever wrap one-layer + return value + if callable( value ): + safe_class = CallableSafeStringWrapper + else: + safe_class = SafeStringWrapper + if isinstance( value, no_wrap_classes ): + return value + if isinstance( value, __DONT_WRAP_TYPES__ ): + return sanitize_lists_to_string( value, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + if isinstance( value, __WRAP_NO_SUBCLASS__ ): + return safe_class( value, safe_string_wrapper_function = __do_wrap ) + for this_type in __WRAP_SEQUENCES__ + __WRAP_SETS__: + if isinstance( value, this_type ): + return this_type( map( __do_wrap, value ) ) + for this_type in __WRAP_MAPPINGS__: + if isinstance( value, this_type ): + # Wrap both key and value + return this_type( map( lambda x: ( __do_wrap( x[0] ), __do_wrap( x[1] ) ), value.items() ) ) + # Create a dynamic class that joins SafeStringWrapper with the object being wrapped. + # This allows e.g. isinstance to continue to work. + try: + wrapped_class_name = value.__name__ + wrapped_class = value + except: + wrapped_class_name = value.__class__.__name__ + wrapped_class = value.__class__ + value_mod = inspect.getmodule( value ) + if value_mod: + wrapped_class_name = "%s.%s" % ( value_mod.__name__, wrapped_class_name ) + wrapped_class_name = "SafeStringWrapper(%s:%s)" % ( wrapped_class_name, ",".join( sorted( map( str, no_wrap_classes ) ) ) ) + do_wrap_func_name = "__do_wrap_%s" % ( wrapped_class_name ) + do_wrap_func = __do_wrap + global_dict = globals() + if wrapped_class_name in global_dict: + # Check to see if we have created a wrapper for this class yet, if so, reuse + wrapped_class = global_dict.get( wrapped_class_name ) + do_wrap_func = global_dict.get( do_wrap_func_name, __do_wrap ) + else: + try: + wrapped_class = type( wrapped_class_name, ( safe_class, wrapped_class, ), {} ) + except TypeError, e: + # Fail-safe for when a class cannot be dynamically subclassed. + log.warning( "Unable to create dynamic subclass for %s, %s: %s", type( value), value, e ) + wrapped_class = type( wrapped_class_name, ( safe_class, ), {} ) + if wrapped_class not in ( SafeStringWrapper, CallableSafeStringWrapper ): + # Save this wrapper for reuse and pickling/copying + global_dict[ wrapped_class_name ] = wrapped_class + do_wrap_func.__name__ = do_wrap_func_name + global_dict[ do_wrap_func_name ] = do_wrap_func + def pickle_safe_object( safe_object ): + return ( wrapped_class, ( safe_object.unsanitized, do_wrap_func, ) ) + # Set pickle and copy properties + copy_reg.pickle( wrapped_class, pickle_safe_object, do_wrap_func ) + return wrapped_class( value, safe_string_wrapper_function = do_wrap_func ) + # Determine classes not to wrap + if no_wrap_classes: + if not isinstance( no_wrap_classes, ( tuple, list ) ): + no_wrap_classes = [ no_wrap_classes ] + no_wrap_classes = list( no_wrap_classes ) + list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + else: + no_wrap_classes = list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + no_wrap_classes = tuple( set( sorted( no_wrap_classes, key=str ) ) ) + return __do_wrap( value ) + + +# N.B. refer to e.g. https://docs.python.org/2/reference/datamodel.html for information on Python's Data Model. + + +class SafeStringWrapper( object ): + """ + Class that wraps and sanitizes any provided value's attributes + that will attempt to be cast into a string. + + Attempts to mimic behavior of original class, including operands. + + To ensure proper handling of e.g. subclass checks, the *wrap_with_safe_string()* + method should be used. + + This wrapping occurs in a recursive/parasitic fashion, as all called attributes of + the originally wrapped object will also be wrapped and sanitized, unless the attribute + is of a type found in __DONT_SANITIZE_TYPES__ + __DONT_WRAP_TYPES__, where e.g. ~(strings + will still be sanitized, but not wrapped), and e.g. integers will have neither. + """ + __UNSANITIZED_ATTRIBUTE_NAME__ = 'unsanitized' + __NO_WRAP_NAMES__ = [ '__safe_string_wrapper_function__', __UNSANITIZED_ATTRIBUTE_NAME__] + + + def __new__( cls, *arg, **kwd ): + # We need to define a __new__ since, we are subclassing from e.g. immutable str, which internally sets data + # that will be used when other + this (this + other is handled by __add__) + safe_string_wrapper_function = kwd.get( 'safe_string_wrapper_function', None) or wrap_with_safe_string + try: + return super( SafeStringWrapper, cls ).__new__( cls, sanitize_lists_to_string( arg[0], valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + except Exception, e: + log.warning( "Could not provide an argument to %s.__new__: %s; will try without arguments.", cls, e ) + return super( SafeStringWrapper, cls ).__new__( cls ) + + def __init__( self, value, safe_string_wrapper_function = wrap_with_safe_string ): + self.unsanitized = value + self.__safe_string_wrapper_function__ = safe_string_wrapper_function + + def __str__( self ): + return sanitize_lists_to_string( self.unsanitized, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + + def __repr__( self ): + return "%s object at %x on: %s" % ( sanitize_lists_to_string( self.__class__.__name__, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ), id( self ), sanitize_lists_to_string( repr( self.unsanitized ), valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __le__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized <= other + + def __eq__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized == other + + def __ne__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized != other + + def __gt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized > other + + def __ge__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized >= other + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __cmp__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return cmp( self.unsanitized, other ) + + # Do not implement __rcmp__, python 2.2 < 2.6 + + def __hash__( self ): + return hash( self.unsanitized ) + + def __nonzero__( self ): + return bool( self.unsanitized ) + + # Do not implement __unicode__, we will rely on __str__ + + def __getattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + #FIXME: is this ever reached? + return object.__getattr__( self, name ) + return self.__safe_string_wrapper_function__( getattr( self.unsanitized, name ) ) + + def __setattr__( self, name, value ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__setattr__( self, name, value ) + return setattr( self.unsanitized, name, value ) + + def __delattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__delattr__( self, name ) + return delattr( self.unsanitized, name ) + + def __getattribute__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__getattribute__( self, name ) + return self.__safe_string_wrapper_function__( getattr( object.__getattribute__( self, 'unsanitized' ), name ) ) + + # Skip Descriptors + + # Skip __slots__ + + # Don't need __metaclass__, we'll use the helper function to handle with subclassing for e.g. isinstance() + + # Revisit: + # __instancecheck__ + # __subclasscheck__ + # We are using a helper class to create dynamic subclasses to handle class checks + + # We address __call__ as needed based upon unsanitized, through the use of a CallableSafeStringWrapper class + + def __len__( self ): + original_value = self.unsanitized + while isinstance( original_value, SafeStringWrapper ): + original_value = self.unsanitized + return len( self.unsanitized ) + + def __getitem__( self, key ): + return self.__safe_string_wrapper_function__( self.unsanitized[ key ] ) + + def __setitem__( self, key, value ): + while isinstance( value, SafeStringWrapper ): + value = value.unsanitized + self.unsanitized[ key ] = value + + def __delitem__( self, key ): + del self.unsanitized[ key ] + + def __iter__( self ): + return iter( map( self.__safe_string_wrapper_function__, iter( self.unsanitized ) ) ) + + # Do not implement __reversed__ + + def __contains__( self, item ): + # FIXME: Do we need to consider if item is/isn't or does/doesn't contain SafeStringWrapper? + # When considering e.g. nested lists/dicts/etc, this gets complicated + while isinstance( item, SafeStringWrapper ): + item = item.unsanitized + return item in self.unsanitized + + # Not sure that we need these slice methods, but will provide anyway + def __getslice__( self, i, j ): + return self.__safe_string_wrapper_function__( self.unsanitized[ i:j ] ) + + def __setslice__( self, i, j, value ): + self.unsanitized[ i:j ] = value + + def __delslice__( self, i, j ): + del self.unsanitized[ i:j ] + + def __add__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized + other ) + + def __sub__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized - other ) + + def __mul__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized * other ) + + def __floordiv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized // other ) + + def __mod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized % other ) + + def __divmod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( divmod( self.unsanitized, other ) ) + + def __pow__( self, *other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( self.unsanitized, *other ) ) + + def __lshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized << other ) + + def __rshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized >> other ) + + def __and__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized & other ) + + def __xor__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized ^ other ) + + def __or__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized | other ) + + def __div__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + def __truediv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + # The only reflected operand that we will define is __rpow__, due to coercion rules complications as per docs + def __rpow__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( other, self.unsanitized ) ) + + # Do not implement in-place operands + + def __neg__( self ): + return __safe_string_wrapper_function__( -self.unsanitized ) + + def __pos__( self ): + return __safe_string_wrapper_function__( +self.unsanitized ) + + def __abs__( self ): + return __safe_string_wrapper_function__( abs( self.unsanitized ) ) + + def __invert__( self ): + return __safe_string_wrapper_function__( ~self.unsanitized ) + + def __complex__( self ): + return __safe_string_wrapper_function__( complex( self.unsanitized ) ) + + def __int__( self ): + return int( self.unsanitized ) + + def __float__( self ): + return float( self.unsanitized ) + + def __oct__( self ): + return oct( self.unsanitized ) + + def __hex__( self ): + return hex( self.unsanitized ) + + def __index__( self ): + return self.unsanitized.index() + + def __coerce__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return coerce( self.unsanitized, other ) + + def __enter__( self ): + return self.unsanitized.__enter__() + + def __exit__( self, *args ): + return self.unsanitized.__exit__( *args ) + +class CallableSafeStringWrapper( SafeStringWrapper ): + + def __call__( self, *args, **kwds ): + return self.__safe_string_wrapper_function__( self.unsanitized( *args, **kwds ) ) + + +# Enable pickling/deepcopy +def pickle_SafeStringWrapper( safe_object ): + args = ( safe_object.unsanitized, ) + cls = SafeStringWrapper + if isinstance( safe_object, CallableSafeStringWrapper ): + cls = CallableSafeStringWrapper + return ( cls, args ) +copy_reg.pickle( SafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) +copy_reg.pickle( CallableSafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) + https://bitbucket.org/galaxy/galaxy-central/commits/593bd69b3d5b/ Changeset: 593bd69b3d5b Branch: stable User: natefoo Date: 2015-01-13 15:27:50+00:00 Summary: Update tag latest_2014.10.06 for changeset c437b28348a9 Affected #: 1 file diff -r 37201f6fe299a099fad0b5f54ff0ba72a02185cb -r 593bd69b3d5b937c2eb0ae5133a8f989b3131504 .hgtags --- a/.hgtags +++ b/.hgtags @@ -20,4 +20,4 @@ ca45b78adb4152fc6e7395514d46eba6b7d0b838 release_2014.08.11 8150024c0e6fc5aef3033cf8aaa574896f6b5d0d latest_2014.08.11 2092948937ac30ef82f71463a235c66d34987088 release_2014.10.06 -ff6e36d7a2388214fe7636c43d30838c07246907 latest_2014.10.06 +c437b28348a9345db8433e5b4f0e05ec8fb6c38a latest_2014.10.06 https://bitbucket.org/galaxy/galaxy-central/commits/605de23ca239/ Changeset: 605de23ca239 Branch: stable User: natefoo Date: 2015-01-13 15:27:51+00:00 Summary: Merge head created for security fix on latest_2014.10.06 Affected #: 5 files diff -r 593bd69b3d5b937c2eb0ae5133a8f989b3131504 -r 605de23ca239879a34426885feddd2d3a459ca26 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -20,6 +20,7 @@ import galaxy.model from galaxy.util import listify +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.util import stringify_dictionary_keys from galaxy.util import string_as_bool from galaxy.util import in_directory @@ -232,6 +233,9 @@ def to_string( self, value ): return str( value ) + def to_safe_string( self, value ): + return sanitize_lists_to_string( self.to_string( value ) ) + def make_copy( self, value, target_context = None, source_context = None ): return copy.deepcopy( value ) @@ -480,6 +484,10 @@ def to_string( self, value ): return json.dumps( value ) + def to_safe_string( self, value ): + # We do not sanitize json dicts + return json.safe_dumps( value ) + class PythonObjectParameter( MetadataParameter ): @@ -510,6 +518,10 @@ return str( self.spec.no_value ) return value.file_name + def to_safe_string( self, value ): + # We do not sanitize file names + return self.to_string( value ) + def get_html_field( self, value=None, context=None, other_values=None, **kwd ): context = context or {} other_values = other_values or {} diff -r 593bd69b3d5b937c2eb0ae5133a8f989b3131504 -r 605de23ca239879a34426885feddd2d3a459ca26 lib/galaxy/tools/evaluation.py --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -2,10 +2,12 @@ import tempfile from galaxy import model +from galaxy.util.object_wrapper import wrap_with_safe_string from galaxy.util.bunch import Bunch from galaxy.util.none_like import NoneDataset from galaxy.util.template import fill_template from galaxy.tools.wrappers import ( + ToolParameterValueWrapper, DatasetFilenameWrapper, DatasetListWrapper, DatasetCollectionWrapper, @@ -114,6 +116,9 @@ self.__populate_input_dataset_wrappers(param_dict, input_datasets, input_dataset_paths) self.__populate_output_dataset_wrappers(param_dict, output_datasets, output_paths, job_working_directory) self.__populate_unstructured_path_rewrites(param_dict) + # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames. + self.__sanitize_param_dict( param_dict ) + # Parameters added after this line are not sanitized self.__populate_non_job_params(param_dict) # Return the dictionary of parameters @@ -334,6 +339,24 @@ #the paths rewritten. self.__walk_inputs( self.tool.inputs, param_dict, rewrite_unstructured_paths ) + def __sanitize_param_dict( self, param_dict ): + """ + Sanitize all values that will be substituted on the command line, with the exception of ToolParameterValueWrappers, + which already have their own specific sanitization rules and also exclude special-cased named values. + We will only examine the first level for values to skip; the wrapping function will recurse as necessary. + + Note: this method follows the style of the similar populate calls, in that param_dict is modified in-place. + """ + # chromInfo is a filename, do not sanitize it. + skip = [ 'chromInfo' ] + if not self.tool or not self.tool.options or self.tool.options.sanitize: + for key, value in param_dict.items(): + if key not in skip: + # Remove key so that new wrapped object will occupy key slot + del param_dict[key] + # And replace with new wrapped key + param_dict[ wrap_with_safe_string( key, no_wrap_classes=ToolParameterValueWrapper ) ] = wrap_with_safe_string( value, no_wrap_classes=ToolParameterValueWrapper ) + def build( self ): """ Build runtime description of job to execute, evaluate command and diff -r 593bd69b3d5b937c2eb0ae5133a8f989b3131504 -r 605de23ca239879a34426885feddd2d3a459ca26 lib/galaxy/tools/wrappers.py --- a/lib/galaxy/tools/wrappers.py +++ b/lib/galaxy/tools/wrappers.py @@ -2,6 +2,7 @@ from galaxy import exceptions from galaxy.util.none_like import NoneDataset from galaxy.util import odict +from galaxy.util.object_wrapper import wrap_with_safe_string from logging import getLogger log = getLogger( __name__ ) @@ -162,10 +163,13 @@ if name in self.metadata.spec: if rval is None: rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) + rval = self.metadata.spec[ name ].param.to_safe_string( rval ) # Store this value, so we don't need to recalculate if needed # again setattr( self, name, rval ) + else: + #escape string value of non-defined metadata value + rval = wrap_with_safe_string( rval ) return rval def __nonzero__( self ): @@ -190,9 +194,13 @@ ext = tool.inputs[name].extensions[0] except: ext = 'data' - self.dataset = NoneDataset( datatypes_registry=datatypes_registry, ext=ext ) + self.dataset = wrap_with_safe_string( NoneDataset( datatypes_registry=datatypes_registry, ext=ext ), no_wrap_classes=ToolParameterValueWrapper ) else: - self.dataset = dataset + # Tool wrappers should not normally be accessing .dataset directly, + # so we will wrap it and keep the original around for file paths + # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper? + self.unsanitized = dataset + self.dataset = wrap_with_safe_string( dataset, no_wrap_classes=ToolParameterValueWrapper ) self.metadata = self.MetadataWrapper( dataset.metadata ) self.datatypes_registry = datatypes_registry self.false_path = getattr( dataset_path, "false_path", None ) @@ -210,7 +218,7 @@ if self.false_path is not None: return self.false_path else: - return self.dataset.file_name + return self.unsanitized.file_name def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': @@ -230,7 +238,7 @@ # object store to find the static location of this # directory. try: - return self.dataset.extra_files_path + return self.unsanitized.extra_files_path except exceptions.ObjectNotFound: # NestedObjectstore raises an error here # instead of just returning a non-existent diff -r 593bd69b3d5b937c2eb0ae5133a8f989b3131504 -r 605de23ca239879a34426885feddd2d3a459ca26 lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -360,46 +360,57 @@ '#': '__pd__'} -def restore_text(text): +def restore_text( text, character_map=mapped_chars ): """Restores sanitized text""" if not text: return text - for key, value in mapped_chars.items(): + for key, value in character_map.items(): text = text.replace(value, key) return text -def sanitize_text(text): +def sanitize_text( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """ Restricts the characters that are allowed in text; accepts both strings - and lists of strings. + and lists of strings; non-string entities will be cast to strings. """ - if isinstance( text, basestring ): - return _sanitize_text_helper(text) - elif isinstance( text, list ): - return [ _sanitize_text_helper(t) for t in text ] + if isinstance( text, list ): + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), text ) + if not isinstance( text, basestring ): + text = smart_str( text ) + return _sanitize_text_helper( text, valid_characters=valid_characters, character_map=character_map ) - -def _sanitize_text_helper(text): +def _sanitize_text_helper( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Restricts the characters that are allowed in a string""" out = [] for c in text: - if c in valid_chars: + if c in valid_characters: out.append(c) - elif c in mapped_chars: - out.append(mapped_chars[c]) + elif c in character_map: + out.append( character_map[c] ) else: - out.append('X') # makes debugging easier + out.append( invalid_character ) # makes debugging easier return ''.join(out) -def sanitize_param(value): +def sanitize_lists_to_string( values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + if isinstance( values, list ): + rval = [] + for value in values: + rval.append( sanitize_lists_to_string( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) ) + values = ",".join( rval ) + else: + values = sanitize_text( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + return values + + +def sanitize_param( value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Clean incoming parameters (strings or lists)""" if isinstance( value, basestring ): - return sanitize_text(value) + return sanitize_text( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) elif isinstance( value, list ): - return map(sanitize_text, value) + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), value ) else: raise Exception('Unknown parameter type (%s)' % ( type( value ) )) diff -r 593bd69b3d5b937c2eb0ae5133a8f989b3131504 -r 605de23ca239879a34426885feddd2d3a459ca26 lib/galaxy/util/dbkeys.py --- a/lib/galaxy/util/dbkeys.py +++ b/lib/galaxy/util/dbkeys.py @@ -4,6 +4,7 @@ #dbkeys read from disk using builds.txt from galaxy.util import read_dbnames from galaxy.util.json import loads +from galaxy.util.object_wrapper import sanitize_lists_to_string import os.path @@ -84,6 +85,7 @@ # use configured server len path if not chrom_info: # Default to built-in build. - chrom_info = os.path.join( self._static_chrom_info_path, "%s.len" % dbkey ) + # Since we are using an unverified dbkey, we will sanitize the dbkey before use + chrom_info = os.path.join( self._static_chrom_info_path, "%s.len" % sanitize_lists_to_string( dbkey ) ) chrom_info = os.path.abspath( chrom_info ) return ( chrom_info, db_dataset ) https://bitbucket.org/galaxy/galaxy-central/commits/8da5fd3df6e7/ Changeset: 8da5fd3df6e7 Branch: next-stable User: natefoo Date: 2015-01-13 15:33:41+00:00 Summary: Close next-stable branch for release_2015.01.13 Affected #: 6 files diff -r 7fe5cbb6b66e0debec7775a88a8c553384784a00 -r 8da5fd3df6e7e710bd9c9e5acc3baa98a665e247 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -20,6 +20,7 @@ import galaxy.model from galaxy.util import listify +from galaxy.util.object_wrapper import sanitize_lists_to_string from galaxy.util import stringify_dictionary_keys from galaxy.util import string_as_bool from galaxy.util import in_directory @@ -232,6 +233,9 @@ def to_string( self, value ): return str( value ) + def to_safe_string( self, value ): + return sanitize_lists_to_string( self.to_string( value ) ) + def make_copy( self, value, target_context = None, source_context = None ): return copy.deepcopy( value ) @@ -480,6 +484,10 @@ def to_string( self, value ): return json.dumps( value ) + def to_safe_string( self, value ): + # We do not sanitize json dicts + return json.safe_dumps( value ) + class PythonObjectParameter( MetadataParameter ): @@ -510,6 +518,10 @@ return str( self.spec.no_value ) return value.file_name + def to_safe_string( self, value ): + # We do not sanitize file names + return self.to_string( value ) + def get_html_field( self, value=None, context=None, other_values=None, **kwd ): context = context or {} other_values = other_values or {} diff -r 7fe5cbb6b66e0debec7775a88a8c553384784a00 -r 8da5fd3df6e7e710bd9c9e5acc3baa98a665e247 lib/galaxy/tools/evaluation.py --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -2,10 +2,12 @@ import tempfile from galaxy import model +from galaxy.util.object_wrapper import wrap_with_safe_string from galaxy.util.bunch import Bunch from galaxy.util.none_like import NoneDataset from galaxy.util.template import fill_template from galaxy.tools.wrappers import ( + ToolParameterValueWrapper, DatasetFilenameWrapper, DatasetListWrapper, DatasetCollectionWrapper, @@ -114,6 +116,9 @@ self.__populate_input_dataset_wrappers(param_dict, input_datasets, input_dataset_paths) self.__populate_output_dataset_wrappers(param_dict, output_datasets, output_paths, job_working_directory) self.__populate_unstructured_path_rewrites(param_dict) + # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames. + self.__sanitize_param_dict( param_dict ) + # Parameters added after this line are not sanitized self.__populate_non_job_params(param_dict) # Return the dictionary of parameters @@ -334,6 +339,24 @@ #the paths rewritten. self.__walk_inputs( self.tool.inputs, param_dict, rewrite_unstructured_paths ) + def __sanitize_param_dict( self, param_dict ): + """ + Sanitize all values that will be substituted on the command line, with the exception of ToolParameterValueWrappers, + which already have their own specific sanitization rules and also exclude special-cased named values. + We will only examine the first level for values to skip; the wrapping function will recurse as necessary. + + Note: this method follows the style of the similar populate calls, in that param_dict is modified in-place. + """ + # chromInfo is a filename, do not sanitize it. + skip = [ 'chromInfo' ] + if not self.tool or not self.tool.options or self.tool.options.sanitize: + for key, value in param_dict.items(): + if key not in skip: + # Remove key so that new wrapped object will occupy key slot + del param_dict[key] + # And replace with new wrapped key + param_dict[ wrap_with_safe_string( key, no_wrap_classes=ToolParameterValueWrapper ) ] = wrap_with_safe_string( value, no_wrap_classes=ToolParameterValueWrapper ) + def build( self ): """ Build runtime description of job to execute, evaluate command and diff -r 7fe5cbb6b66e0debec7775a88a8c553384784a00 -r 8da5fd3df6e7e710bd9c9e5acc3baa98a665e247 lib/galaxy/tools/wrappers.py --- a/lib/galaxy/tools/wrappers.py +++ b/lib/galaxy/tools/wrappers.py @@ -2,6 +2,7 @@ from galaxy import exceptions from galaxy.util.none_like import NoneDataset from galaxy.util import odict +from galaxy.util.object_wrapper import wrap_with_safe_string from logging import getLogger log = getLogger( __name__ ) @@ -162,10 +163,13 @@ if name in self.metadata.spec: if rval is None: rval = self.metadata.spec[name].no_value - rval = self.metadata.spec[name].param.to_string( rval ) + rval = self.metadata.spec[ name ].param.to_safe_string( rval ) # Store this value, so we don't need to recalculate if needed # again setattr( self, name, rval ) + else: + #escape string value of non-defined metadata value + rval = wrap_with_safe_string( rval ) return rval def __nonzero__( self ): @@ -190,9 +194,13 @@ ext = tool.inputs[name].extensions[0] except: ext = 'data' - self.dataset = NoneDataset( datatypes_registry=datatypes_registry, ext=ext ) + self.dataset = wrap_with_safe_string( NoneDataset( datatypes_registry=datatypes_registry, ext=ext ), no_wrap_classes=ToolParameterValueWrapper ) else: - self.dataset = dataset + # Tool wrappers should not normally be accessing .dataset directly, + # so we will wrap it and keep the original around for file paths + # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper? + self.unsanitized = dataset + self.dataset = wrap_with_safe_string( dataset, no_wrap_classes=ToolParameterValueWrapper ) self.metadata = self.MetadataWrapper( dataset.metadata ) self.datatypes_registry = datatypes_registry self.false_path = getattr( dataset_path, "false_path", None ) @@ -210,7 +218,7 @@ if self.false_path is not None: return self.false_path else: - return self.dataset.file_name + return self.unsanitized.file_name def __getattr__( self, key ): if self.false_path is not None and key == 'file_name': @@ -230,7 +238,7 @@ # object store to find the static location of this # directory. try: - return self.dataset.extra_files_path + return self.unsanitized.extra_files_path except exceptions.ObjectNotFound: # NestedObjectstore raises an error here # instead of just returning a non-existent diff -r 7fe5cbb6b66e0debec7775a88a8c553384784a00 -r 8da5fd3df6e7e710bd9c9e5acc3baa98a665e247 lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -360,46 +360,57 @@ '#': '__pd__'} -def restore_text(text): +def restore_text( text, character_map=mapped_chars ): """Restores sanitized text""" if not text: return text - for key, value in mapped_chars.items(): + for key, value in character_map.items(): text = text.replace(value, key) return text -def sanitize_text(text): +def sanitize_text( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """ Restricts the characters that are allowed in text; accepts both strings - and lists of strings. + and lists of strings; non-string entities will be cast to strings. """ - if isinstance( text, basestring ): - return _sanitize_text_helper(text) - elif isinstance( text, list ): - return [ _sanitize_text_helper(t) for t in text ] + if isinstance( text, list ): + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), text ) + if not isinstance( text, basestring ): + text = smart_str( text ) + return _sanitize_text_helper( text, valid_characters=valid_characters, character_map=character_map ) - -def _sanitize_text_helper(text): +def _sanitize_text_helper( text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Restricts the characters that are allowed in a string""" out = [] for c in text: - if c in valid_chars: + if c in valid_characters: out.append(c) - elif c in mapped_chars: - out.append(mapped_chars[c]) + elif c in character_map: + out.append( character_map[c] ) else: - out.append('X') # makes debugging easier + out.append( invalid_character ) # makes debugging easier return ''.join(out) -def sanitize_param(value): +def sanitize_lists_to_string( values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): + if isinstance( values, list ): + rval = [] + for value in values: + rval.append( sanitize_lists_to_string( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) ) + values = ",".join( rval ) + else: + values = sanitize_text( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + return values + + +def sanitize_param( value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X' ): """Clean incoming parameters (strings or lists)""" if isinstance( value, basestring ): - return sanitize_text(value) + return sanitize_text( value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) elif isinstance( value, list ): - return map(sanitize_text, value) + return map( lambda x: sanitize_text( x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ), value ) else: raise Exception('Unknown parameter type (%s)' % ( type( value ) )) diff -r 7fe5cbb6b66e0debec7775a88a8c553384784a00 -r 8da5fd3df6e7e710bd9c9e5acc3baa98a665e247 lib/galaxy/util/dbkeys.py --- a/lib/galaxy/util/dbkeys.py +++ b/lib/galaxy/util/dbkeys.py @@ -4,6 +4,7 @@ #dbkeys read from disk using builds.txt from galaxy.util import read_dbnames from galaxy.util.json import loads +from galaxy.util.object_wrapper import sanitize_lists_to_string import os.path @@ -84,6 +85,7 @@ # use configured server len path if not chrom_info: # Default to built-in build. - chrom_info = os.path.join( self._static_chrom_info_path, "%s.len" % dbkey ) + # Since we are using an unverified dbkey, we will sanitize the dbkey before use + chrom_info = os.path.join( self._static_chrom_info_path, "%s.len" % sanitize_lists_to_string( dbkey ) ) chrom_info = os.path.abspath( chrom_info ) return ( chrom_info, db_dataset ) diff -r 7fe5cbb6b66e0debec7775a88a8c553384784a00 -r 8da5fd3df6e7e710bd9c9e5acc3baa98a665e247 lib/galaxy/util/object_wrapper.py --- /dev/null +++ b/lib/galaxy/util/object_wrapper.py @@ -0,0 +1,436 @@ +""" +Classes for wrapping Objects and Sanitizing string output. +""" + +import inspect +import copy_reg +import logging +import string +from numbers import Number +from types import ( NoneType, NotImplementedType, EllipsisType, FunctionType, MethodType, GeneratorType, CodeType, + BuiltinFunctionType, BuiltinMethodType, ModuleType, XRangeType, SliceType, TracebackType, FrameType, + BufferType, DictProxyType, GetSetDescriptorType, MemberDescriptorType ) +from UserDict import UserDict + +from galaxy.util import sanitize_lists_to_string as _sanitize_lists_to_string + +log = logging.getLogger( __name__ ) + +# Define different behaviors for different types, see also: https://docs.python.org/2/library/types.html + +# Known Callable types +__CALLABLE_TYPES__ = ( FunctionType, MethodType, GeneratorType, CodeType, BuiltinFunctionType, BuiltinMethodType, ) + +# Always wrap these types without attempting to subclass +__WRAP_NO_SUBCLASS__ = ( ModuleType, XRangeType, SliceType, BufferType, TracebackType, FrameType, DictProxyType, + GetSetDescriptorType, MemberDescriptorType ) + __CALLABLE_TYPES__ + +# Don't wrap or sanitize. +__DONT_SANITIZE_TYPES__ = ( Number, bool, NoneType, NotImplementedType, EllipsisType, bytearray, ) + +# Don't wrap, but do sanitize. +__DONT_WRAP_TYPES__ = tuple() #( basestring, ) so that we can get the unsanitized string, we will now wrap basestring instances + +# Wrap contents, but not the container +__WRAP_SEQUENCES__ = ( tuple, list, ) +__WRAP_SETS__ = ( set, frozenset, ) +__WRAP_MAPPINGS__ = ( dict, UserDict, ) + + +# Define the set of characters that are not sanitized, and define a set of mappings for those that are. +# characters that are valid +VALID_CHARACTERS = set( string.letters + string.digits + " -=_.()/+*^,:?!@" ) + +# characters that are allowed but need to be escaped +CHARACTER_MAP = { '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__'} + +INVALID_CHARACTER = "X" + +def sanitize_lists_to_string( values, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP, invalid_character=INVALID_CHARACTER ): + return _sanitize_lists_to_string( values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character ) + + +def wrap_with_safe_string( value, no_wrap_classes = None ): + """ + Recursively wrap values that should be wrapped. + """ + + def __do_wrap( value ): + if isinstance( value, SafeStringWrapper ): + # Only ever wrap one-layer + return value + if callable( value ): + safe_class = CallableSafeStringWrapper + else: + safe_class = SafeStringWrapper + if isinstance( value, no_wrap_classes ): + return value + if isinstance( value, __DONT_WRAP_TYPES__ ): + return sanitize_lists_to_string( value, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + if isinstance( value, __WRAP_NO_SUBCLASS__ ): + return safe_class( value, safe_string_wrapper_function = __do_wrap ) + for this_type in __WRAP_SEQUENCES__ + __WRAP_SETS__: + if isinstance( value, this_type ): + return this_type( map( __do_wrap, value ) ) + for this_type in __WRAP_MAPPINGS__: + if isinstance( value, this_type ): + # Wrap both key and value + return this_type( map( lambda x: ( __do_wrap( x[0] ), __do_wrap( x[1] ) ), value.items() ) ) + # Create a dynamic class that joins SafeStringWrapper with the object being wrapped. + # This allows e.g. isinstance to continue to work. + try: + wrapped_class_name = value.__name__ + wrapped_class = value + except: + wrapped_class_name = value.__class__.__name__ + wrapped_class = value.__class__ + value_mod = inspect.getmodule( value ) + if value_mod: + wrapped_class_name = "%s.%s" % ( value_mod.__name__, wrapped_class_name ) + wrapped_class_name = "SafeStringWrapper(%s:%s)" % ( wrapped_class_name, ",".join( sorted( map( str, no_wrap_classes ) ) ) ) + do_wrap_func_name = "__do_wrap_%s" % ( wrapped_class_name ) + do_wrap_func = __do_wrap + global_dict = globals() + if wrapped_class_name in global_dict: + # Check to see if we have created a wrapper for this class yet, if so, reuse + wrapped_class = global_dict.get( wrapped_class_name ) + do_wrap_func = global_dict.get( do_wrap_func_name, __do_wrap ) + else: + try: + wrapped_class = type( wrapped_class_name, ( safe_class, wrapped_class, ), {} ) + except TypeError, e: + # Fail-safe for when a class cannot be dynamically subclassed. + log.warning( "Unable to create dynamic subclass for %s, %s: %s", type( value), value, e ) + wrapped_class = type( wrapped_class_name, ( safe_class, ), {} ) + if wrapped_class not in ( SafeStringWrapper, CallableSafeStringWrapper ): + # Save this wrapper for reuse and pickling/copying + global_dict[ wrapped_class_name ] = wrapped_class + do_wrap_func.__name__ = do_wrap_func_name + global_dict[ do_wrap_func_name ] = do_wrap_func + def pickle_safe_object( safe_object ): + return ( wrapped_class, ( safe_object.unsanitized, do_wrap_func, ) ) + # Set pickle and copy properties + copy_reg.pickle( wrapped_class, pickle_safe_object, do_wrap_func ) + return wrapped_class( value, safe_string_wrapper_function = do_wrap_func ) + # Determine classes not to wrap + if no_wrap_classes: + if not isinstance( no_wrap_classes, ( tuple, list ) ): + no_wrap_classes = [ no_wrap_classes ] + no_wrap_classes = list( no_wrap_classes ) + list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + else: + no_wrap_classes = list( __DONT_SANITIZE_TYPES__ ) + [ SafeStringWrapper ] + no_wrap_classes = tuple( set( sorted( no_wrap_classes, key=str ) ) ) + return __do_wrap( value ) + + +# N.B. refer to e.g. https://docs.python.org/2/reference/datamodel.html for information on Python's Data Model. + + +class SafeStringWrapper( object ): + """ + Class that wraps and sanitizes any provided value's attributes + that will attempt to be cast into a string. + + Attempts to mimic behavior of original class, including operands. + + To ensure proper handling of e.g. subclass checks, the *wrap_with_safe_string()* + method should be used. + + This wrapping occurs in a recursive/parasitic fashion, as all called attributes of + the originally wrapped object will also be wrapped and sanitized, unless the attribute + is of a type found in __DONT_SANITIZE_TYPES__ + __DONT_WRAP_TYPES__, where e.g. ~(strings + will still be sanitized, but not wrapped), and e.g. integers will have neither. + """ + __UNSANITIZED_ATTRIBUTE_NAME__ = 'unsanitized' + __NO_WRAP_NAMES__ = [ '__safe_string_wrapper_function__', __UNSANITIZED_ATTRIBUTE_NAME__] + + + def __new__( cls, *arg, **kwd ): + # We need to define a __new__ since, we are subclassing from e.g. immutable str, which internally sets data + # that will be used when other + this (this + other is handled by __add__) + safe_string_wrapper_function = kwd.get( 'safe_string_wrapper_function', None) or wrap_with_safe_string + try: + return super( SafeStringWrapper, cls ).__new__( cls, sanitize_lists_to_string( arg[0], valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + except Exception, e: + log.warning( "Could not provide an argument to %s.__new__: %s; will try without arguments.", cls, e ) + return super( SafeStringWrapper, cls ).__new__( cls ) + + def __init__( self, value, safe_string_wrapper_function = wrap_with_safe_string ): + self.unsanitized = value + self.__safe_string_wrapper_function__ = safe_string_wrapper_function + + def __str__( self ): + return sanitize_lists_to_string( self.unsanitized, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) + + def __repr__( self ): + return "%s object at %x on: %s" % ( sanitize_lists_to_string( self.__class__.__name__, valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ), id( self ), sanitize_lists_to_string( repr( self.unsanitized ), valid_characters=VALID_CHARACTERS, character_map=CHARACTER_MAP ) ) + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __le__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized <= other + + def __eq__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized == other + + def __ne__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized != other + + def __gt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized > other + + def __ge__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized >= other + + def __lt__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.unsanitized < other + + def __cmp__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return cmp( self.unsanitized, other ) + + # Do not implement __rcmp__, python 2.2 < 2.6 + + def __hash__( self ): + return hash( self.unsanitized ) + + def __nonzero__( self ): + return bool( self.unsanitized ) + + # Do not implement __unicode__, we will rely on __str__ + + def __getattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + #FIXME: is this ever reached? + return object.__getattr__( self, name ) + return self.__safe_string_wrapper_function__( getattr( self.unsanitized, name ) ) + + def __setattr__( self, name, value ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__setattr__( self, name, value ) + return setattr( self.unsanitized, name, value ) + + def __delattr__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__delattr__( self, name ) + return delattr( self.unsanitized, name ) + + def __getattribute__( self, name ): + if name in SafeStringWrapper.__NO_WRAP_NAMES__: + return object.__getattribute__( self, name ) + return self.__safe_string_wrapper_function__( getattr( object.__getattribute__( self, 'unsanitized' ), name ) ) + + # Skip Descriptors + + # Skip __slots__ + + # Don't need __metaclass__, we'll use the helper function to handle with subclassing for e.g. isinstance() + + # Revisit: + # __instancecheck__ + # __subclasscheck__ + # We are using a helper class to create dynamic subclasses to handle class checks + + # We address __call__ as needed based upon unsanitized, through the use of a CallableSafeStringWrapper class + + def __len__( self ): + original_value = self.unsanitized + while isinstance( original_value, SafeStringWrapper ): + original_value = self.unsanitized + return len( self.unsanitized ) + + def __getitem__( self, key ): + return self.__safe_string_wrapper_function__( self.unsanitized[ key ] ) + + def __setitem__( self, key, value ): + while isinstance( value, SafeStringWrapper ): + value = value.unsanitized + self.unsanitized[ key ] = value + + def __delitem__( self, key ): + del self.unsanitized[ key ] + + def __iter__( self ): + return iter( map( self.__safe_string_wrapper_function__, iter( self.unsanitized ) ) ) + + # Do not implement __reversed__ + + def __contains__( self, item ): + # FIXME: Do we need to consider if item is/isn't or does/doesn't contain SafeStringWrapper? + # When considering e.g. nested lists/dicts/etc, this gets complicated + while isinstance( item, SafeStringWrapper ): + item = item.unsanitized + return item in self.unsanitized + + # Not sure that we need these slice methods, but will provide anyway + def __getslice__( self, i, j ): + return self.__safe_string_wrapper_function__( self.unsanitized[ i:j ] ) + + def __setslice__( self, i, j, value ): + self.unsanitized[ i:j ] = value + + def __delslice__( self, i, j ): + del self.unsanitized[ i:j ] + + def __add__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized + other ) + + def __sub__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized - other ) + + def __mul__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized * other ) + + def __floordiv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized // other ) + + def __mod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized % other ) + + def __divmod__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( divmod( self.unsanitized, other ) ) + + def __pow__( self, *other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( self.unsanitized, *other ) ) + + def __lshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized << other ) + + def __rshift__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized >> other ) + + def __and__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized & other ) + + def __xor__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized ^ other ) + + def __or__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized | other ) + + def __div__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + def __truediv__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( self.unsanitized / other ) + + # The only reflected operand that we will define is __rpow__, due to coercion rules complications as per docs + def __rpow__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return self.__safe_string_wrapper_function__( pow( other, self.unsanitized ) ) + + # Do not implement in-place operands + + def __neg__( self ): + return __safe_string_wrapper_function__( -self.unsanitized ) + + def __pos__( self ): + return __safe_string_wrapper_function__( +self.unsanitized ) + + def __abs__( self ): + return __safe_string_wrapper_function__( abs( self.unsanitized ) ) + + def __invert__( self ): + return __safe_string_wrapper_function__( ~self.unsanitized ) + + def __complex__( self ): + return __safe_string_wrapper_function__( complex( self.unsanitized ) ) + + def __int__( self ): + return int( self.unsanitized ) + + def __float__( self ): + return float( self.unsanitized ) + + def __oct__( self ): + return oct( self.unsanitized ) + + def __hex__( self ): + return hex( self.unsanitized ) + + def __index__( self ): + return self.unsanitized.index() + + def __coerce__( self, other ): + while isinstance( other, SafeStringWrapper ): + other = other.unsanitized + return coerce( self.unsanitized, other ) + + def __enter__( self ): + return self.unsanitized.__enter__() + + def __exit__( self, *args ): + return self.unsanitized.__exit__( *args ) + +class CallableSafeStringWrapper( SafeStringWrapper ): + + def __call__( self, *args, **kwds ): + return self.__safe_string_wrapper_function__( self.unsanitized( *args, **kwds ) ) + + +# Enable pickling/deepcopy +def pickle_SafeStringWrapper( safe_object ): + args = ( safe_object.unsanitized, ) + cls = SafeStringWrapper + if isinstance( safe_object, CallableSafeStringWrapper ): + cls = CallableSafeStringWrapper + return ( cls, args ) +copy_reg.pickle( SafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) +copy_reg.pickle( CallableSafeStringWrapper, pickle_SafeStringWrapper, wrap_with_safe_string ) + https://bitbucket.org/galaxy/galaxy-central/commits/2e8dd2949dd3/ Changeset: 2e8dd2949dd3 Branch: stable User: natefoo Date: 2015-01-13 15:33:50+00:00 Summary: Merge next-stable to stable for release_2015.01.13 Affected #: 427 files diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee .hgignore --- a/.hgignore +++ b/.hgignore @@ -67,6 +67,7 @@ shed_data_manager_conf.xml object_store_conf.xml job_metrics_conf.xml +workflow_schedulers_conf.xml config/* static/welcome.html.* static/welcome.html diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee buildbot_setup.sh --- a/buildbot_setup.sh +++ b/buildbot_setup.sh @@ -126,5 +126,3 @@ echo "Appending tool-data/shared/ucsc/builds.txt.buildbot to tool-data/shared/ucsc/builds.txt" cat tool-data/shared/ucsc/builds.txt.buildbot >> tool-data/shared/ucsc/builds.txt - -python ./scripts/fetch_eggs.py all diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/galaxy.frame.js --- a/client/galaxy/scripts/galaxy.frame.js +++ b/client/galaxy/scripts/galaxy.frame.js @@ -1,9 +1,8 @@ // dependencies define(["galaxy.masthead", "mvc/ui/ui-frames"], function(mod_masthead, Frames) { -// frame manager -var GalaxyFrame = Backbone.View.extend( -{ +/** Frame manager uses the ui-frames to create the scratch book masthead icon and functionality **/ +var GalaxyFrame = Backbone.View.extend({ // base element el_main: 'body', @@ -17,8 +16,7 @@ button_load : null, // initialize - initialize : function(options) - { + initialize : function(options) { // add to masthead menu var self = this; @@ -28,8 +26,7 @@ }); // add activate icon - this.button_active = new mod_masthead.GalaxyMastheadIcon ( - { + this.button_active = new mod_masthead.GalaxyMastheadIcon({ icon : 'fa-th', tooltip : 'Enable/Disable Scratchbook', onclick : function() { self._activate(); }, @@ -44,8 +41,7 @@ Galaxy.masthead.append(this.button_active); // add load icon - this.button_load = new mod_masthead.GalaxyMastheadIcon ( - { + this.button_load = new mod_masthead.GalaxyMastheadIcon({ icon : 'fa-eye', tooltip : 'Show/Hide Scratchbook', onclick : function(e) { @@ -122,35 +118,77 @@ }); }, + + /** + * Add a trackster visualization to the frames. + */ + add_trackster_viz: function(viz_id) { + var self = this; + require(['viz/visualization', 'viz/trackster'], function(visualization, trackster) { + var viz = new visualization.Visualization({id: viz_id}); + $.when( viz.fetch() ).then( function() { + var ui = new trackster.TracksterUI(galaxy_config.root); + + // Construct frame config based on dataset's type. + var frame_config = { + title: viz.get('name'), + type: 'other', + content: function(parent_elt) { + // Create view config. + var view_config = { + container: parent_elt, + name: viz.get('title'), + id: viz.id, + // FIXME: this will not work with custom builds b/c the dbkey needed to be encoded. + dbkey: viz.get('dbkey'), + stand_alone: false + }, + latest_revision = viz.get('latest_revision'), + drawables = latest_revision.config.view.drawables; + + // Set up datasets in drawables. + _.each(drawables, function(d) { + d.dataset = { + hda_ldda: d.hda_ldda, + id: d.dataset_id + }; + }); + + view = ui.create_visualization(view_config, + latest_revision.config.viewport, + latest_revision.config.view.drawables, + latest_revision.config.bookmarks, + false); + } + }; + + self.add(frame_config); + }); + }); + }, /** * Add and display a new frame/window based on options. */ - add: function(options) - { + add: function(options){ // open new tab - if (options.target == '_blank') - { + if (options.target == '_blank'){ window.open(options.content); return; } // reload entire window - if (options.target == '_top' || options.target == '_parent' || options.target == '_self') - { + if (options.target == '_top' || options.target == '_parent' || options.target == '_self'){ window.location = options.content; return; } // validate - if (!this.active) - { + if (!this.active){ // fix url if main frame is unavailable var $galaxy_main = $(window.parent.document).find('#galaxy_main'); - if (options.target == 'galaxy_main' || options.target == 'center') - { - if ($galaxy_main.length === 0) - { + if (options.target == 'galaxy_main' || options.target == 'center'){ + if ($galaxy_main.length === 0){ var href = options.content; if (href.indexOf('?') == -1) href += '?'; @@ -173,11 +211,9 @@ }, // activate/disable panel - _activate: function () - { + _activate: function (){ // check - if (this.active) - { + if (this.active){ // disable this.active = false; @@ -196,8 +232,7 @@ }, // update frame counter - _refresh: function() - { + _refresh: function(){ // update on screen counter this.button_load.number(this.frames.length()); diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/galaxy.interactive_environments.js --- /dev/null +++ b/client/galaxy/scripts/galaxy.interactive_environments.js @@ -0,0 +1,59 @@ +/** + * Internal function to remove content from the main area and add the notebook. + * Not idempotent + */ +function append_notebook(url){ + clear_main_area(); + $('#main').append('<iframe frameBorder="0" seamless="seamless" style="width: 100%; height: 100%; overflow:hidden;" scrolling="no" src="'+ url +'"></iframe>' + ); +} + +function clear_main_area(){ + $('#spinner').remove(); + $('#main').children().remove(); +} + +function display_spinner(){ + $('#main').append('<img id="spinner" src="' + galaxy_root + '/static/style/largespinner.gif" style="position:absolute;margin:auto;top:0;left:0;right:0;bottom:0;">'); +} + + +/** + * Test availability of a URL, and call a callback when done. + * http://stackoverflow.com/q/25390206/347368 + * @param {String} url: URL to test availability of. Must return a 200 (302->200 is OK). + * @param {String} callback: function to call once successfully connected. + * + */ +function test_ie_availability(url, success_callback){ + var request_count = 0; + display_spinner(); + interval = setInterval(function(){ + $.ajax({ + url: url, + xhrFields: { + withCredentials: true + }, + type: "GET", + timeout: 500, + success: function(){ + console.log("Connected to IE, returning"); + clearInterval(interval); + success_callback(); + }, + error: function(jqxhr, status, error){ + request_count++; + console.log("Request " + request_count); + if(request_count > 30){ + clearInterval(interval); + clear_main_area(); + toastr.error( + "Could not connect to IE, contact your administrator", + "Error", + {'closeButton': true, 'timeOut': 20000, 'tapToDismiss': false} + ); + } + } + }); + }, 1000); +} diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/galaxy.library.js --- a/client/galaxy/scripts/galaxy.library.js +++ b/client/galaxy/scripts/galaxy.library.js @@ -16,19 +16,19 @@ "mvc/library/library-library-view", "mvc/library/library-folder-view" ], -function(mod_masthead, - mod_utils, - mod_toastr, - mod_baseMVC, - mod_library_model, - mod_folderlist_view, - mod_librarylist_view, - mod_librarytoolbar_view, - mod_foldertoolbar_view, - mod_library_dataset_view, - mod_library_library_view, - mod_library_folder_view - ) { + function(mod_masthead, + mod_utils, + mod_toastr, + mod_baseMVC, + mod_library_model, + mod_folderlist_view, + mod_librarylist_view, + mod_librarytoolbar_view, + mod_foldertoolbar_view, + mod_library_dataset_view, + mod_library_library_view, + mod_library_folder_view + ) { // ============================================================================ // ROUTER @@ -36,40 +36,44 @@ initialize: function() { this.routesHit = 0; //keep count of number of routes handled by the application - Backbone.history.on('route', function() { this.routesHit++; }, this); - }, + Backbone.history.on( 'route', function() { this.routesHit++; }, this ); +}, - routes: { +routes: { "" : "libraries", + "page/:show_page" : "libraries_page", "library/:library_id/permissions" : "library_permissions", "folders/:folder_id/permissions" : "folder_permissions", "folders/:id" : "folder_content", + "folders/:id/page/:show_page" : "folder_page", "folders/:folder_id/datasets/:dataset_id" : "dataset_detail", "folders/:folder_id/datasets/:dataset_id/permissions" : "dataset_permissions", "folders/:folder_id/datasets/:dataset_id/versions/:ldda_id" : "dataset_version", "folders/:folder_id/download/:format" : "download", "folders/:folder_id/import/:source" : "import_datasets" - }, +}, - back: function() { - if(this.routesHit > 1) { +back: function() { + if( this.routesHit > 1 ) { //more than one route hit -> user did not land to current page directly window.history.back(); - } else { + } else { //otherwise go to the home page. Use replaceState if available so //the navigation doesn't create an extra history entry - this.navigate('#', {trigger:true, replace:true}); - } + this.navigate( '#', { trigger:true, replace:true } ); } +} }); // ============================================================================ /** session storage for library preferences */ var LibraryPrefs = mod_baseMVC.SessionStorageModel.extend({ defaults : { - with_deleted : false, - sort_order : 'asc', - sort_by : 'name' + with_deleted : false, + sort_order : 'asc', + sort_by : 'name', + library_page_size : 20, + folder_page_size : 15 } }); @@ -88,77 +92,97 @@ initialize : function(){ Galaxy.libraries = this; - this.preferences = new LibraryPrefs( {id: 'global-lib-prefs'} ); + this.preferences = new LibraryPrefs( { id: 'global-lib-prefs' } ); this.library_router = new LibraryRouter(); - this.library_router.on('route:libraries', function() { - Galaxy.libraries.libraryToolbarView = new mod_librarytoolbar_view.LibraryToolbarView(); - Galaxy.libraries.libraryListView = new mod_librarylist_view.LibraryListView(); + this.library_router.on( 'route:libraries', function() { + Galaxy.libraries.libraryToolbarView = new mod_librarytoolbar_view.LibraryToolbarView(); + Galaxy.libraries.libraryListView = new mod_librarylist_view.LibraryListView(); + }); + + this.library_router.on('route:libraries_page', function( show_page ) { + if ( Galaxy.libraries.libraryToolbarView === null ){ + Galaxy.libraries.libraryToolbarView = new mod_librarytoolbar_view.LibraryToolbarView(); + Galaxy.libraries.libraryListView = new mod_librarylist_view.LibraryListView( { show_page: show_page } ); + } else { + Galaxy.libraries.libraryListView.render( { show_page: show_page } ) + } }); - this.library_router.on('route:folder_content', function(id) { + this.library_router.on( 'route:folder_content', function( id ) { if (Galaxy.libraries.folderToolbarView){ - Galaxy.libraries.folderToolbarView.$el.unbind('click'); + Galaxy.libraries.folderToolbarView.$el.unbind( 'click' ); } - Galaxy.libraries.folderToolbarView = new mod_foldertoolbar_view.FolderToolbarView({id: id}); - Galaxy.libraries.folderListView = new mod_folderlist_view.FolderListView({id: id}); + Galaxy.libraries.folderToolbarView = new mod_foldertoolbar_view.FolderToolbarView( { id: id } ); + Galaxy.libraries.folderListView = new mod_folderlist_view.FolderListView( { id: id } ); }); - this.library_router.on('route:download', function(folder_id, format) { - if ($('#folder_list_body').find(':checked').length === 0) { - mod_toastr.info( 'You must select at least one dataset to download' ); - Galaxy.libraries.library_router.navigate('folders/' + folder_id, {trigger: true, replace: true}); - } else { - Galaxy.libraries.folderToolbarView.download(folder_id, format); - Galaxy.libraries.library_router.navigate('folders/' + folder_id, {trigger: false, replace: true}); - } + this.library_router.on( 'route:folder_page', function( id, show_page ) { + if ( Galaxy.libraries.folderToolbarView === null ){ + Galaxy.libraries.folderToolbarView = new mod_foldertoolbar_view.FolderToolbarView( {id: id} ); + Galaxy.libraries.folderListView = new mod_folderlist_view.FolderListView( { id: id, show_page: show_page } ); + } else { + Galaxy.libraries.folderListView.render( { id: id, show_page: parseInt( show_page ) } ) + } }); - this.library_router.on('route:dataset_detail', function(folder_id, dataset_id){ - if (Galaxy.libraries.datasetView){ - Galaxy.libraries.datasetView.$el.unbind('click'); - } - Galaxy.libraries.datasetView = new mod_library_dataset_view.LibraryDatasetView({id: dataset_id}); - }); - this.library_router.on('route:dataset_version', function(folder_id, dataset_id, ldda_id){ - if (Galaxy.libraries.datasetView){ - Galaxy.libraries.datasetView.$el.unbind('click'); - } - Galaxy.libraries.datasetView = new mod_library_dataset_view.LibraryDatasetView({id: dataset_id, ldda_id: ldda_id, show_version: true}); - }); + this.library_router.on( 'route:download', function( folder_id, format ) { + if ( $( '#folder_list_body' ).find( ':checked' ).length === 0 ) { + mod_toastr.info( 'You must select at least one dataset to download' ); + Galaxy.libraries.library_router.navigate( 'folders/' + folder_id, { trigger: true, replace: true } ); + } else { + Galaxy.libraries.folderToolbarView.download( folder_id, format ); + Galaxy.libraries.library_router.navigate( 'folders/' + folder_id, { trigger: false, replace: true } ); + } + }); - this.library_router.on('route:dataset_permissions', function(folder_id, dataset_id){ - if (Galaxy.libraries.datasetView){ - Galaxy.libraries.datasetView.$el.unbind('click'); - } - Galaxy.libraries.datasetView = new mod_library_dataset_view.LibraryDatasetView({id: dataset_id, show_permissions: true}); - }); + this.library_router.on( 'route:dataset_detail', function(folder_id, dataset_id){ + if (Galaxy.libraries.datasetView){ + Galaxy.libraries.datasetView.$el.unbind('click'); + } + Galaxy.libraries.datasetView = new mod_library_dataset_view.LibraryDatasetView({id: dataset_id}); + }); - this.library_router.on('route:library_permissions', function(library_id){ - if (Galaxy.libraries.libraryView){ - Galaxy.libraries.libraryView.$el.unbind('click'); - } - Galaxy.libraries.libraryView = new mod_library_library_view.LibraryView({id: library_id, show_permissions: true}); - }); + this.library_router.on( 'route:dataset_version', function(folder_id, dataset_id, ldda_id){ + if (Galaxy.libraries.datasetView){ + Galaxy.libraries.datasetView.$el.unbind('click'); + } + Galaxy.libraries.datasetView = new mod_library_dataset_view.LibraryDatasetView({id: dataset_id, ldda_id: ldda_id, show_version: true}); + }); - this.library_router.on('route:folder_permissions', function(folder_id){ - if (Galaxy.libraries.folderView){ - Galaxy.libraries.folderView.$el.unbind('click'); - } - Galaxy.libraries.folderView = new mod_library_folder_view.FolderView({id: folder_id, show_permissions: true}); - }); - this.library_router.on('route:import_datasets', function(folder_id, source){ - if (Galaxy.libraries.folderToolbarView && Galaxy.libraries.folderListView){ - Galaxy.libraries.folderToolbarView.showImportModal({source:source}); - } else { - Galaxy.libraries.folderToolbarView = new mod_foldertoolbar_view.FolderToolbarView({id: folder_id}); - Galaxy.libraries.folderListView = new mod_folderlist_view.FolderListView({id: folder_id}); - Galaxy.libraries.folderToolbarView.showImportModal({source: source}); - } - }); + this.library_router.on( 'route:dataset_permissions', function(folder_id, dataset_id){ + if (Galaxy.libraries.datasetView){ + Galaxy.libraries.datasetView.$el.unbind('click'); + } + Galaxy.libraries.datasetView = new mod_library_dataset_view.LibraryDatasetView({id: dataset_id, show_permissions: true}); + }); - Backbone.history.start({pushState: false}); + this.library_router.on( 'route:library_permissions', function(library_id){ + if (Galaxy.libraries.libraryView){ + Galaxy.libraries.libraryView.$el.unbind('click'); + } + Galaxy.libraries.libraryView = new mod_library_library_view.LibraryView({id: library_id, show_permissions: true}); + }); + + this.library_router.on( 'route:folder_permissions', function(folder_id){ + if (Galaxy.libraries.folderView){ + Galaxy.libraries.folderView.$el.unbind('click'); + } + Galaxy.libraries.folderView = new mod_library_folder_view.FolderView({id: folder_id, show_permissions: true}); + }); + + this.library_router.on( 'route:import_datasets', function( folder_id, source ){ + if ( Galaxy.libraries.folderToolbarView && Galaxy.libraries.folderListView ){ + Galaxy.libraries.folderToolbarView.showImportModal( { source:source } ); + } else { + Galaxy.libraries.folderToolbarView = new mod_foldertoolbar_view.FolderToolbarView( { id: folder_id } ); + Galaxy.libraries.folderListView = new mod_folderlist_view.FolderListView( { id: folder_id } ); + Galaxy.libraries.folderToolbarView.showImportModal( { source: source } ); + } + }); + + Backbone.history.start({pushState: false}); } }); diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/galaxy.masthead.js --- a/client/galaxy/scripts/galaxy.masthead.js +++ b/client/galaxy/scripts/galaxy.masthead.js @@ -1,9 +1,8 @@ // dependencies define([], function() { -// masthead -var GalaxyMasthead = Backbone.View.extend( -{ +/** Masthead **/ +var GalaxyMasthead = Backbone.View.extend({ // base element el_masthead: '#everything', @@ -17,8 +16,7 @@ list: [], // initialize - initialize : function(options) - { + initialize : function(options) { // update options this.options = options; @@ -51,27 +49,23 @@ }, // configure events - events: - { + events: { 'click' : '_click', 'mousedown' : function(e) { e.preventDefault() } }, // adds a new item to the masthead - append : function(item) - { + append : function(item) { return this._add(item, true); }, // adds a new item to the masthead - prepend : function(item) - { + prepend : function(item) { return this._add(item, false); }, // activate - highlight: function(id) - { + highlight: function(id) { var current = $(this.el).find('#' + id + '> li'); if (current) { current.addClass('active'); @@ -79,11 +73,9 @@ }, // adds a new item to the masthead - _add : function(item, append) - { + _add : function(item, append) { var $loc = $(this.el).find('#' + item.location); - if ($loc) - { + if ($loc){ // create frame for new item var $current = $(item.el); @@ -106,8 +98,7 @@ }, // handle click event - _click: function(e) - { + _click: function(e) { // close all popups var $all = $(this.el).find('.popup'); if ($all) { @@ -129,8 +120,7 @@ */ // fill template - _template: function(options) - { + _template: function(options) { var brand_text = options.brand ? ("/ " + options.brand) : "" ; return '<div><div id="masthead" class="navbar navbar-fixed-top navbar-inverse">' + '<div style="position: relative; right: -50%; float: left;">' + @@ -150,12 +140,10 @@ } }); -// icon -var GalaxyMastheadIcon = Backbone.View.extend( -{ +/** Masthead icon **/ +var GalaxyMastheadIcon = Backbone.View.extend({ // icon options - options: - { + options:{ id : '', icon : 'fa-cog', tooltip : '', @@ -169,8 +157,7 @@ location: 'iconbar', // initialize - initialize: function (options) - { + initialize: function (options){ // read in defaults if (options) this.options = _.defaults(options, this.options); @@ -189,20 +176,17 @@ }, // show - show: function() - { + show: function(){ $(this.el).css({visibility : 'visible'}); }, // show - hide: function() - { + hide: function(){ $(this.el).css({visibility : 'hidden'}); }, // switch icon - icon: function (new_icon) - { + icon: function (new_icon){ // update icon class $(this.el).find('.icon').removeClass(this.options.icon) .addClass(new_icon); @@ -212,26 +196,22 @@ }, // toggle - toggle: function() - { + toggle: function(){ $(this.el).addClass('toggle'); }, // untoggle - untoggle: function() - { + untoggle: function(){ $(this.el).removeClass('toggle'); }, // set/get number - number: function(new_number) - { + number: function(new_number){ $(this.el).find('.number').text(new_number); }, // fill template icon - _template: function (options) - { + _template: function (options){ var tmpl = '<div id="' + options.id + '" class="symbol">' + '<div class="icon fa fa-2x ' + options.icon + '"></div>'; if (options.with_number) @@ -243,12 +223,10 @@ } }); -// tab -var GalaxyMastheadTab = Backbone.View.extend( -{ +/** Masthead tab **/ +var GalaxyMastheadTab = Backbone.View.extend({ // main options - options: - { + options:{ id : '', title : '', target : '_parent', @@ -268,52 +246,50 @@ $menu: null, // events - events: - { + events:{ 'click .head' : '_head' }, // initialize - initialize: function (options) - { + initialize: function ( options ){ // read in defaults - if (options) - this.options = _.defaults(options, this.options); - + if ( options ){ + this.options = _.defaults( options, this.options ); + } + // update url - if (this.options.content && this.options.content.indexOf('//') === -1) + if ( this.options.content !== undefined && this.options.content.indexOf( '//' ) === -1 ){ this.options.content = galaxy_config.root + this.options.content; + } // add template for tab - this.setElement($(this._template(this.options))); + this.setElement( $( this._template( this.options ) ) ); // disable menu items that are not available to anonymous user // also show title to explain why they are disabled - if (this.options.disabled){ - $(this.el).find('.root').addClass('disabled'); + if ( this.options.disabled ){ + $( this.el ).find( '.root' ).addClass( 'disabled' ); this._attachPopover(); } // visiblity - if (!this.options.visible) + if ( !this.options.visible ){ this.hide(); + } }, // show - show: function() - { + show: function(){ $(this.el).css({visibility : 'visible'}); }, // show - hide: function() - { + hide: function(){ $(this.el).css({visibility : 'hidden'}); }, // add menu item - add: function (options) - { + add: function (options){ // menu option defaults var menuOptions = { title : 'Title', @@ -333,8 +309,7 @@ menuOptions.content = galaxy_config.root + menuOptions.content; // check if submenu element is available - if (!this.$menu) - { + if (!this.$menu){ // insert submenu element into root $(this.el).find('.root').append(this._templateMenu()); @@ -353,8 +328,7 @@ // add events var self = this; - $item.on('click', function(e) - { + $item.on('click', function(e){ // prevent default e.preventDefault(); @@ -372,8 +346,7 @@ }, // show menu on header click - _head: function(e) - { + _head: function(e){ // prevent default e.preventDefault(); @@ -387,12 +360,11 @@ } }, - _attachPopover : function() - { + _attachPopover : function(){ var $popover_element = $(this.el).find('.head'); $popover_element.popover({ html: true, - content: 'Please <a href="/user/login">log in</a> or <a href="/user/create">register</a> to use this feature.', + content: 'Please <a href="' + galaxy_config.root + '/user/login">log in</a> or <a href="' + galaxy_config.root + '/user/create">register</a> to use this feature.', placement: 'bottom' }).on('shown.bs.popover', function() { // hooking on bootstrap event to automatically hide popovers after delay setTimeout(function() { @@ -402,25 +374,21 @@ }, // fill template header - _templateMenuItem: function (options) - { + _templateMenuItem: function (options){ return '<li><a href="' + options.content + '" target="' + options.target + '">' + options.title + '</a></li>'; }, // fill template header - _templateMenu: function () - { + _templateMenu: function (){ return '<ul class="popup dropdown-menu"></ul>'; }, - _templateDivider: function() - { + _templateDivider: function(){ return '<li class="divider"></li>'; }, // fill template - _template: function (options) - { + _template: function (options){ // start template var tmpl = '<ul id="' + options.id + '" class="nav navbar-nav" border="0" cellspacing="0">' + '<li class="root dropdown" style="">' + diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/galaxy.menu.js --- a/client/galaxy/scripts/galaxy.menu.js +++ b/client/galaxy/scripts/galaxy.menu.js @@ -1,13 +1,8 @@ -/* - galaxy menu -*/ - // dependencies define(["galaxy.masthead"], function(mod_masthead) { -// frame manager -var GalaxyMenu = Backbone.Model.extend( -{ +/** GalaxyMenu uses the GalaxyMasthead class in order to add menu items and icons to the Masthead **/ +var GalaxyMenu = Backbone.Model.extend({ // options options: null, @@ -15,23 +10,21 @@ masthead: null, // initialize - initialize: function(options) - { + initialize: function(options) { this.options = options.config; this.masthead = options.masthead; this.create(); }, // default menu - create: function() - { + create: function(){ // // Analyze data tab. // var tab_analysis = new mod_masthead.GalaxyMastheadTab({ id : "analysis", title : "Analyze Data", - content : "root/index", + content : "", title_attribute : 'Analysis home view' }); this.masthead.append(tab_analysis); @@ -137,8 +130,7 @@ var tab_visualization = new mod_masthead.GalaxyMastheadTab(visualization_options); - if (this.options.user.valid) //add submenu only when user is logged in - { + if (this.options.user.valid){ //add submenu only when user is logged in tab_visualization.add({ title : "New Track Browser", content : "visualization/trackster", @@ -155,8 +147,7 @@ // // Cloud menu. // - if (this.options.enable_cloud_launch) - { + if (this.options.enable_cloud_launch){ var tab_cloud = new mod_masthead.GalaxyMastheadTab({ id : "cloud", title : "Cloud", @@ -172,12 +163,11 @@ // // Admin. // - if (this.options.is_admin_user) - { + if (this.options.is_admin_user) { var tab_admin = new mod_masthead.GalaxyMastheadTab({ id : "admin", title : "Admin", - content : "admin/index", + content : "admin", extra_class : "admin-only", title_attribute : 'Administer this Galaxy' }); @@ -192,8 +182,7 @@ title : "Help", title_attribute : 'Support, contact, and community hubs' }); - if (this.options.biostar_url) - { + if (this.options.biostar_url){ tab_help.add({ title : "Galaxy Biostar", content : this.options.biostar_url_redirect, @@ -235,8 +224,7 @@ content : this.options.citation_url, target : "_blank" }); - if (this.options.terms_url) - { + if (this.options.terms_url){ tab_help.add({ title : "Terms and Conditions", content : this.options.terms_url, @@ -248,8 +236,7 @@ // // User tab. // - if (!this.options.user.valid) - { + if (!this.options.user.valid){ var tab_user = new mod_masthead.GalaxyMastheadTab({ id : "user", title : "User", @@ -265,8 +252,7 @@ }); // register - if (this.options.allow_user_creation) - { + if (this.options.allow_user_creation){ tab_user.add({ title : "Register", content : "user/create", @@ -331,8 +317,7 @@ target : "galaxy_main" }); - if (this.options.use_remote_user) - { + if (this.options.use_remote_user){ tab_user.add({ title : "Public Name", content : "user/edit_username?cntrller=user", diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/mvc/base-mvc.js --- a/client/galaxy/scripts/mvc/base-mvc.js +++ b/client/galaxy/scripts/mvc/base-mvc.js @@ -380,11 +380,6 @@ /** allow the view to be dragged, set up event handlers */ draggableOn : function(){ this.draggable = true; - //TODO: I have no idea why this doesn't work with the events hash or jq.on()... - //this.$el.find( '.title-bar' ) - // .attr( 'draggable', true ) - // .bind( 'dragstart', this.dragStartHandler, false ) - // .bind( 'dragend', this.dragEndHandler, false ); this.dragStartHandler = _.bind( this._dragStartHandler, this ); this.dragEndHandler = _.bind( this._dragEndHandler, this ); @@ -402,23 +397,22 @@ }, /** sets the dataTransfer data to the model's toJSON - * @fires dragstart (bbone event) which is passed this view + * @fires draggable:dragstart (bbone event) which is passed the event and this view */ _dragStartHandler : function( event ){ - //this.debug( 'dragStartHandler:', this, event, arguments ) - this.trigger( 'dragstart', this ); event.dataTransfer.effectAllowed = 'move'; + //ASSUMES: this.model //TODO: all except IE: should be 'application/json', IE: must be 'text' event.dataTransfer.setData( 'text', JSON.stringify( this.model.toJSON() ) ); + this.trigger( 'draggable:dragstart', event, this ); return false; }, /** handle the dragend - * @fires dragend (bbone event) which is passed this view + * @fires draggable:dragend (bbone event) which is passed the event and this view */ _dragEndHandler : function( event ){ - this.trigger( 'dragend', this ); - //this.debug( 'dragEndHandler:', event ) + this.trigger( 'draggable:dragend', event, this ); return false; } }; diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/mvc/collection/collection-li.js --- a/client/galaxy/scripts/mvc/collection/collection-li.js +++ b/client/galaxy/scripts/mvc/collection/collection-li.js @@ -100,8 +100,6 @@ /** add the DCE class to the list item */ className : ListItemView.prototype.className + " dataset-collection-element", - /** jq fx speed for this view */ - fxSpeed : 'fast', /** set up */ initialize : function( attributes ){ diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/mvc/dataset/dataset-li-edit.js --- a/client/galaxy/scripts/mvc/dataset/dataset-li-edit.js +++ b/client/galaxy/scripts/mvc/dataset/dataset-li-edit.js @@ -181,6 +181,8 @@ } var $visualizations = $( this.templates.visualizations( visualizations, this ) ); + //HACK: need to re-write those directed at galaxy_main with linkTarget + $visualizations.find( '[target="galaxy_main"]').attr( 'target', this.linkTarget ); // use addBack here to include the root $visualizations elem (for the case of 1 visualization) this._addScratchBookFn( $visualizations.find( '.visualization-link' ).addBack( '.visualization-link' ) ); return $visualizations; @@ -188,6 +190,7 @@ /** add scratchbook functionality to visualization links */ _addScratchBookFn : function( $links ){ + var li = this; $links.click( function( ev ){ if( Galaxy.frame && Galaxy.frame.active ){ Galaxy.frame.add({ diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/mvc/dataset/dataset-li.js --- a/client/galaxy/scripts/mvc/dataset/dataset-li.js +++ b/client/galaxy/scripts/mvc/dataset/dataset-li.js @@ -160,7 +160,7 @@ // add frame manager option onclick event var self = this; displayBtnData.onclick = function( ev ){ - if( Galaxy.frame && Galaxy.frame.active ){ + if (Galaxy.frame && Galaxy.frame.active) { // Add dataset to frames. Galaxy.frame.add_dataset(self.model.get('id')); ev.preventDefault(); diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/mvc/dataset/dataset-model.js --- a/client/galaxy/scripts/mvc/dataset/dataset-model.js +++ b/client/galaxy/scripts/mvc/dataset/dataset-model.js @@ -363,74 +363,11 @@ Backbone.Collection.prototype.set.call( this, models, options ); }, -// /** Convert this ad-hoc collection of hdas to a formal collection tracked -// by the server. -// **/ -// promoteToHistoryDatasetCollection : function _promote( history, collection_type, options ){ -////TODO: seems like this would be better in mvc/collections -// options = options || {}; -// options.url = this.url(); -// options.type = "POST"; -// var full_collection_type = collection_type; -// var element_identifiers = [], -// name = null; -// -// // This mechanism is rough - no error handling, allows invalid selections, no way -// // for user to pick/override element identifiers. This is only really meant -// if( collection_type === "list" ) { -// this.chain().each( function( hda ) { -// // TODO: Handle duplicate names. -// var name = hda.attributes.name; -// var id = hda.get('id'); -// var content_type = hda.attributes.history_content_type; -// if( content_type === "dataset" ) { -// if( full_collection_type !== "list" ) { -// this.log( "Invalid collection type" ); -// } -// element_identifiers.push( { name: name, src: "hda", id: id } ); -// } else { -// if( full_collection_type === "list" ) { -// full_collection_type = "list:" + hda.attributes.collection_type; -// } else { -// if( full_collection_type !== "list:" + hda.attributes.collection_type ) { -// this.log( "Invalid collection type" ); -// } -// } -// element_identifiers.push( { name: name, src: "hdca", id: id } ); -// } -// }); -// name = "New Dataset List"; -// } else if( collection_type === "paired" ) { -// var ids = this.ids(); -// if( ids.length !== 2 ){ -// // TODO: Do something... -// } -// element_identifiers.push( { name: "forward", src: "hda", id: ids[ 0 ] } ); -// element_identifiers.push( { name: "reverse", src: "hda", id: ids[ 1 ] } ); -// name = "New Dataset Pair"; -// } -// options.data = { -// type: "dataset_collection", -// name: name, -// collection_type: full_collection_type, -// element_identifiers: JSON.stringify( element_identifiers ) -// }; -// -// var xhr = jQuery.ajax( options ); -// xhr.done( function( message, status, responseObj ){ -// history.refresh( ); -// }); -// xhr.fail( function( xhr, status, message ){ -// if( xhr.responseJSON && xhr.responseJSON.error ){ -// error = xhr.responseJSON.error; -// } else { -// error = xhr.responseJSON; -// } -// xhr.responseText = error; -// // Do something? -// }); -// return xhr; -// }, + ///** Convert this ad-hoc collection of hdas to a formal collection tracked + // by the server. + //**/ + //promoteToHistoryDatasetCollection : function _promote( history, collection_type, options ){ + //}, /** String representation. */ toString : function(){ diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/mvc/history/history-contents.js --- a/client/galaxy/scripts/mvc/history/history-contents.js +++ b/client/galaxy/scripts/mvc/history/history-contents.js @@ -187,6 +187,7 @@ /** copy an existing, accessible hda into this collection */ copy : function( id ){ +//TODO: incorp collections var collection = this, xhr = jQuery.post( this.url(), { source : 'hda', diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/mvc/history/history-model.js --- a/client/galaxy/scripts/mvc/history/history-model.js +++ b/client/galaxy/scripts/mvc/history/history-model.js @@ -14,7 +14,7 @@ * @constructs */ var History = Backbone.Model.extend( BASE_MVC.LoggableMixin ).extend( -/** @lends History.prototype */{ + BASE_MVC.mixin( BASE_MVC.SearchableModelMixin, /** @lends History.prototype */{ /** logger used to record this.log messages, commonly set to console */ //logger : console, @@ -117,7 +117,19 @@ return _.reduce( _.values( this.get( 'state_details' ) ), function( memo, num ){ return memo + num; }, 0 ); }, - // ........................................................................ ajax + // ........................................................................ search + /** What model fields to search with */ + searchAttributes : [ + 'name', 'annotation', 'tags' + ], + + /** Adding title and singular tag */ + searchAliases : { + title : 'name', + tag : 'tags' + }, + + // ........................................................................ updates /** does the contents collection indicate they're still running and need to be updated later? * delay + update if needed * @param {Function} onReadyCallback function to run when all contents are in the ready state @@ -187,6 +199,18 @@ return xhr; }, + // ........................................................................ ajax + /** save this history, _Mark_ing it as deleted (just a flag) */ + _delete : function( options ){ + if( this.get( 'deleted' ) ){ return jQuery.when(); } + return this.save( { deleted: true }, options ); + }, + /** save this history, _Mark_ing it as undeleted */ + undelete : function( options ){ + if( !this.get( 'deleted' ) ){ return jQuery.when(); } + return this.save( { deleted: false }, options ); + }, + /** Make a copy of this history on the server * @param {Boolean} current if true, set the copy as the new current history (default: true) * @param {String} name name of new history (default: none - server sets to: Copy of <current name>) @@ -210,9 +234,28 @@ //TODO:?? all datasets? var history = this, - xhr = jQuery.post( this.urlRoot, postData ); - xhr.done( function( newData ){ - history.trigger( 'copied', history, newData ); + copy = jQuery.post( this.urlRoot, postData ); + // if current - queue to setAsCurrent before firing 'copied' + if( current ){ + return copy.then( function( response ){ + var newHistory = new History( response ); + return newHistory.setAsCurrent() + .done( function(){ + history.trigger( 'copied', history, response ); + }); + }); + } + return copy.done( function( response ){ + history.trigger( 'copied', history, response ); + }); + }, + + setAsCurrent : function(){ + var history = this, + xhr = jQuery.getJSON( '/history/set_as_current?id=' + this.id ); + + xhr.done( function(){ + history.trigger( 'set-as-current', history ); }); return xhr; }, @@ -221,7 +264,7 @@ toString : function(){ return 'History(' + this.get( 'id' ) + ',' + this.get( 'name' ) + ')'; } -}); +})); //------------------------------------------------------------------------------ CLASS VARS /** When the history has running hdas, @@ -351,12 +394,15 @@ create : function create( data, hdas, historyOptions, xhrOptions ){ var collection = this, - history = new History( data || {}, hdas || [], historyOptions || {} ); - return history.save( xhrOptions ).done( function( newData ){ + xhr = jQuery.getJSON( galaxy_config.root + 'history/create_new_current' ); + return xhr.done( function( newData ){ + var history = new History( newData, [], historyOptions || {} ); // new histories go in the front //TODO: (implicit ordering by update time...) collection.unshift( history ); + collection.trigger( 'new-current' ); }); +//TODO: move back to using history.save (via Deferred.then w/ set_as_current) }, toString: function toString(){ diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/mvc/history/history-panel-edit-current.js --- a/client/galaxy/scripts/mvc/history/history-panel-edit-current.js +++ b/client/galaxy/scripts/mvc/history/history-panel-edit-current.js @@ -164,12 +164,13 @@ _setUpCollectionListeners : function(){ _super.prototype._setUpCollectionListeners.call( this ); + //TODO:?? may not be needed? see history-panel-edit, 369 // if a hidden item is created (gen. by a workflow), moves thru the updater to the ready state, // then: remove it from the collection if the panel is set to NOT show hidden datasets this.collection.on( 'state:ready', function( model, newState, oldState ){ if( ( !model.get( 'visible' ) ) && ( !this.storage.get( 'show_hidden' ) ) ){ - this.removeItemView( this.viewFromModel( model ) ); + this.removeItemView( model ); } }, this ); }, @@ -226,7 +227,7 @@ '</a>' ].join('') ); $emptyMsg.find( '.uploader-link' ).click( function( ev ){ - Galaxy.upload._eventShow( ev ); + Galaxy.upload.show( ev ); }); $emptyMsg.find( '.get-data-link' ).click( function( ev ){ $toolMenu.parent().scrollTop( 0 ); @@ -288,51 +289,23 @@ }, // ------------------------------------------------------------------------ sub-views - // reverse HID order - /** Override to reverse order of views - newest contents on top - * and add the current-content highlight class to currentContentId's view - */ + /** Override to add the current-content highlight class to currentContentId's view */ _attachItems : function( $whereTo ){ - var panel = this; - this.$list( $whereTo ).append( this.views.reverse().map( function( view ){ - // add current content - if( panel.currentContentId && view.model.id === panel.currentContentId ){ - panel.setCurrentContent( view ); - } - return view.$el; - })); + _super.prototype._attachItems.call( this, $whereTo ); + var panel = this, + currentContentView; + if( panel.currentContentId + && ( currentContentView = panel.viewFromModelId( panel.currentContentId ) ) ){ + panel.setCurrentContent( currentContentView ); + } return this; }, - /** Override to add datasets at the top */ + /** Override to remove any drill down panels */ addItemView : function( model, collection, options ){ - this.log( this + '.addItemView:', model ); - var panel = this; - if( !panel._filterItem( model ) ){ return undefined; } -//TODO: alternately, call collapse drilldown - // if this panel is currently hidden, return undefined - if( panel.panelStack.length ){ return this._collapseDrilldownPanel(); } - - var view = panel._createItemView( model ); - // use unshift and prepend to preserve reversed order - panel.views.unshift( view ); - - panel.scrollToTop(); - $({}).queue([ - function fadeOutEmptyMsg( next ){ - var $emptyMsg = panel.$emptyMessage(); - if( $emptyMsg.is( ':visible' ) ){ - $emptyMsg.fadeOut( panel.fxSpeed, next ); - } else { - next(); - } - }, - function createAndPrepend( next ){ - // render as hidden then slide down - panel.$list().prepend( view.render( 0 ).$el.hide() ); - view.$el.slideDown( panel.fxSpeed ); - } - ]); + var view = _super.prototype.addItemView.call( this, model, collection, options ); + if( !view ){ return view; } + if( this.panelStack.length ){ return this._collapseDrilldownPanel(); } return view; }, @@ -423,22 +396,6 @@ if( !msg.is( ':hidden' ) ){ msg.slideUp( this.fxSpeed ); } }, -//TODO: move show_deleted/hidden into panel from opt menu and remove this - /** add listeners to an external options menu (templates/webapps/galaxy/root/index.mako) */ - connectToOptionsMenu : function( optionsMenu ){ - if( !optionsMenu ){ - return this; - } - // set a visible indication in the popupmenu for show_hidden/deleted based on the currHistoryPanel's settings - this.on( 'new-storage', function( storage, panel ){ - if( optionsMenu && storage ){ - optionsMenu.findItemByHtml( _l( 'Include Deleted Datasets' ) ).checked = storage.get( 'show_deleted' ); - optionsMenu.findItemByHtml( _l( 'Include Hidden Datasets' ) ).checked = storage.get( 'show_hidden' ); - } - }); - return this; - }, - /** Return a string rep of the history */ toString : function(){ diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/mvc/history/history-panel-edit.js --- a/client/galaxy/scripts/mvc/history/history-panel-edit.js +++ b/client/galaxy/scripts/mvc/history/history-panel-edit.js @@ -69,7 +69,18 @@ this.multiselectActions = attributes.multiselectActions || this._getActions(); }, - // ------------------------------------------------------------------------ panel rendering + /** Override to handle history as drag-drop target */ + _setUpListeners : function(){ + _super.prototype._setUpListeners.call( this ); + + this.on( 'drop', function( ev, data ){ + this.dataDropped( data ); + // remove the drop target + this.dropTargetOff(); + }); + }, + + // ------------------------------------------------------------------------ listeners /** listening for collection events */ _setUpCollectionListeners : function(){ _super.prototype._setUpCollectionListeners.call( this ); @@ -104,6 +115,42 @@ return $newRender; }, + /** override to render counts when the items are rendered */ + renderItems : function( $whereTo ){ + var views = _super.prototype.renderItems.call( this, $whereTo ); + this._renderCounts( $whereTo ); + return views; + }, + + /** override to show counts, what's deleted/hidden, and links to toggle those */ + _renderCounts : function( $whereTo ){ +//TODO: too complicated + function toggleLink( _class, text ){ + return [ '<a class="', _class, '" href="javascript:void(0);">', text, '</a>' ].join( '' ); + } + $whereTo = $whereTo || this.$el; + var deleted = this.collection.where({ deleted: true }), + hidden = this.collection.where({ visible: false }), + msgs = []; + + if( this.views.length ){ + msgs.push( [ this.views.length, _l( 'shown' ) ].join( ' ' ) ); + } + if( deleted.length ){ + msgs.push( ( !this.showDeleted )? + ([ deleted.length, toggleLink( 'toggle-deleted-link', _l( 'deleted' ) ) ].join( ' ' )) + :( toggleLink( 'toggle-deleted-link', _l( 'hide deleted' ) ) ) + ); + } + if( hidden.length ){ + msgs.push( ( !this.showHidden )? + ([ hidden.length, toggleLink( 'toggle-hidden-link', _l( 'hidden' ) ) ].join( ' ' )) + :( toggleLink( 'toggle-hidden-link', _l( 'hide hidden' ) ) ) + ); + } + return $whereTo.find( '> .controls .subtitle' ).html( msgs.join( ', ' ) ); + }, + /** render the tags sub-view controller */ _renderTags : function( $where ){ var panel = this; @@ -266,6 +313,24 @@ }, // ------------------------------------------------------------------------ sub-views + // reverse HID order + /** Override to reverse order of views - newest contents on top */ + _attachItems : function( $whereTo ){ + this.$list( $whereTo ).append( this.views.reverse().map( function( view ){ + return view.$el; + })); + return this; + }, + + /** Override to add new contents at the top */ + _attachView : function( view ){ + var panel = this; + // override to control where the view is added, how/whether it's rendered + panel.views.unshift( view ); + panel.$list().prepend( view.render( 0 ).$el.hide() ); + view.$el.slideDown( panel.fxSpeed ); + }, + /** In this override, add purgeAllowed and whether tags/annotation editors should be shown */ _getItemViewOptions : function( model ){ var options = _super.prototype._getItemViewOptions.call( this, model ); @@ -278,22 +343,31 @@ return options; }, + ///** Override to alter data in drag based on multiselection */ + //_setUpItemViewListeners : function( view ){ + // var panel = this; + // _super.prototype._setUpItemViewListeners.call( panel, view ); + // + //}, + /** If this item is deleted and we're not showing deleted items, remove the view * @param {Model} the item model to check */ _handleHdaDeletionChange : function( itemModel ){ - if( itemModel.get( 'deleted' ) && !this.storage.get( 'show_deleted' ) ){ + if( itemModel.get( 'deleted' ) && !this.showDeleted ){ this.removeItemView( itemModel ); } + this._renderCounts(); }, /** If this item is hidden and we're not showing hidden items, remove the view * @param {Model} the item model to check */ _handleHdaVisibleChange : function( itemModel ){ - if( itemModel.hidden() && !this.storage.get( 'show_hidden' ) ){ + if( itemModel.hidden() && !this.storage.showHidden ){ this.removeItemView( itemModel ); } + this._renderCounts(); }, /** toggle the visibility of each content's tagsEditor applying all the args sent to this function */ @@ -319,7 +393,9 @@ // ------------------------------------------------------------------------ panel events /** event map */ events : _.extend( _.clone( _super.prototype.events ), { - 'click .show-selectors-btn' : 'toggleSelectors' + 'click .show-selectors-btn' : 'toggleSelectors', + 'click .toggle-deleted-link' : function( ev ){ this.toggleShowDeleted(); }, + 'click .toggle-hidden-link' : function( ev ){ this.toggleShowHidden(); } }), /** Update the history size display (curr. upper right of panel). @@ -328,6 +404,132 @@ this.$el.find( '.history-size' ).text( this.model.get( 'nice_size' ) ); }, + // ------------------------------------------------------------------------ as drop target + /** */ + dropTargetOn : function(){ + if( this.dropTarget ){ return this; } + this.dropTarget = true; + + //TODO: to init + var dropHandlers = { + 'dragenter' : _.bind( this.dragenter, this ), + 'dragover' : _.bind( this.dragover, this ), + 'dragleave' : _.bind( this.dragleave, this ), + 'drop' : _.bind( this.drop, this ) + }; +//TODO: scroll to top + var $dropTarget = this._renderDropTarget(); + this.$list().before([ this._renderDropTargetHelp(), $dropTarget ]); + for( var evName in dropHandlers ){ + if( dropHandlers.hasOwnProperty( evName ) ){ + //console.debug( evName, dropHandlers[ evName ] ); + $dropTarget.on( evName, dropHandlers[ evName ] ); + } + } + return this; + }, + + /** */ + _renderDropTarget : function(){ + return $( '<div/>' ).addClass( 'history-drop-target' ) + .css({ + 'height': '64px', + 'margin': '0px 10px 10px 10px', + 'border': '1px dashed black', + 'border-radius' : '3px' + }); + }, + + /** */ + _renderDropTargetHelp : function(){ + return $( '<div/>' ).addClass( 'history-drop-target-help' ) + .css({ + 'margin' : '10px 10px 4px 10px', + 'color' : 'grey', + 'font-size' : '80%', + 'font-style' : 'italic' + }) + .text( _l( 'Drag datasets here to copy them to the current history' ) ); + }, + + /** */ + dropTargetOff : function(){ + if( !this.dropTarget ){ return this; } + //this.log( 'dropTargetOff' ); + this.dropTarget = false; + // + //var dropTarget = this.$( '.history-drop-target' ).get(0); + //for( var evName in this._dropHandlers ){ + // if( this._dropHandlers.hasOwnProperty( evName ) ){ + // console.debug( evName, this._dropHandlers[ evName ] ); + // dropTarget.off( evName, this._dropHandlers[ evName ] ); + // } + //} + this.$( '.history-drop-target' ).remove(); + this.$( '.history-drop-target-help' ).remove(); + return this; + }, + /** */ + dropTargetToggle : function(){ + if( this.dropTarget ){ + this.dropTargetOff(); + } else { + this.dropTargetOn(); + } + return this; + }, + + /** */ + dragenter : function( ev ){ + //console.debug( 'dragenter:', this, ev ); + ev.preventDefault(); + ev.stopPropagation(); + this.$( '.history-drop-target' ).css( 'border', '2px solid black' ); + }, + /** */ + dragover : function( ev ){ + ev.preventDefault(); + ev.stopPropagation(); + }, + /** */ + dragleave : function( ev ){ + //console.debug( 'dragleave:', this, ev ); + ev.preventDefault(); + ev.stopPropagation(); + this.$( '.history-drop-target' ).css( 'border', '1px dashed black' ); + }, + /** */ + drop : function( ev ){ + //console.warn( 'dataTransfer:', ev.dataTransfer.getData( 'text' ) ); + //console.warn( 'dataTransfer:', ev.originalEvent.dataTransfer.getData( 'text' ) ); + ev.preventDefault(); + //ev.stopPropagation(); + ev.dataTransfer.dropEffect = 'move'; + + //console.debug( 'ev.dataTransfer:', ev.dataTransfer ); + + var panel = this, + data = ev.dataTransfer.getData( "text" ); + try { + data = JSON.parse( data ); + + } catch( err ){ + this.warn( 'error parsing JSON from drop:', data ); + } + this.trigger( 'droptarget:drop', ev, data, panel ); + return false; + }, + + /** */ + dataDropped : function( data ){ + var panel = this; + // HDA: dropping will copy it to the history + if( _.isObject( data ) && data.model_class === 'HistoryDatasetAssociation' && data.id ){ + return panel.model.contents.copy( data.id ); + } + return jQuery.when(); + }, + // ........................................................................ misc /** Return a string rep of the history */ toString : function(){ diff -r 605de23ca239879a34426885feddd2d3a459ca26 -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee client/galaxy/scripts/mvc/history/history-panel.js --- a/client/galaxy/scripts/mvc/history/history-panel.js +++ b/client/galaxy/scripts/mvc/history/history-panel.js @@ -254,6 +254,10 @@ _setUpWebStorage : function( initiallyExpanded, show_deleted, show_hidden ){ //if( !this.model ){ return this; } //this.log( '_setUpWebStorage', initiallyExpanded, show_deleted, show_hidden ); + if( this.storage ){ + this.stopListening( this.storage ); + } + this.storage = new HistoryPrefs({ id: HistoryPrefs.historyStorageKey( this.model.get( 'id' ) ) }); @@ -276,6 +280,18 @@ this.trigger( 'new-storage', this.storage, this ); this.log( this + ' (init\'d) storage:', this.storage.get() ); + + this.listenTo( this.storage, { + 'change:show_deleted' : function( view, newVal ){ + this.showDeleted = newVal; + }, + 'change:show_hidden' : function( view, newVal ){ + this.showHidden = newVal; + } + }, this ); + this.showDeleted = ( show_deleted !== undefined )? show_deleted : this.storage.get( 'show_deleted' ); + this.showHidden = ( show_hidden !== undefined )? show_hidden : this.storage.get( 'show_hidden' ); + return this; }, @@ -317,8 +333,8 @@ _filterItem : function( model ){ var panel = this; return ( _super.prototype._filterItem.call( panel, model ) - && ( !model.hidden() || panel.storage.get( 'show_hidden' ) ) - && ( !model.isDeletedOrPurged() || panel.storage.get( 'show_deleted' ) ) ); + && ( !model.hidden() || panel.showHidden ) + && ( !model.isDeletedOrPurged() || panel.showDeleted ) ); }, /** in this override, add a linktarget, and expand if id is in web storage */ @@ -372,12 +388,17 @@ * (2) re-rendering the history * @returns {Boolean} new show_deleted setting */ - toggleShowDeleted : function( show ){ - show = ( show !== undefined )?( show ):( !this.storage.get( 'show_deleted' ) ); - this.storage.set( 'show_deleted', show ); + toggleShowDeleted : function( show, store ){ + show = ( show !== undefined )?( show ):( !this.showDeleted ); + store = ( store !== undefined )?( store ):( true ); + this.showDeleted = show; + if( store ){ + this.storage.set( 'show_deleted', show ); + } + this.trigger( 'show-hidden', show ); //TODO:?? to events on storage('change:show_deleted') this.renderItems(); - return this.storage.get( 'show_deleted' ); + return this.showDeleted; }, /** Handle the user toggling the deleted visibility by: @@ -385,12 +406,17 @@ * (2) re-rendering the history * @returns {Boolean} new show_hidden setting */ - toggleShowHidden : function( show ){ - show = ( show !== undefined )?( show ):( !this.storage.get( 'show_hidden' ) ); - this.storage.set( 'show_hidden', show ); - //TODO:?? to events on storage('change:show_hidden') + toggleShowHidden : function( show, store ){ + show = ( show !== undefined )?( show ):( !this.showHidden ); + store = ( store !== undefined )?( store ):( true ); + this.showHidden = show; + if( store ){ + this.storage.set( 'show_hidden', show ); + } + this.trigger( 'show-hidden', show ); + //TODO:?? to events on storage('change:show_deleted') this.renderItems(); - return this.storage.get( 'show_hidden' ); + return this.showHidden; }, /** On the first search, if there are no details - load them, then search */ @@ -582,9 +608,7 @@ '<div class="title">', '<div class="name"><%= history.name %></div>', '</div>', - '<div class="subtitle">', - //'<%= view.collection.length %>', _l( ' items' ), - '</div>', + '<div class="subtitle"></div>', '<div class="history-size"><%= history.nice_size %></div>', '<div class="actions"></div>', This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/055016b332aa/ Changeset: 055016b332aa Branch: stable User: natefoo Date: 2015-01-13 15:36:58+00:00 Summary: Added tag release_2015.01.13 for changeset 2e8dd2949dd3 Affected #: 1 file diff -r 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee -r 055016b332aa41d559f9ec12c638734824292b54 .hgtags --- a/.hgtags +++ b/.hgtags @@ -21,3 +21,4 @@ 8150024c0e6fc5aef3033cf8aaa574896f6b5d0d latest_2014.08.11 2092948937ac30ef82f71463a235c66d34987088 release_2014.10.06 c437b28348a9345db8433e5b4f0e05ec8fb6c38a latest_2014.10.06 +2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee release_2015.01.13 https://bitbucket.org/galaxy/galaxy-central/commits/fd1946912be3/ Changeset: fd1946912be3 Branch: stable User: natefoo Date: 2015-01-13 15:37:28+00:00 Summary: Added tag latest_2015.01.13 for changeset 2e8dd2949dd3 Affected #: 1 file diff -r 055016b332aa41d559f9ec12c638734824292b54 -r fd1946912be3f0454e155effc10e5a2127388559 .hgtags --- a/.hgtags +++ b/.hgtags @@ -22,3 +22,4 @@ 2092948937ac30ef82f71463a235c66d34987088 release_2014.10.06 c437b28348a9345db8433e5b4f0e05ec8fb6c38a latest_2014.10.06 2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee release_2015.01.13 +2e8dd2949dd3eee0f56f9a3a5ebf1b2baca24aee latest_2015.01.13 Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
commits-noreply@bitbucket.org