details: http://www.bx.psu.edu/hg/galaxy/rev/e01bfc281e09 changeset: 2508:e01bfc281e09 user: Nate Coraor nate@bx.psu.edu date: Tue Jul 28 14:16:19 2009 -0400 description: More secure 'display at' functionality - allows "private" datasets to be displayed at external sites without making them public. Here's how:
1. Replace the 'display at UCSC <site>' direct link with one back to Galaxy at /dataset/<id>/display_at/ucsc_<site>. 2. When the client requests the above path, if the dataset is "private", add a record to the new history_dataset_association_display_at_authorization table. Note: DATASET_MANAGE_PERMISSIONS is required for this step. If the user doesn't have appropriate permissions, an error is displayed. Note 2: "public" datasets don't get a record since they are already readable by external sites. 3. Send the client a redirect to the appropriate external URL. 4. When the external resource requests the dataset (via /root/display_as), allow access if: a. an associated (by hda id) record exists in the above table AND b. the remote host matches one of the known valid servers for that <site> AND c. no more than 60 seconds have passed since the record was updated (i.e., the lame host authorization expires after 60 seconds).
During the 60-second window, anyone could potentially view the authorized dataset via UCSC, but they'd have to know the ID *and* know that it was just authorized. In the future, it'd be more secure to exchange keys with external sites or use a similar strategy, but this would require modifications to these external applications.
This new method is implemented for UCSC; other display apps will need to be handled as well, but this should be as simple as adding those sites/servers to the list in lib/galaxy/security/__init__.py and modifying the link generation in the display app.
9 file(s) affected in this change:
lib/galaxy/app.py lib/galaxy/datatypes/genetics.py lib/galaxy/datatypes/interval.py lib/galaxy/model/__init__.py lib/galaxy/model/mapping.py lib/galaxy/model/migrate/versions/0010_hda_display_at_authz_table.py lib/galaxy/security/__init__.py lib/galaxy/web/controllers/dataset.py lib/galaxy/web/controllers/root.py
diffs (295 lines):
diff -r 0f18a77ca03e -r e01bfc281e09 lib/galaxy/app.py --- a/lib/galaxy/app.py Mon Jul 27 15:55:32 2009 -0400 +++ b/lib/galaxy/app.py Tue Jul 28 14:16:19 2009 -0400 @@ -41,6 +41,7 @@ self.datatypes_registry.load_datatype_indexers( self.toolbox ) #Load security policy self.security_agent = self.model.security_agent + self.host_security_agent = galaxy.security.HostAgent( model=self.security_agent.model, permitted_actions=self.security_agent.permitted_actions ) # Heartbeat and memdump for thread / heap profiling self.heartbeat = None self.memdump = None diff -r 0f18a77ca03e -r e01bfc281e09 lib/galaxy/datatypes/genetics.py --- a/lib/galaxy/datatypes/genetics.py Mon Jul 27 15:55:32 2009 -0400 +++ b/lib/galaxy/datatypes/genetics.py Tue Jul 28 14:16:19 2009 -0400 @@ -86,9 +86,10 @@ sl.append("&hgGenome_dataSetName=%s&hgGenome_dataSetDescription=%s" % (dataset.name, 'GalaxyGG_data')) sl.append("&hgGenome_formatType=best%20guess&hgGenome_markerType=best%20guess") sl.append("&hgGenome_columnLabels=first%20row&hgGenome_maxVal=&hgGenome_labelVals=") - sl.append("&hgGenome_maxGapToFill=25000000&hgGenome_uploadFile=") - s = ''.join(sl) - link = "%s%s%s" % (s, display_url, ggtail ) + sl.append("&hgGenome_maxGapToFill=25000000&hgGenome_uploadFile=%%s") + sl.append(ggtail) + s = urllib.quote_plus( ''.join(sl) ) + link = '%s?redirect_url=%s&display_url=%s' % ( internal_url, s, display_url ) ret_val.append( (site_name, link) ) return ret_val
diff -r 0f18a77ca03e -r e01bfc281e09 lib/galaxy/datatypes/interval.py --- a/lib/galaxy/datatypes/interval.py Mon Jul 27 15:55:32 2009 -0400 +++ b/lib/galaxy/datatypes/interval.py Tue Jul 28 14:16:19 2009 -0400 @@ -233,10 +233,12 @@ # if our URL scheme is https. Making this work # requires additional hackery in your upstream proxy. # If UCSC ever supports https, remove this hack. + internal_url = "%s" % url_for( controller='dataset', dataset_id=dataset.id, action='display_at', filename='ucsc_' + site_name ) if base_url.startswith( 'https://' ): base_url = base_url.replace( 'https', 'http', 1 ) - display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s" % (base_url, url_for( controller='root' ), dataset.id, type) ) - link = "%sdb=%s&position=%s:%s-%s&hgt.customText=%s" % (site_url, dataset.dbkey, chrom, start, stop, display_url ) + display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s&authz_method=display_at" % (base_url, url_for( controller='root' ), dataset.id, type) ) + redirect_url = urllib.quote_plus( "%sdb=%s&position=%s:%s-%s&hgt.customText=%%s" % (site_url, dataset.dbkey, chrom, start, stop ) ) + link = '%s?redirect_url=%s&display_url=%s' % ( internal_url, redirect_url, display_url ) ret_val.append( (site_name, link) ) return ret_val
@@ -892,8 +894,12 @@ stop = viewport_tuple[2] for site_name, site_url in util.get_ucsc_by_build(dataset.dbkey): if site_name in app.config.ucsc_display_sites: - display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s" % (base_url, url_for( controller='root' ), dataset.id, type) ) - link = "%sdb=%s&position=%s:%s-%s&hgt.customText=%s" % (site_url, dataset.dbkey, chrom, start, stop, display_url ) + internal_url = "%s" % url_for( controller='dataset', dataset_id=dataset.id, action='display_at', filename='ucsc_' + site_name ) + if base_url.startswith( 'https://' ): + base_url = base_url.replace( 'https', 'http', 1 ) + display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s&authz_method=display_at" % (base_url, url_for( controller='root' ), dataset.id, type) ) + redirect_url = urllib.quote_plus( "%sdb=%s&position=%s:%s-%s&hgt.customText=%%s" % (site_url, dataset.dbkey, chrom, start, stop ) ) + link = '%s?redirect_url=%s&display_url=%s' % ( internal_url, redirect_url, display_url ) ret_val.append( (site_name, link) ) return ret_val def sniff( self, filename ): diff -r 0f18a77ca03e -r e01bfc281e09 lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py Mon Jul 27 15:55:32 2009 -0400 +++ b/lib/galaxy/model/__init__.py Tue Jul 28 14:16:19 2009 -0400 @@ -689,6 +689,12 @@ if not metadata_safe or not assoc.metadata_safe: assoc.clear( purge = purge )
+class HistoryDatasetAssociationDisplayAtAuthorization( object ): + def __init__( self, hda=None, user=None, site=None ): + self.history_dataset_association = hda + self.user = user + self.site = site + class Library( object ): permitted_actions = get_permitted_actions( filter='LIBRARY' ) def __init__( self, name = None, description = None, root_folder = None ): diff -r 0f18a77ca03e -r e01bfc281e09 lib/galaxy/model/mapping.py --- a/lib/galaxy/model/mapping.py Mon Jul 27 15:55:32 2009 -0400 +++ b/lib/galaxy/model/mapping.py Tue Jul 28 14:16:19 2009 -0400 @@ -96,6 +96,14 @@ Column( "external_filename" , TEXT ), Column( "_extra_files_path", TEXT ), Column( 'file_size', Numeric( 15, 0 ) ) ) + +HistoryDatasetAssociationDisplayAtAuthorization.table = Table( "history_dataset_association_display_at_authorization", metadata, + Column( "id", Integer, primary_key=True ), + Column( "create_time", DateTime, default=now ), + Column( "update_time", DateTime, index=True, default=now, onupdate=now ), + Column( "history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id" ), index=True ), + Column( "user_id", Integer, ForeignKey( "galaxy_user.id" ), index=True ), + Column( "site", TrimmedString( 255 ) ) )
ImplicitlyConvertedDatasetAssociation.table = Table( "implicitly_converted_dataset_association", metadata, Column( "id", Integer, primary_key=True ), @@ -710,6 +718,10 @@ primaryjoin=( ( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) & ( LibraryDatasetDatasetAssociation.table.c.deleted == False ) ) ) ) )
+assign_mapper( context, HistoryDatasetAssociationDisplayAtAuthorization, HistoryDatasetAssociationDisplayAtAuthorization.table, + properties=dict( history_dataset_association = relation( HistoryDatasetAssociation ), + user = relation( User ) ) ) + assign_mapper( context, ImplicitlyConvertedDatasetAssociation, ImplicitlyConvertedDatasetAssociation.table, properties=dict( parent=relation( HistoryDatasetAssociation, diff -r 0f18a77ca03e -r e01bfc281e09 lib/galaxy/model/migrate/versions/0010_hda_display_at_authz_table.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/model/migrate/versions/0010_hda_display_at_authz_table.py Tue Jul 28 14:16:19 2009 -0400 @@ -0,0 +1,47 @@ +from sqlalchemy import * +from sqlalchemy.orm import * +from sqlalchemy.exceptions import * +from migrate import * +from migrate.changeset import * + +import datetime +now = datetime.datetime.utcnow + +import sys, logging +log = logging.getLogger( __name__ ) +log.setLevel(logging.DEBUG) +handler = logging.StreamHandler( sys.stdout ) +format = "%(name)s %(levelname)s %(asctime)s %(message)s" +formatter = logging.Formatter( format ) +handler.setFormatter( formatter ) +log.addHandler( handler ) + +# Need our custom types, but don't import anything else from model +from galaxy.model.custom_types import * + +metadata = MetaData( migrate_engine ) +db_session = scoped_session( sessionmaker( bind=migrate_engine, autoflush=False, transactional=False ) ) + +HistoryDatasetAssociationDisplayAtAuthorization_table = Table( "history_dataset_association_display_at_authorization", metadata, + Column( "id", Integer, primary_key=True ), + Column( "create_time", DateTime, default=now ), + Column( "update_time", DateTime, index=True, default=now, onupdate=now ), + Column( "history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id" ), index=True ), + Column( "user_id", Integer, ForeignKey( "galaxy_user.id" ), index=True ), + Column( "site", TrimmedString( 255 ) ) ) + +def 
upgrade(): + # Load existing tables + metadata.reflect() + try: + HistoryDatasetAssociationDisplayAtAuthorization_table.create() + except Exception, e: + log.debug( "Creating history_dataset_association_display_at_authorization table failed: %s" % str( e ) ) + +def downgrade(): + # Load existing tables + metadata.reflect() + try: + HistoryDatasetAssociationDisplayAtAuthorization_table.drop() + except Exception, e: + log.debug( "Dropping history_dataset_association_display_at_authorization table failed: %s" % str( e ) ) diff -r 0f18a77ca03e -r e01bfc281e09 lib/galaxy/security/__init__.py --- a/lib/galaxy/security/__init__.py Mon Jul 27 15:55:32 2009 -0400 +++ b/lib/galaxy/security/__init__.py Tue Jul 28 14:16:19 2009 -0400 @@ -2,7 +2,8 @@ Galaxy Security
""" -import logging +import logging, socket +from datetime import datetime, timedelta from galaxy.util.bunch import Bunch from galaxy.model.orm import *
@@ -484,6 +485,65 @@ else: raise 'Passed an illegal object to check_folder_contents: %s' % type( entry )
+class HostAgent( RBACAgent ): + """ + A simple security agent which allows access to datasets based on host. + This exists so that externals sites such as UCSC can gain access to + datasets which have permissions which would normally prevent such access. + """ + # TODO: Make sites user configurable + sites = Bunch( + ucsc_main = ( 'hgw1.cse.ucsc.edu', 'hgw2.cse.ucsc.edu', 'hgw3.cse.ucsc.edu', 'hgw4.cse.ucsc.edu', + 'hgw5.cse.ucsc.edu', 'hgw6.cse.ucsc.edu', 'hgw7.cse.ucsc.edu', 'hgw8.cse.ucsc.edu' ), + ucsc_test = ( 'hgwdev.cse.ucsc.edu', ), + ucsc_archaea = ( 'lowepub.cse.ucsc.edu', ) + ) + def __init__( self, model, permitted_actions=None ): + self.model = model + if permitted_actions: + self.permitted_actions = permitted_actions + def allow_action( self, addr, action, **kwd ): + if 'dataset' in kwd and action == self.permitted_actions.DATASET_ACCESS: + hda = kwd['dataset'] + if action == self.permitted_actions.DATASET_ACCESS and action.action not in [ dp.action for dp in hda.dataset.actions ]: + log.debug( 'Allowing access to public dataset with hda: %i.' % hda.id ) + return True # dataset has no roles associated with the access permission, thus is already public + hdadaa = self.model.HistoryDatasetAssociationDisplayAtAuthorization.filter_by( history_dataset_association_id = hda.id ).first() + if not hdadaa: + log.debug( 'Denying access to private dataset with hda: %i. No hdadaa record for this dataset.' % hda.id ) + return False # no auth + # We could just look up the reverse of addr, but then we'd also + # have to verify it with the forward address and special case any + # IPs (instead of hosts) in the server list. + # + # This would be improved by caching, but that's what the OS's name + # service cache daemon is for (you ARE running nscd, right?). + for server in HostAgent.sites.get( hdadaa.site, [] ): + # We're going to search in order, but if the remote site is load + # balancing their connections (as UCSC does), this is okay. 
+ try: + if socket.gethostbyname( server ) == addr: + break # remote host is in the server list + except ( socket.error, socket.gaierror ): + pass # can't resolve, try next + else: + log.debug( 'Denying access to private dataset with hda: %i. Remote addr is not a valid server for site: %s.' % ( hda.id, hdadaa.site ) ) + return False # remote addr is not in the server list + if ( datetime.utcnow() - hdadaa.update_time ) > timedelta( seconds=60 ): + log.debug( 'Denying access to private dataset with hda: %i. Authorization was granted, but has expired.' % hda.id ) + return False # not authz'd in the last 60 seconds + log.debug( 'Allowing access to private dataset with hda: %i. Remote server is: %s.' % ( hda.id, server ) ) + return True + else: + raise 'The dataset access permission is the only valid permission in the host security agent.' + def set_dataset_permissions( self, hda, user, site ): + hdadaa = self.model.HistoryDatasetAssociationDisplayAtAuthorization.filter_by( history_dataset_association_id = hda.id ).first() + if hdadaa: + hdadaa.update_time = datetime.utcnow() + else: + hdadaa = self.model.HistoryDatasetAssociationDisplayAtAuthorization( hda=hda, user=user, site=site ) + hdadaa.flush() + def get_permitted_actions( filter=None ): '''Utility method to return a subset of RBACAgent's permitted actions''' if filter is None: diff -r 0f18a77ca03e -r e01bfc281e09 lib/galaxy/web/controllers/dataset.py --- a/lib/galaxy/web/controllers/dataset.py Mon Jul 27 15:55:32 2009 -0400 +++ b/lib/galaxy/web/controllers/dataset.py Tue Jul 28 14:16:19 2009 -0400 @@ -1,4 +1,4 @@ -import logging, os, sets, string, shutil, re, socket, mimetypes, smtplib +import logging, os, sets, string, shutil, re, socket, mimetypes, smtplib, urllib
from galaxy.web.base.controller import * from galaxy import util, datatypes, jobs, web, model @@ -132,6 +132,24 @@ else: return trans.show_error_message( "You are not allowed to access this dataset" )
+ @web.expose + def display_at( self, trans, dataset_id, filename=None, **kwd ): + """Sets up a dataset permissions so it is viewable at an external site""" + site = filename + data = trans.app.model.HistoryDatasetAssociation.get( dataset_id ) + if not data: + raise paste.httpexceptions.HTTPRequestRangeNotSatisfiable( "Invalid reference dataset id: %s." % str( dataset_id ) ) + if 'display_url' not in kwd or 'redirect_url' not in kwd: + return trans.show_error_message( 'Invalid parameters specified for "display at" link, please contact a Galaxy administrator' ) + redirect_url = kwd['redirect_url'] % urllib.quote_plus( kwd['display_url'] ) + if trans.app.security_agent.allow_action( None, data.permitted_actions.DATASET_ACCESS, dataset = data ): + return trans.response.send_redirect( redirect_url ) # anon access already permitted by rbac + if trans.app.security_agent.allow_action( trans.user, data.permitted_actions.DATASET_MANAGE_PERMISSIONS, dataset = data ): + trans.app.host_security_agent.set_dataset_permissions( data, trans.user, site ) + return trans.response.send_redirect( redirect_url ) + else: + return trans.show_error_message( "You are not allowed to view this dataset at external sites. Please contact your Galaxy administrator to acquire management permissions for this dataset." 
) + def _undelete( self, trans, id ): try: id = int( id ) diff -r 0f18a77ca03e -r e01bfc281e09 lib/galaxy/web/controllers/root.py --- a/lib/galaxy/web/controllers/root.py Mon Jul 27 15:55:32 2009 -0400 +++ b/lib/galaxy/web/controllers/root.py Tue Jul 28 14:16:19 2009 -0400 @@ -197,13 +197,19 @@ def display_as( self, trans, id=None, display_app=None, **kwd ): """Returns a file in a format that can successfully be displayed in display_app""" data = self.app.model.HistoryDatasetAssociation.get( id ) + authz_method = 'rbac' + if 'authz_method' in kwd: + authz_method = kwd['authz_method'] if data: - if trans.app.security_agent.allow_action( trans.user, data.permitted_actions.DATASET_ACCESS, dataset = data ): + if authz_method == 'rbac' and trans.app.security_agent.allow_action( trans.user, data.permitted_actions.DATASET_ACCESS, dataset = data ): trans.response.set_content_type( data.get_mime() ) trans.log_event( "Formatted dataset id %s for display at %s" % ( str( id ), display_app ) ) return data.as_display_type( display_app, **kwd ) + elif authz_method == 'display_at' and trans.app.host_security_agent.allow_action( trans.request.remote_addr, data.permitted_actions.DATASET_ACCESS, dataset = data ): + trans.response.set_content_type( data.get_mime() ) + return data.as_display_type( display_app, **kwd ) else: - return "You are not privileged to access this dataset." + return "You are not allowed to access this dataset." else: return "No data with id=%d" % id