2 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/714f5b1ac790/ Changeset: 714f5b1ac790 User: jmchilton Date: 2013-12-15 23:16:34 Summary: Move history contents filtering logic into model. Filtering for deleted, visible, and ids with ORM should prevent loading unneeded objects into memory (a history with 1000 items and 5 visible will now only cause 5 items to be loaded instead of 1000 just to filter out 5). Simplifies history_contents API logic somewhat, allows easier 'unit' testing (included), and provides a clearer entry point for additional 'showing' additional contents types (read dataset collections). Affected #: 3 files diff -r 4fe46fd3181bfb63e8093200d71801cb507b586d -r 714f5b1ac7902cc5b79d5fdf6390ea70b7e17afb lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -19,6 +19,7 @@ import socket import time from string import Template +from itertools import ifilter import galaxy.datatypes import galaxy.datatypes.registry @@ -43,6 +44,11 @@ # Default Value Required for unit tests datatypes_registry.load_datatypes() +# When constructing filters with in for a fixed set of ids, maximum +# number of items to place in the IN statement. Different databases +# are going to have different limits so it is likely best to not let +# this be unlimited - filter in Python if over this limit. +MAX_IN_FILTER_LENGTH = 100 class NoConverterException(Exception): def __init__(self, value): @@ -893,6 +899,32 @@ rval = galaxy.datatypes.data.nice_size( rval ) return rval + def contents_iter( self, **kwds ): + """ + Fetch filtered list of contents of history. + """ + python_filter = None + db_session = object_session( self ) + assert db_session != None + query = db_session.query( HistoryDatasetAssociation ).filter( HistoryDatasetAssociation.table.c.history_id == self.id ) + deleted = galaxy.util.string_as_bool_or_none( kwds.get( 'deleted', None ) ) + if deleted is not None: + query = query.filter( HistoryDatasetAssociation.deleted == bool( kwds['deleted'] ) ) + visible = galaxy.util.string_as_bool_or_none( kwds.get( 'visible', None ) ) + if visible is not None: + query = query.filter( HistoryDatasetAssociation.visible == bool( kwds['visible'] ) ) + if 'ids' in kwds: + ids = kwds['ids'] + max_in_filter_length = kwds.get('max_in_filter_length', MAX_IN_FILTER_LENGTH) + if len(ids) < max_in_filter_length: + query = query.filter( HistoryDatasetAssociation.id.in_(ids) ) + else: + python_filter = lambda hda: hda.id in ids + if python_filter: + return ifilter(python_filter, query) + else: + return query + def copy_tags_from(self,target_user,source_history): for src_shta in source_history.tags: new_shta = src_shta.copy() diff -r 4fe46fd3181bfb63e8093200d71801cb507b586d -r 714f5b1ac7902cc5b79d5fdf6390ea70b7e17afb lib/galaxy/webapps/galaxy/api/history_contents.py --- a/lib/galaxy/webapps/galaxy/api/history_contents.py +++ b/lib/galaxy/webapps/galaxy/api/history_contents.py @@ -51,47 +51,28 @@ else: history = self.get_history( trans, history_id, check_ownership=True, check_accessible=True ) - # if ids, return _FULL_ data (as show) for each id passed + contents_kwds = {} if ids: - ids = ids.split( ',' ) - for index, hda in enumerate( history.datasets ): - encoded_hda_id = trans.security.encode_id( hda.id ) - if encoded_hda_id in ids: - #TODO: share code with show - rval.append( self._detailed_hda_dict( trans, hda ) ) - - # if no ids passed, return a _SUMMARY_ of _all_ datasets in the history + ids = map( lambda id: trans.security.decode_id( id ), ids.split( ',' ) ) + contents_kwds[ 'ids' ] = ids + # If explicit ids given, always used detailed result. + details = 'all' else: + contents_kwds[ 'deleted' ] = kwd.get( 'deleted', None ) + contents_kwds[ 'visible' ] = kwd.get( 'visible', None ) # details param allows a mixed set of summary and detailed hdas #TODO: this is getting convoluted due to backwards compat details = kwd.get( 'details', None ) or [] if details and details != 'all': details = util.listify( details ) - # by default return all datasets - even if deleted or hidden (defaulting the next switches to None) - # if specified return those datasets that match the setting - # backwards compat - return_deleted = util.string_as_bool_or_none( kwd.get( 'deleted', None ) ) - return_visible = util.string_as_bool_or_none( kwd.get( 'visible', None ) ) - - for hda in history.datasets: - # if either return_ setting has been requested (!= None), skip hdas that don't match the request - if return_deleted is not None: - if( ( return_deleted and not hda.deleted ) - or ( not return_deleted and hda.deleted ) ): - continue - if return_visible is not None: - if( ( return_visible and not hda.visible ) - or ( not return_visible and hda.visible ) ): - continue - - encoded_hda_id = trans.security.encode_id( hda.id ) - if( ( encoded_hda_id in details ) - or ( details == 'all' ) ): - rval.append( self._detailed_hda_dict( trans, hda ) ) - else: - rval.append( self._summary_hda_dict( trans, history_id, hda ) ) - + for hda in history.contents_iter( **contents_kwds ): + encoded_hda_id = trans.security.encode_id( hda.id ) + detailed = details == 'all' or ( encoded_hda_id in details ) + if detailed: + rval.append( self._detailed_hda_dict( trans, hda ) ) + else: + rval.append( self._summary_hda_dict( trans, history_id, hda ) ) except Exception, e: # for errors that are not specific to one hda (history lookup or summary list) rval = "Error in history API at listing contents: " + str( e ) diff -r 4fe46fd3181bfb63e8093200d71801cb507b586d -r 714f5b1ac7902cc5b79d5fdf6390ea70b7e17afb test/unit/test_galaxy_mapping.py --- a/test/unit/test_galaxy_mapping.py +++ b/test/unit/test_galaxy_mapping.py @@ -180,6 +180,39 @@ assert hist1.name == "History 2b" # gvk TODO need to ad test for GalaxySessions, but not yet sure what they should look like. + def test_history_contents( self ): + model = self.model + u = model.User( email="contents@foo.bar.baz", password="password" ) + # gs = model.GalaxySession() + h1 = model.History( name="HistoryContentsHistory1", user=u) + + self.persist( u, h1, expunge=True ) + + d1 = self.new_hda( h1, name="1" ) + d2 = self.new_hda( h1, name="2", visible=False ) + d3 = self.new_hda( h1, name="3", deleted=True ) + d4 = self.new_hda( h1, name="4", visible=False, deleted=True ) + + def contents_iter_names(**kwds): + history = model.context.query( model.History ).filter( + model.History.name == "HistoryContentsHistory1" + ).first() + return set( map( lambda hda: hda.name, history.contents_iter( **kwds ) ) ) + + assert contents_iter_names() == set( [ "1", "2", "3", "4" ] ) + assert contents_iter_names( deleted=False ) == set( [ "1", "2" ] ) + assert contents_iter_names( visible=True ) == set( [ "1", "3" ] ) + assert contents_iter_names( visible=False ) == set( [ "2", "4" ] ) + assert contents_iter_names( deleted=True, visible=False ) == set( [ "4" ] ) + + assert contents_iter_names( ids=[ d1.id, d2.id, d3.id, d4.id ] ) == set( [ "1", "2", "3", "4" ] ) + assert contents_iter_names( ids=[ d1.id, d2.id, d3.id, d4.id ], max_in_filter_length=1 ) == set( [ "1", "2", "3", "4" ] ) + + assert contents_iter_names( ids=[ d1.id, d3.id ] ) == set( [ "1", "3" ] ) + + def new_hda( self, history, **kwds ): + return self.persist( self.model.HistoryDatasetAssociation( history=history, create_dataset=True, sa_session=self.model.session, **kwds ), flush=True ) + @classmethod def setUpClass(cls): # Start the database and connect the mapping @@ -191,10 +224,16 @@ return cls.model.session.query( type ) @classmethod - def persist(cls, *args): + def persist(cls, *args, **kwargs): + session = cls.model.session + flush = kwargs.get('flush', True) for arg in args: - cls.model.session.add( arg ) - cls.expunge() + session.add( arg ) + if flush: + session.flush() + if kwargs.get('expunge', not flush): + cls.expunge() + return arg # Return last or only arg. @classmethod def expunge(cls): https://bitbucket.org/galaxy/galaxy-central/commits/8e448f2ac955/ Changeset: 8e448f2ac955 User: jmchilton Date: 2013-12-17 14:31:52 Summary: Merged in jmchilton/galaxy-central-fork-1 (pull request #275) Move history contents filtering logic into model. Affected #: 3 files diff -r 90a639389de4a4348a32e21d87a13de4b1a72f56 -r 8e448f2ac955e6d434476b18c59b2cffcfc55404 lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -19,6 +19,7 @@ import socket import time from string import Template +from itertools import ifilter import galaxy.datatypes import galaxy.datatypes.registry @@ -43,6 +44,11 @@ # Default Value Required for unit tests datatypes_registry.load_datatypes() +# When constructing filters with in for a fixed set of ids, maximum +# number of items to place in the IN statement. Different databases +# are going to have different limits so it is likely best to not let +# this be unlimited - filter in Python if over this limit. +MAX_IN_FILTER_LENGTH = 100 class NoConverterException(Exception): def __init__(self, value): @@ -893,6 +899,32 @@ rval = galaxy.datatypes.data.nice_size( rval ) return rval + def contents_iter( self, **kwds ): + """ + Fetch filtered list of contents of history. + """ + python_filter = None + db_session = object_session( self ) + assert db_session != None + query = db_session.query( HistoryDatasetAssociation ).filter( HistoryDatasetAssociation.table.c.history_id == self.id ) + deleted = galaxy.util.string_as_bool_or_none( kwds.get( 'deleted', None ) ) + if deleted is not None: + query = query.filter( HistoryDatasetAssociation.deleted == bool( kwds['deleted'] ) ) + visible = galaxy.util.string_as_bool_or_none( kwds.get( 'visible', None ) ) + if visible is not None: + query = query.filter( HistoryDatasetAssociation.visible == bool( kwds['visible'] ) ) + if 'ids' in kwds: + ids = kwds['ids'] + max_in_filter_length = kwds.get('max_in_filter_length', MAX_IN_FILTER_LENGTH) + if len(ids) < max_in_filter_length: + query = query.filter( HistoryDatasetAssociation.id.in_(ids) ) + else: + python_filter = lambda hda: hda.id in ids + if python_filter: + return ifilter(python_filter, query) + else: + return query + def copy_tags_from(self,target_user,source_history): for src_shta in source_history.tags: new_shta = src_shta.copy() diff -r 90a639389de4a4348a32e21d87a13de4b1a72f56 -r 8e448f2ac955e6d434476b18c59b2cffcfc55404 lib/galaxy/webapps/galaxy/api/history_contents.py --- a/lib/galaxy/webapps/galaxy/api/history_contents.py +++ b/lib/galaxy/webapps/galaxy/api/history_contents.py @@ -51,47 +51,28 @@ else: history = self.get_history( trans, history_id, check_ownership=True, check_accessible=True ) - # if ids, return _FULL_ data (as show) for each id passed + contents_kwds = {} if ids: - ids = ids.split( ',' ) - for index, hda in enumerate( history.datasets ): - encoded_hda_id = trans.security.encode_id( hda.id ) - if encoded_hda_id in ids: - #TODO: share code with show - rval.append( self._detailed_hda_dict( trans, hda ) ) - - # if no ids passed, return a _SUMMARY_ of _all_ datasets in the history + ids = map( lambda id: trans.security.decode_id( id ), ids.split( ',' ) ) + contents_kwds[ 'ids' ] = ids + # If explicit ids given, always used detailed result. + details = 'all' else: + contents_kwds[ 'deleted' ] = kwd.get( 'deleted', None ) + contents_kwds[ 'visible' ] = kwd.get( 'visible', None ) # details param allows a mixed set of summary and detailed hdas #TODO: this is getting convoluted due to backwards compat details = kwd.get( 'details', None ) or [] if details and details != 'all': details = util.listify( details ) - # by default return all datasets - even if deleted or hidden (defaulting the next switches to None) - # if specified return those datasets that match the setting - # backwards compat - return_deleted = util.string_as_bool_or_none( kwd.get( 'deleted', None ) ) - return_visible = util.string_as_bool_or_none( kwd.get( 'visible', None ) ) - - for hda in history.datasets: - # if either return_ setting has been requested (!= None), skip hdas that don't match the request - if return_deleted is not None: - if( ( return_deleted and not hda.deleted ) - or ( not return_deleted and hda.deleted ) ): - continue - if return_visible is not None: - if( ( return_visible and not hda.visible ) - or ( not return_visible and hda.visible ) ): - continue - - encoded_hda_id = trans.security.encode_id( hda.id ) - if( ( encoded_hda_id in details ) - or ( details == 'all' ) ): - rval.append( self._detailed_hda_dict( trans, hda ) ) - else: - rval.append( self._summary_hda_dict( trans, history_id, hda ) ) - + for hda in history.contents_iter( **contents_kwds ): + encoded_hda_id = trans.security.encode_id( hda.id ) + detailed = details == 'all' or ( encoded_hda_id in details ) + if detailed: + rval.append( self._detailed_hda_dict( trans, hda ) ) + else: + rval.append( self._summary_hda_dict( trans, history_id, hda ) ) except Exception, e: # for errors that are not specific to one hda (history lookup or summary list) rval = "Error in history API at listing contents: " + str( e ) diff -r 90a639389de4a4348a32e21d87a13de4b1a72f56 -r 8e448f2ac955e6d434476b18c59b2cffcfc55404 test/unit/test_galaxy_mapping.py --- a/test/unit/test_galaxy_mapping.py +++ b/test/unit/test_galaxy_mapping.py @@ -180,6 +180,39 @@ assert hist1.name == "History 2b" # gvk TODO need to ad test for GalaxySessions, but not yet sure what they should look like. + def test_history_contents( self ): + model = self.model + u = model.User( email="contents@foo.bar.baz", password="password" ) + # gs = model.GalaxySession() + h1 = model.History( name="HistoryContentsHistory1", user=u) + + self.persist( u, h1, expunge=True ) + + d1 = self.new_hda( h1, name="1" ) + d2 = self.new_hda( h1, name="2", visible=False ) + d3 = self.new_hda( h1, name="3", deleted=True ) + d4 = self.new_hda( h1, name="4", visible=False, deleted=True ) + + def contents_iter_names(**kwds): + history = model.context.query( model.History ).filter( + model.History.name == "HistoryContentsHistory1" + ).first() + return set( map( lambda hda: hda.name, history.contents_iter( **kwds ) ) ) + + assert contents_iter_names() == set( [ "1", "2", "3", "4" ] ) + assert contents_iter_names( deleted=False ) == set( [ "1", "2" ] ) + assert contents_iter_names( visible=True ) == set( [ "1", "3" ] ) + assert contents_iter_names( visible=False ) == set( [ "2", "4" ] ) + assert contents_iter_names( deleted=True, visible=False ) == set( [ "4" ] ) + + assert contents_iter_names( ids=[ d1.id, d2.id, d3.id, d4.id ] ) == set( [ "1", "2", "3", "4" ] ) + assert contents_iter_names( ids=[ d1.id, d2.id, d3.id, d4.id ], max_in_filter_length=1 ) == set( [ "1", "2", "3", "4" ] ) + + assert contents_iter_names( ids=[ d1.id, d3.id ] ) == set( [ "1", "3" ] ) + + def new_hda( self, history, **kwds ): + return self.persist( self.model.HistoryDatasetAssociation( history=history, create_dataset=True, sa_session=self.model.session, **kwds ), flush=True ) + @classmethod def setUpClass(cls): # Start the database and connect the mapping @@ -191,10 +224,16 @@ return cls.model.session.query( type ) @classmethod - def persist(cls, *args): + def persist(cls, *args, **kwargs): + session = cls.model.session + flush = kwargs.get('flush', True) for arg in args: - cls.model.session.add( arg ) - cls.expunge() + session.add( arg ) + if flush: + session.flush() + if kwargs.get('expunge', not flush): + cls.expunge() + return arg # Return last or only arg. @classmethod def expunge(cls): Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.