3 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/d8c2fe8e1594/ Changeset: d8c2fe8e1594 User: jmchilton Date: 2014-01-04 04:58:12 Summary: Backout of Backout of 714f5b1 for now. ... or would that be back-into 714f5b1? 714f5b1 - Move history contents filtering logic into model. Filtering for deleted, visible, and ids with ORM should prevent loading unneeded objects into memory (a history with 1000 items and 5 visible will now only cause 5 items to be loaded instead of 1000 just to filter out 5). Simplifies history_contents API logic somewhat, allows easier 'unit' testing (included), and provides a clearer entry point for additional 'showing' additional contents types (read dataset collections). Affected #: 3 files diff -r f11a729d42cb66753b8b1616ad625c1b2a306b3d -r d8c2fe8e159494e972f624cc4c73b7ff7ce4099f lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -19,6 +19,7 @@ import socket import time from string import Template +from itertools import ifilter import galaxy.datatypes import galaxy.datatypes.registry @@ -43,6 +44,11 @@ # Default Value Required for unit tests datatypes_registry.load_datatypes() +# When constructing filters with in for a fixed set of ids, maximum +# number of items to place in the IN statement. Different databases +# are going to have different limits so it is likely best to not let +# this be unlimited - filter in Python if over this limit. +MAX_IN_FILTER_LENGTH = 100 class NoConverterException(Exception): def __init__(self, value): @@ -893,6 +899,32 @@ rval = galaxy.datatypes.data.nice_size( rval ) return rval + def contents_iter( self, **kwds ): + """ + Fetch filtered list of contents of history. + """ + python_filter = None + db_session = object_session( self ) + assert db_session != None + query = db_session.query( HistoryDatasetAssociation ).filter( HistoryDatasetAssociation.table.c.history_id == self.id ) + deleted = galaxy.util.string_as_bool_or_none( kwds.get( 'deleted', None ) ) + if deleted is not None: + query = query.filter( HistoryDatasetAssociation.deleted == bool( kwds['deleted'] ) ) + visible = galaxy.util.string_as_bool_or_none( kwds.get( 'visible', None ) ) + if visible is not None: + query = query.filter( HistoryDatasetAssociation.visible == bool( kwds['visible'] ) ) + if 'ids' in kwds: + ids = kwds['ids'] + max_in_filter_length = kwds.get('max_in_filter_length', MAX_IN_FILTER_LENGTH) + if len(ids) < max_in_filter_length: + query = query.filter( HistoryDatasetAssociation.id.in_(ids) ) + else: + python_filter = lambda hda: hda.id in ids + if python_filter: + return ifilter(python_filter, query) + else: + return query + def copy_tags_from(self,target_user,source_history): for src_shta in source_history.tags: new_shta = src_shta.copy() diff -r f11a729d42cb66753b8b1616ad625c1b2a306b3d -r d8c2fe8e159494e972f624cc4c73b7ff7ce4099f lib/galaxy/webapps/galaxy/api/history_contents.py --- a/lib/galaxy/webapps/galaxy/api/history_contents.py +++ b/lib/galaxy/webapps/galaxy/api/history_contents.py @@ -51,47 +51,28 @@ else: history = self.get_history( trans, history_id, check_ownership=True, check_accessible=True ) - # if ids, return _FULL_ data (as show) for each id passed + contents_kwds = {} if ids: - ids = ids.split( ',' ) - for index, hda in enumerate( history.datasets ): - encoded_hda_id = trans.security.encode_id( hda.id ) - if encoded_hda_id in ids: - #TODO: share code with show - rval.append( self._detailed_hda_dict( trans, hda ) ) - - # if no ids passed, return a _SUMMARY_ of _all_ datasets in the history + ids = map( lambda id: trans.security.decode_id( id ), ids.split( ',' ) ) + contents_kwds[ 'ids' ] = ids + # If explicit ids given, always used detailed result. + details = 'all' else: + contents_kwds[ 'deleted' ] = kwd.get( 'deleted', None ) + contents_kwds[ 'visible' ] = kwd.get( 'visible', None ) # details param allows a mixed set of summary and detailed hdas #TODO: this is getting convoluted due to backwards compat details = kwd.get( 'details', None ) or [] if details and details != 'all': details = util.listify( details ) - # by default return all datasets - even if deleted or hidden (defaulting the next switches to None) - # if specified return those datasets that match the setting - # backwards compat - return_deleted = util.string_as_bool_or_none( kwd.get( 'deleted', None ) ) - return_visible = util.string_as_bool_or_none( kwd.get( 'visible', None ) ) - - for hda in history.datasets: - # if either return_ setting has been requested (!= None), skip hdas that don't match the request - if return_deleted is not None: - if( ( return_deleted and not hda.deleted ) - or ( not return_deleted and hda.deleted ) ): - continue - if return_visible is not None: - if( ( return_visible and not hda.visible ) - or ( not return_visible and hda.visible ) ): - continue - - encoded_hda_id = trans.security.encode_id( hda.id ) - if( ( encoded_hda_id in details ) - or ( details == 'all' ) ): - rval.append( self._detailed_hda_dict( trans, hda ) ) - else: - rval.append( self._summary_hda_dict( trans, history_id, hda ) ) - + for hda in history.contents_iter( **contents_kwds ): + encoded_hda_id = trans.security.encode_id( hda.id ) + detailed = details == 'all' or ( encoded_hda_id in details ) + if detailed: + rval.append( self._detailed_hda_dict( trans, hda ) ) + else: + rval.append( self._summary_hda_dict( trans, history_id, hda ) ) except Exception, e: # for errors that are not specific to one hda (history lookup or summary list) rval = "Error in history API at listing contents: " + str( e ) diff -r f11a729d42cb66753b8b1616ad625c1b2a306b3d -r d8c2fe8e159494e972f624cc4c73b7ff7ce4099f test/unit/test_galaxy_mapping.py --- a/test/unit/test_galaxy_mapping.py +++ b/test/unit/test_galaxy_mapping.py @@ -180,6 +180,39 @@ assert hist1.name == "History 2b" # gvk TODO need to ad test for GalaxySessions, but not yet sure what they should look like. + def test_history_contents( self ): + model = self.model + u = model.User( email="contents@foo.bar.baz", password="password" ) + # gs = model.GalaxySession() + h1 = model.History( name="HistoryContentsHistory1", user=u) + + self.persist( u, h1, expunge=True ) + + d1 = self.new_hda( h1, name="1" ) + d2 = self.new_hda( h1, name="2", visible=False ) + d3 = self.new_hda( h1, name="3", deleted=True ) + d4 = self.new_hda( h1, name="4", visible=False, deleted=True ) + + def contents_iter_names(**kwds): + history = model.context.query( model.History ).filter( + model.History.name == "HistoryContentsHistory1" + ).first() + return set( map( lambda hda: hda.name, history.contents_iter( **kwds ) ) ) + + assert contents_iter_names() == set( [ "1", "2", "3", "4" ] ) + assert contents_iter_names( deleted=False ) == set( [ "1", "2" ] ) + assert contents_iter_names( visible=True ) == set( [ "1", "3" ] ) + assert contents_iter_names( visible=False ) == set( [ "2", "4" ] ) + assert contents_iter_names( deleted=True, visible=False ) == set( [ "4" ] ) + + assert contents_iter_names( ids=[ d1.id, d2.id, d3.id, d4.id ] ) == set( [ "1", "2", "3", "4" ] ) + assert contents_iter_names( ids=[ d1.id, d2.id, d3.id, d4.id ], max_in_filter_length=1 ) == set( [ "1", "2", "3", "4" ] ) + + assert contents_iter_names( ids=[ d1.id, d3.id ] ) == set( [ "1", "3" ] ) + + def new_hda( self, history, **kwds ): + return self.persist( self.model.HistoryDatasetAssociation( history=history, create_dataset=True, sa_session=self.model.session, **kwds ), flush=True ) + @classmethod def setUpClass(cls): # Start the database and connect the mapping @@ -191,10 +224,16 @@ return cls.model.session.query( type ) @classmethod - def persist(cls, *args): + def persist(cls, *args, **kwargs): + session = cls.model.session + flush = kwargs.get('flush', True) for arg in args: - cls.model.session.add( arg ) - cls.expunge() + session.add( arg ) + if flush: + session.flush() + if kwargs.get('expunge', not flush): + cls.expunge() + return arg # Return last or only arg. @classmethod def expunge(cls): https://bitbucket.org/galaxy/galaxy-central/commits/d055c34bc4e0/ Changeset: d055c34bc4e0 User: jmchilton Date: 2014-01-04 04:58:13 Summary: Fix history contents ordering broken with previous changeset. Add stronger test cases, though these are still insufficient to exercise the error. The only way I was able to get the history contents to come out misordered was to use postgres. Nonetheless, using postgres this now orders datasets properly. Affected #: 2 files diff -r d8c2fe8e159494e972f624cc4c73b7ff7ce4099f -r d055c34bc4e06c58dae118eeb8861cfed479fc52 lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -907,6 +907,7 @@ db_session = object_session( self ) assert db_session != None query = db_session.query( HistoryDatasetAssociation ).filter( HistoryDatasetAssociation.table.c.history_id == self.id ) + query = query.order_by( HistoryDatasetAssociation.table.c.hid.asc() ) deleted = galaxy.util.string_as_bool_or_none( kwds.get( 'deleted', None ) ) if deleted is not None: query = query.filter( HistoryDatasetAssociation.deleted == bool( kwds['deleted'] ) ) diff -r d8c2fe8e159494e972f624cc4c73b7ff7ce4099f -r d055c34bc4e06c58dae118eeb8861cfed479fc52 test/unit/test_galaxy_mapping.py --- a/test/unit/test_galaxy_mapping.py +++ b/test/unit/test_galaxy_mapping.py @@ -186,32 +186,34 @@ # gs = model.GalaxySession() h1 = model.History( name="HistoryContentsHistory1", user=u) - self.persist( u, h1, expunge=True ) + self.persist( u, h1, expunge=False ) d1 = self.new_hda( h1, name="1" ) d2 = self.new_hda( h1, name="2", visible=False ) d3 = self.new_hda( h1, name="3", deleted=True ) d4 = self.new_hda( h1, name="4", visible=False, deleted=True ) + self.session().flush() + def contents_iter_names(**kwds): history = model.context.query( model.History ).filter( model.History.name == "HistoryContentsHistory1" ).first() - return set( map( lambda hda: hda.name, history.contents_iter( **kwds ) ) ) + return list( map( lambda hda: hda.name, history.contents_iter( **kwds ) ) ) - assert contents_iter_names() == set( [ "1", "2", "3", "4" ] ) - assert contents_iter_names( deleted=False ) == set( [ "1", "2" ] ) - assert contents_iter_names( visible=True ) == set( [ "1", "3" ] ) - assert contents_iter_names( visible=False ) == set( [ "2", "4" ] ) - assert contents_iter_names( deleted=True, visible=False ) == set( [ "4" ] ) + self.assertEquals(contents_iter_names(), [ "1", "2", "3", "4" ]) + assert contents_iter_names( deleted=False ) == [ "1", "2" ] + assert contents_iter_names( visible=True ) == [ "1", "3" ] + assert contents_iter_names( visible=False ) == [ "2", "4" ] + assert contents_iter_names( deleted=True, visible=False ) == [ "4" ] - assert contents_iter_names( ids=[ d1.id, d2.id, d3.id, d4.id ] ) == set( [ "1", "2", "3", "4" ] ) - assert contents_iter_names( ids=[ d1.id, d2.id, d3.id, d4.id ], max_in_filter_length=1 ) == set( [ "1", "2", "3", "4" ] ) + assert contents_iter_names( ids=[ d1.id, d2.id, d3.id, d4.id ] ) == [ "1", "2", "3", "4" ] + assert contents_iter_names( ids=[ d1.id, d2.id, d3.id, d4.id ], max_in_filter_length=1 ) == [ "1", "2", "3", "4" ] - assert contents_iter_names( ids=[ d1.id, d3.id ] ) == set( [ "1", "3" ] ) + assert contents_iter_names( ids=[ d1.id, d3.id ] ) == [ "1", "3" ] def new_hda( self, history, **kwds ): - return self.persist( self.model.HistoryDatasetAssociation( history=history, create_dataset=True, sa_session=self.model.session, **kwds ), flush=True ) + return history.add_dataset( self.model.HistoryDatasetAssociation( create_dataset=True, sa_session=self.model.session, **kwds ) ) @classmethod def setUpClass(cls): @@ -225,7 +227,7 @@ @classmethod def persist(cls, *args, **kwargs): - session = cls.model.session + session = cls.session() flush = kwargs.get('flush', True) for arg in args: session.add( arg ) @@ -236,6 +238,10 @@ return arg # Return last or only arg. @classmethod + def session(cls): + return cls.model.session + + @classmethod def expunge(cls): cls.model.session.flush() cls.model.session.expunge_all() https://bitbucket.org/galaxy/galaxy-central/commits/6f63fc8b7464/ Changeset: 6f63fc8b7464 User: carlfeberhard Date: 2014-01-07 21:48:55 Summary: Merged in jmchilton/galaxy-central-fork-1 (pull request #289) Move history contents filtering logic into model. Affected #: 3 files diff -r 0c5e20558f6bb8d690dd9c9d25c172f08e837b1b -r 6f63fc8b7464f58952522dc35df1544cd40563e3 lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -19,6 +19,7 @@ import socket import time from string import Template +from itertools import ifilter import galaxy.datatypes import galaxy.datatypes.registry @@ -43,6 +44,11 @@ # Default Value Required for unit tests datatypes_registry.load_datatypes() +# When constructing filters with in for a fixed set of ids, maximum +# number of items to place in the IN statement. Different databases +# are going to have different limits so it is likely best to not let +# this be unlimited - filter in Python if over this limit. +MAX_IN_FILTER_LENGTH = 100 class NoConverterException(Exception): def __init__(self, value): @@ -893,6 +899,33 @@ rval = galaxy.datatypes.data.nice_size( rval ) return rval + def contents_iter( self, **kwds ): + """ + Fetch filtered list of contents of history. + """ + python_filter = None + db_session = object_session( self ) + assert db_session != None + query = db_session.query( HistoryDatasetAssociation ).filter( HistoryDatasetAssociation.table.c.history_id == self.id ) + query = query.order_by( HistoryDatasetAssociation.table.c.hid.asc() ) + deleted = galaxy.util.string_as_bool_or_none( kwds.get( 'deleted', None ) ) + if deleted is not None: + query = query.filter( HistoryDatasetAssociation.deleted == bool( kwds['deleted'] ) ) + visible = galaxy.util.string_as_bool_or_none( kwds.get( 'visible', None ) ) + if visible is not None: + query = query.filter( HistoryDatasetAssociation.visible == bool( kwds['visible'] ) ) + if 'ids' in kwds: + ids = kwds['ids'] + max_in_filter_length = kwds.get('max_in_filter_length', MAX_IN_FILTER_LENGTH) + if len(ids) < max_in_filter_length: + query = query.filter( HistoryDatasetAssociation.id.in_(ids) ) + else: + python_filter = lambda hda: hda.id in ids + if python_filter: + return ifilter(python_filter, query) + else: + return query + def copy_tags_from(self,target_user,source_history): for src_shta in source_history.tags: new_shta = src_shta.copy() diff -r 0c5e20558f6bb8d690dd9c9d25c172f08e837b1b -r 6f63fc8b7464f58952522dc35df1544cd40563e3 lib/galaxy/webapps/galaxy/api/history_contents.py --- a/lib/galaxy/webapps/galaxy/api/history_contents.py +++ b/lib/galaxy/webapps/galaxy/api/history_contents.py @@ -51,47 +51,28 @@ else: history = self.get_history( trans, history_id, check_ownership=True, check_accessible=True ) - # if ids, return _FULL_ data (as show) for each id passed + contents_kwds = {} if ids: - ids = ids.split( ',' ) - for index, hda in enumerate( history.datasets ): - encoded_hda_id = trans.security.encode_id( hda.id ) - if encoded_hda_id in ids: - #TODO: share code with show - rval.append( self._detailed_hda_dict( trans, hda ) ) - - # if no ids passed, return a _SUMMARY_ of _all_ datasets in the history + ids = map( lambda id: trans.security.decode_id( id ), ids.split( ',' ) ) + contents_kwds[ 'ids' ] = ids + # If explicit ids given, always used detailed result. + details = 'all' else: + contents_kwds[ 'deleted' ] = kwd.get( 'deleted', None ) + contents_kwds[ 'visible' ] = kwd.get( 'visible', None ) # details param allows a mixed set of summary and detailed hdas #TODO: this is getting convoluted due to backwards compat details = kwd.get( 'details', None ) or [] if details and details != 'all': details = util.listify( details ) - # by default return all datasets - even if deleted or hidden (defaulting the next switches to None) - # if specified return those datasets that match the setting - # backwards compat - return_deleted = util.string_as_bool_or_none( kwd.get( 'deleted', None ) ) - return_visible = util.string_as_bool_or_none( kwd.get( 'visible', None ) ) - - for hda in history.datasets: - # if either return_ setting has been requested (!= None), skip hdas that don't match the request - if return_deleted is not None: - if( ( return_deleted and not hda.deleted ) - or ( not return_deleted and hda.deleted ) ): - continue - if return_visible is not None: - if( ( return_visible and not hda.visible ) - or ( not return_visible and hda.visible ) ): - continue - - encoded_hda_id = trans.security.encode_id( hda.id ) - if( ( encoded_hda_id in details ) - or ( details == 'all' ) ): - rval.append( self._detailed_hda_dict( trans, hda ) ) - else: - rval.append( self._summary_hda_dict( trans, history_id, hda ) ) - + for hda in history.contents_iter( **contents_kwds ): + encoded_hda_id = trans.security.encode_id( hda.id ) + detailed = details == 'all' or ( encoded_hda_id in details ) + if detailed: + rval.append( self._detailed_hda_dict( trans, hda ) ) + else: + rval.append( self._summary_hda_dict( trans, history_id, hda ) ) except Exception, e: # for errors that are not specific to one hda (history lookup or summary list) rval = "Error in history API at listing contents: " + str( e ) diff -r 0c5e20558f6bb8d690dd9c9d25c172f08e837b1b -r 6f63fc8b7464f58952522dc35df1544cd40563e3 test/unit/test_galaxy_mapping.py --- a/test/unit/test_galaxy_mapping.py +++ b/test/unit/test_galaxy_mapping.py @@ -180,6 +180,41 @@ assert hist1.name == "History 2b" # gvk TODO need to ad test for GalaxySessions, but not yet sure what they should look like. + def test_history_contents( self ): + model = self.model + u = model.User( email="contents@foo.bar.baz", password="password" ) + # gs = model.GalaxySession() + h1 = model.History( name="HistoryContentsHistory1", user=u) + + self.persist( u, h1, expunge=False ) + + d1 = self.new_hda( h1, name="1" ) + d2 = self.new_hda( h1, name="2", visible=False ) + d3 = self.new_hda( h1, name="3", deleted=True ) + d4 = self.new_hda( h1, name="4", visible=False, deleted=True ) + + self.session().flush() + + def contents_iter_names(**kwds): + history = model.context.query( model.History ).filter( + model.History.name == "HistoryContentsHistory1" + ).first() + return list( map( lambda hda: hda.name, history.contents_iter( **kwds ) ) ) + + self.assertEquals(contents_iter_names(), [ "1", "2", "3", "4" ]) + assert contents_iter_names( deleted=False ) == [ "1", "2" ] + assert contents_iter_names( visible=True ) == [ "1", "3" ] + assert contents_iter_names( visible=False ) == [ "2", "4" ] + assert contents_iter_names( deleted=True, visible=False ) == [ "4" ] + + assert contents_iter_names( ids=[ d1.id, d2.id, d3.id, d4.id ] ) == [ "1", "2", "3", "4" ] + assert contents_iter_names( ids=[ d1.id, d2.id, d3.id, d4.id ], max_in_filter_length=1 ) == [ "1", "2", "3", "4" ] + + assert contents_iter_names( ids=[ d1.id, d3.id ] ) == [ "1", "3" ] + + def new_hda( self, history, **kwds ): + return history.add_dataset( self.model.HistoryDatasetAssociation( create_dataset=True, sa_session=self.model.session, **kwds ) ) + @classmethod def setUpClass(cls): # Start the database and connect the mapping @@ -191,10 +226,20 @@ return cls.model.session.query( type ) @classmethod - def persist(cls, *args): + def persist(cls, *args, **kwargs): + session = cls.session() + flush = kwargs.get('flush', True) for arg in args: - cls.model.session.add( arg ) - cls.expunge() + session.add( arg ) + if flush: + session.flush() + if kwargs.get('expunge', not flush): + cls.expunge() + return arg # Return last or only arg. + + @classmethod + def session(cls): + return cls.model.session @classmethod def expunge(cls): Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.