details: http://www.bx.psu.edu/hg/galaxy/rev/f2e583f7cab9 changeset: 1605:f2e583f7cab9 user: Greg Von Kuster <greg@bx.psu.edu> date: Tue Nov 04 16:04:49 2008 -0500 description: Add ability to track memory usage in code secments - requires config addition: # Log memory usage log_memory_usage = False Added logging statements in tool execution methods and most methods in the user and root controllers. Also fixes for checking config settings that should be boolean ( e.g., memdump, heartbeat ). 9 file(s) affected in this change: lib/galaxy/app.py lib/galaxy/config.py lib/galaxy/eggs/__init__.py lib/galaxy/tools/actions/__init__.py lib/galaxy/tools/actions/upload.py lib/galaxy/util/memory_usage.py lib/galaxy/web/controllers/root.py lib/galaxy/web/controllers/user.py universe_wsgi.ini.sample diffs (469 lines): diff -r 7a5d9759c702 -r f2e583f7cab9 lib/galaxy/app.py --- a/lib/galaxy/app.py Tue Nov 04 14:00:30 2008 -0500 +++ b/lib/galaxy/app.py Tue Nov 04 16:04:49 2008 -0500 @@ -38,6 +38,7 @@ # Heartbeat and memdump for thread / heap profiling self.heartbeat = None self.memdump = None + self.memory_usage = None # Start the heartbeat process if configured and available if self.config.use_heartbeat: from galaxy.util import heartbeat @@ -49,6 +50,10 @@ from galaxy.util import memdump if memdump.Memdump: self.memdump = memdump.Memdump() + # Enable memory_usage logging if configured + if self.config.log_memory_usage: + from galaxy.util import memory_usage + self.memory_usage = memory_usage def shutdown( self ): self.job_manager.shutdown() if self.heartbeat: diff -r 7a5d9759c702 -r f2e583f7cab9 lib/galaxy/config.py --- a/lib/galaxy/config.py Tue Nov 04 14:00:30 2008 -0500 +++ b/lib/galaxy/config.py Tue Nov 04 16:04:49 2008 -0500 @@ -55,8 +55,9 @@ self.pbs_dataset_server = kwargs.get('pbs_dataset_server', "" ) self.pbs_dataset_path = kwargs.get('pbs_dataset_path', "" ) self.pbs_stage_path = kwargs.get('pbs_stage_path', "" ) - self.use_heartbeat = string_as_bool( kwargs.get( 'use_heartbeat', "False" ) ) - self.use_memdump = kwargs.get( 'use_memdump', False ) + self.use_heartbeat = string_as_bool( kwargs.get( 'use_heartbeat', False ) ) + self.use_memdump = string_as_bool( kwargs.get( 'use_memdump', False ) ) + self.log_memory_usage = string_as_bool( kwargs.get( 'log_memory_usage', False ) ) self.ucsc_display_sites = kwargs.get( 'ucsc_display_sites', "main,test,archaea" ).lower().split(",") self.gbrowse_display_sites = kwargs.get( 'gbrowse_display_sites', "wormbase,flybase,elegans" ).lower().split(",") self.brand = kwargs.get( 'brand', None ) diff -r 7a5d9759c702 -r f2e583f7cab9 lib/galaxy/eggs/__init__.py --- a/lib/galaxy/eggs/__init__.py Tue Nov 04 14:00:30 2008 -0500 +++ b/lib/galaxy/eggs/__init__.py Tue Nov 04 16:04:49 2008 -0500 @@ -484,7 +484,7 @@ return False elif egg_name == "threadframe": try: - if self.config.get( "app:main", "use_heartbeat" ) == "True": + if self.config.get( "app:main", "use_heartbeat" ): return True else: return False @@ -492,7 +492,7 @@ return False elif egg_name == "guppy": try: - if self.config.get( "app:main", "use_memdump" ) == "True": + if self.config.get( "app:main", "use_memdump" ): return True else: return False diff -r 7a5d9759c702 -r f2e583f7cab9 lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py Tue Nov 04 14:00:30 2008 -0500 +++ b/lib/galaxy/tools/actions/__init__.py Tue Nov 04 16:04:49 2008 -0500 @@ -65,6 +65,9 @@ return input_datasets def execute(self, tool, trans, incoming={}, set_output_hid=True ): + if trans.app.memory_usage: + # Keep track of memory usage + m0 = trans.app.memory_usage.memory() out_data = {} # Collect any input datasets from the incoming parameters inp_data = self.collect_input_datasets( tool, incoming, trans ) @@ -198,6 +201,9 @@ for name, dataset in out_data.iteritems(): job.add_output_dataset( name, dataset ) trans.app.model.flush() + if trans.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info("End of tool %s execution for job id %d, memory used increased by %s" % ( tool.id, job.id, m1 ) ) # Some tools are not really executable, but jobs are still created for them ( for record keeping ). # Examples include tools that redirect to other applications ( epigraph ). These special tools must # include something that can be retrieved from the params ( e.g., REDIRECT_URL ) to keep the job diff -r 7a5d9759c702 -r f2e583f7cab9 lib/galaxy/tools/actions/upload.py --- a/lib/galaxy/tools/actions/upload.py Tue Nov 04 14:00:30 2008 -0500 +++ b/lib/galaxy/tools/actions/upload.py Tue Nov 04 16:04:49 2008 -0500 @@ -15,6 +15,9 @@ self.line_count = None def execute( self, tool, trans, incoming={}, set_output_hid = True ): + if trans.app.memory_usage: + # Keep track of memory usage + m0 = trans.app.memory_usage.memory() data_file = incoming['file_data'] file_type = incoming['file_type'] dbkey = incoming['dbkey'] @@ -93,6 +96,9 @@ job.flush() log.info( 'job id %d ended ok, file size: %s' % ( job.id, file_size_str ) ) trans.log_event( 'job id %d ended ok, file size: %s' % ( job.id, file_size_str ), tool_id=tool.id ) + if trans.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info("End of tool %s execution for job id %d, memory used increased by %s" % ( tool.id, job.id, m1 ) ) return dict( output=hda ) def upload_empty(self, trans, job, err_code, err_msg): diff -r 7a5d9759c702 -r f2e583f7cab9 lib/galaxy/util/memory_usage.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/util/memory_usage.py Tue Nov 04 16:04:49 2008 -0500 @@ -0,0 +1,44 @@ +import os, platform, logging +from galaxy.datatypes.data import nice_size + +log = logging.getLogger( __name__ ) + +_proc_status = '/proc/%d/status' % os.getpid() + +_scale = { 'kB': 1024.0, 'mB': 1024.0*1024.0, 'KB': 1024.0, 'MB': 1024.0*1024.0 } + +def _VmB( VmKey ): + '''Private. + ''' + global _proc_status, _scale + # get pseudo file /proc/<pid>/status + try: + t = open( _proc_status ) + v = t.read() + t.close() + except: + log.debug("memory_usage is currently supported only on Linux, your platform is %s %s" % ( platform.system(), platform.release() ) ) + return 0.0 # non-Linux? + # get VmKey line e.g. 'VmRSS: 9999 kB\n ...' + i = v.index( VmKey ) + v = v[ i: ].split( None, 3 ) # whitespace + if len( v ) < 3: + return 0.0 # invalid format? + # convert Vm value to bytes + return float( v[ 1 ] ) * _scale[ v[ 2 ] ] +def memory( since=0.0, pretty=False ): + '''Return memory usage in bytes. + ''' + size = _VmB( 'VmSize:' ) - since + if pretty: + return nice_size( size ) + else: + return size +def resident( since=0.0 ): + '''Return resident memory usage in bytes. + ''' + return _VmB( 'VmRSS:' ) - since +def stacksize( since=0.0 ): + '''Return stack size in bytes. + ''' + return _VmB( 'VmStk:' ) - since diff -r 7a5d9759c702 -r f2e583f7cab9 lib/galaxy/web/controllers/root.py --- a/lib/galaxy/web/controllers/root.py Tue Nov 04 14:00:30 2008 -0500 +++ b/lib/galaxy/web/controllers/root.py Tue Nov 04 16:04:49 2008 -0500 @@ -116,6 +116,9 @@ Returns data directly into the browser. Sets the mime-type according to the extension """ + if trans.app.memory_usage: + # Keep track of memory usage + m0 = self.app.memory_usage.memory() if hid is not None: try: hid = int( hid ) @@ -146,6 +149,9 @@ fname = ''.join(c in valid_chars and c or '_' for c in fname)[0:150] trans.response.headers["Content-Disposition"] = "attachment; filename=GalaxyHistoryItem-%s-[%s]%s" % (data.hid, fname, toext) trans.log_event( "Display dataset id: %s" % str(id) ) + if self.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of root/display, memory used increased by %s" % m1 ) try: return open( data.file_name ) except: @@ -321,6 +327,9 @@ @web.expose def history_delete( self, trans, id=None, **kwd): """Deletes a list of histories, ensures that histories are owned by current user""" + if trans.app.memory_usage: + # Keep track of memory usage + m0 = self.app.memory_usage.memory() history_names = [] if id: if isinstance( id, list ): @@ -348,12 +357,18 @@ trans.log_event( "History id %s marked as deleted" % str(hid) ) else: return trans.show_message( "You must select at least one history to delete." ) + if self.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of root/history_delete, memory used increased by %s" % m1 ) return trans.show_message( "History deleted: %s" % ",".join(history_names), refresh_frames=['history']) @web.expose def history_undelete( self, trans, id=[], **kwd): """Undeletes a list of histories, ensures that histories are owned by current user""" + if trans.app.memory_usage: + # Keep track of memory usage + m0 = self.app.memory_usage.memory() history_names = [] errors = [] ok_msg = "" @@ -385,22 +400,34 @@ ok_msg = "Histories (%s) have been undeleted." % ", ".join( history_names ) else: errors.append( "You must select at least one history to undelete." ) + if self.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of root/history_undelete, memory used increased by %s" % m1 ) return self.history_available( trans, id=','.join( id ), show_deleted=True, ok_msg = ok_msg, error_msg = " ".join( errors ) ) @web.expose def clear_history( self, trans ): """Clears the history for a user""" + if trans.app.memory_usage: + # Keep track of memory usage + m0 = self.app.memory_usage.memory() history = trans.get_history() for dataset in history.datasets: dataset.deleted = True dataset.clear_associated_files() self.app.model.flush() trans.log_event( "History id %s cleared" % (str(history.id)) ) + if self.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of root/clear_history, memory used increased by %s" % m1 ) trans.response.send_redirect( url_for("/index" ) ) @web.expose @web.require_login( "share histories with other users" ) def history_share( self, trans, id=None, email="", **kwd ): + if trans.app.memory_usage: + # Keep track of memory usage + m0 = self.app.memory_usage.memory() send_to_err = "" if not id: id = trans.get_history().id @@ -427,6 +454,9 @@ trans.log_event( "History share, id: %s, name: '%s': to new id: %s" % (str(history.id), history.name, str(new_history.id)) ) self.app.model.flush() return trans.show_message( "History (%s) has been shared with: %s" % (",".join(history_names),email) ) + if self.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of root/history_share, memory used increased by %s" % m1 ) return trans.fill_template( "/history/share.mako", histories=histories, email=email, send_to_err=send_to_err) @web.expose @@ -435,18 +465,21 @@ """ List all available histories """ + if trans.app.memory_usage: + # Keep track of memory usage + m0 = self.app.memory_usage.memory() if as_xml: trans.response.set_content_type('text/xml') return trans.fill_template( "/history/list_as_xml.mako" ) if not isinstance( id, list ): id = id.split( "," ) trans.log_event( "History id %s available" % str( id ) ) - history_operations = dict( share=self.history_share, rename=self.history_rename, delete=self.history_delete, undelete=self.history_undelete ) - + if self.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of root/history_available, memory used increased by %s" % m1 ) if do_operation in history_operations: return history_operations[do_operation]( trans, id=id, show_deleted=show_deleted, ok_msg=ok_msg, error_msg=error_msg, **kwd ) - return trans.fill_template( "/history/list.mako", ids=id, user=trans.get_user(), current_history=trans.get_history(), @@ -457,6 +490,9 @@ @web.expose def history_import( self, trans, id=None, confirm=False, **kwd ): + if trans.app.memory_usage: + # Keep track of memory usage + m0 = self.app.memory_usage.memory() msg = "" user = trans.get_user() user_history = trans.get_history() @@ -497,6 +533,9 @@ new_history.flush() trans.set_history( new_history ) trans.log_event( "History imported, id: %s, name: '%s': " % (str(new_history.id) , new_history.name ) ) + if self.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of root/history_import, memory used increased by %s" % m1 ) return trans.show_ok_message( """ History "%s" has been imported. Click <a href="%s">here</a> to begin.""" % ( new_history.name, web.url_for( '/' ) ) ) @@ -511,6 +550,9 @@ if not id: return trans.response.send_redirect( web.url_for( action='history_available' ) ) else: + if trans.app.memory_usage: + # Keep track of memory usage + m0 = self.app.memory_usage.memory() new_history = trans.app.model.History.get( id ) if new_history: galaxy_session = trans.get_galaxy_session() @@ -522,6 +564,9 @@ new_history.flush() trans.set_history( new_history ) trans.log_event( "History switched to id: %s, name: '%s'" % (str(new_history.id), new_history.name ) ) + if self.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of root/history_switch, memory used increased by %s" % m1 ) return trans.show_message( "History switched to: %s" % new_history.name, refresh_frames=['history']) else: @@ -529,13 +574,22 @@ @web.expose def history_new( self, trans ): + if trans.app.memory_usage: + # Keep track of memory usage + m0 = self.app.memory_usage.memory() trans.new_history() trans.log_event( "Created new History, id: %s." % str(trans.get_history().id) ) + if self.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of root/history_new, memory used increased by %s" % m1 ) return trans.show_message( "New history created", refresh_frames = ['history'] ) @web.expose @web.require_login( "renames histories" ) def history_rename( self, trans, id=None, name=None, **kwd ): + if trans.app.memory_usage: + # Keep track of memory usage + m0 = self.app.memory_usage.memory() user = trans.get_user() if not isinstance( id, list ): @@ -572,11 +626,17 @@ change_msg = change_msg + "<p>You must specify a valid name for History: "+cur_names[i]+"</p>" else: change_msg = change_msg + "<p>History: "+cur_names[i]+" does not appear to belong to you.</p>" + if self.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of root/history_rename, memory used increased by %s" % m1 ) return trans.show_message( "<p>%s" % change_msg, refresh_frames=['history'] ) @web.expose def history_add_to( self, trans, history_id=None, file_data=None, name="Data Added to History",info=None,ext="txt",dbkey="?",**kwd ): """Adds a POSTed file to a History""" + if trans.app.memory_usage: + # Keep track of memory usage + m0 = self.app.memory_usage.memory() try: history = trans.app.model.History.get( history_id ) data = trans.app.model.HistoryDatasetAssociation( name = name, info = info, extension = ext, dbkey = dbkey, create_dataset = True ) @@ -595,6 +655,9 @@ data.set_size() data.flush() trans.log_event("Added dataset %d to history %d" %(data.id, trans.history.id)) + if self.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of root/history_add_to, memory used increased by %s" % m1 ) return trans.show_ok_message("Dataset "+str(data.hid)+" added to history "+str(history_id)+".") except: return trans.show_error_message("Adding File to History has Failed") @@ -602,6 +665,9 @@ @web.expose def dataset_make_primary( self, trans, id=None): """Copies a dataset and makes primary""" + if trans.app.memory_usage: + # Keep track of memory usage + m0 = self.app.memory_usage.memory() try: old_data = self.app.model.HistoryDatasetAssociation.get( id ) new_data = old_data.copy() @@ -610,6 +676,9 @@ history = trans.get_history() history.add_dataset(new_data) new_data.flush() + if self.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of root/dataset_make_primary, memory used increased by %s" % m1 ) return trans.show_message( "<p>Secondary dataset has been made primary.</p>", refresh_frames=['history'] ) except: return trans.show_error_message( "<p>Failed to make secondary dataset primary.</p>" ) diff -r 7a5d9759c702 -r f2e583f7cab9 lib/galaxy/web/controllers/user.py --- a/lib/galaxy/web/controllers/user.py Tue Nov 04 14:00:30 2008 -0500 +++ b/lib/galaxy/web/controllers/user.py Tue Nov 04 16:04:49 2008 -0500 @@ -70,6 +70,9 @@ @web.expose def login( self, trans, email='', password='' ): + if trans.app.memory_usage: + # Keep track of memory usage + m0 = trans.app.memory_usage.memory() email_error = password_error = None # Attempt login if email or password: @@ -85,6 +88,9 @@ trans.handle_user_login( user ) trans.log_event( "User logged in" ) return trans.show_ok_message( "Now logged in as " + user.email, refresh_frames=['masthead', 'history'] ) + if trans.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of user/login, memory used increased by %s" % m1 ) return trans.show_form( web.FormBuilder( web.url_for(), "Login", submit_text="Login" ) .add_text( "email", "Email address", value=email, error=email_error ) @@ -92,13 +98,22 @@ help="<a href='%s'>Forgot password? Reset here</a>" % web.url_for( action='reset_password' ) ) ) @web.expose def logout( self, trans ): + if trans.app.memory_usage: + # Keep track of memory usage + m0 = trans.app.memory_usage.memory() # Since logging an event requires a session, we'll log prior to ending the session trans.log_event( "User logged out" ) trans.handle_user_logout() + if trans.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of user/logout, memory used increased by %s" % m1 ) return trans.show_ok_message( "You are no longer logged in", refresh_frames=['masthead', 'history'] ) @web.expose def create( self, trans, email='', password='', confirm='',subscribe=False ): + if trans.app.memory_usage: + # Keep track of memory usage + m0 = trans.app.memory_usage.memory() email_error = password_error = confirm_error = None if email: if len( email ) == 0 or "@" not in email or "." not in email: @@ -120,10 +135,13 @@ trans.log_event( "User logged in" ) #subscribe user to email list if subscribe: - mail = os.popen("%s -t" % self.app.config.sendmail_path, 'w') - mail.write("To: %s\nFrom: %s\nSubject: Join Mailing List\n\nJoin Mailing list." % (self.app.config.mailing_join_addr,email) ) + mail = os.popen("%s -t" % trans.app.config.sendmail_path, 'w') + mail.write("To: %s\nFrom: %s\nSubject: Join Mailing List\n\nJoin Mailing list." % (trans.app.config.mailing_join_addr,email) ) if mail.close(): return trans.show_warn_message( "Now logged in as " + user.email+". However, subscribing to the mailing list has failed.", refresh_frames=['masthead', 'history'] ) + if trans.app.memory_usage: + m1 = trans.app.memory_usage.memory( m0, pretty=True ) + log.info( "End of user/create, memory used increased by %s" % m1 ) return trans.show_ok_message( "Now logged in as " + user.email, refresh_frames=['masthead', 'history'] ) return trans.show_form( web.FormBuilder( web.url_for(), "Create account", submit_text="Create" ) @@ -145,7 +163,7 @@ new_pass = "" for i in range(15): new_pass = new_pass + choice(chars) - mail = os.popen("%s -t" % self.app.config.sendmail_path, 'w') + mail = os.popen("%s -t" % trans.app.config.sendmail_path, 'w') mail.write("To: %s\nFrom: no-reply@%s\nSubject: Galaxy Password Reset\n\nYour password has been reset to \"%s\" (no quotes)." % (email, trans.request.remote_addr, new_pass) ) if mail.close(): return trans.show_ok_message( "Failed to reset password! If this problem persist, submit a bug report.") diff -r 7a5d9759c702 -r f2e583f7cab9 universe_wsgi.ini.sample --- a/universe_wsgi.ini.sample Tue Nov 04 14:00:30 2008 -0500 +++ b/universe_wsgi.ini.sample Tue Nov 04 16:04:49 2008 -0500 @@ -15,6 +15,9 @@ # Specifies the factory for the universe WSGI application paste.app_factory = galaxy.web.buildapp:app_factory log_level = DEBUG + +# Log memory usage +log_memory_usage = False # Should jobs be tracked through the database, rather than in memory ## track_jobs_in_database = true