1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/changeset/f4c4ba7be3d1/
changeset:   f4c4ba7be3d1
user:        natefoo
date:        2012-05-26 21:00:36
summary:     Provide a modified check_galaxy.py that is suitable for use with nagios.
affected #:  3 files

diff -r 62bdb265d3007d4f761a1defa82a44d888c30bfd -r f4c4ba7be3d17fbf7924e9f8d6ae005960fcc4c0 contrib/nagios/README
--- /dev/null
+++ b/contrib/nagios/README
@@ -0,0 +1,1 @@
+Nagios checks for Galaxy. check_galaxy is used to call check_galaxy.py.
diff -r 62bdb265d3007d4f761a1defa82a44d888c30bfd -r f4c4ba7be3d17fbf7924e9f8d6ae005960fcc4c0 contrib/nagios/check_galaxy
--- /dev/null
+++ b/contrib/nagios/check_galaxy
@@ -0,0 +1,50 @@
+#!/bin/sh
+#
+# Nagios wrapper for check_galaxy.py.  The exit status follows the last line
+# of the check's output: 0 (OK) for "OK", 1 (WARNING) when the tool never
+# finished, 2 (CRITICAL) for anything else.  Bad usage exits 3 (UNKNOWN).
+
+if [ -z "$3" ]; then
+    echo "usage: check_galaxy <server><username><password>"
+    exit 3
+fi
+
+here=`dirname "$0"`
+var="$HOME/.check_galaxy/$1"
+
+# check_galaxy.py creates this directory too, but only after the counter and
+# log below are first needed; umask keeps it private like the python side
+(umask 077; mkdir -p "$var")
+
+touch "$var/iterations"
+iterations=`cat "$var/iterations"`
+if [ -z "$iterations" ]; then
+    iterations=0
+fi
+
+# request a new history (-n) once the counter exceeds 96
+new_history=''
+if [ $iterations -gt 96 ]; then
+    new_history='-n'
+    echo 0 > "$var/iterations"
+else
+    echo `expr $iterations + 1` > "$var/iterations"
+fi
+
+date >> "$var/log"
+# $new_history stays unquoted so it disappears entirely when empty
+status=`python "$here/check_galaxy.py" $new_history "$1" "$2" "$3" 2>&1 | tee -a "$var/log" | tail -n 1`
+
+echo "$status"
+
+case "$status" in
+    "Exception: Tool never finished")
+        exit 1
+        ;;
+    "OK")
+        exit 0
+        ;;
+    *)
+        exit 2
+        ;;
+esac
diff -r 62bdb265d3007d4f761a1defa82a44d888c30bfd -r f4c4ba7be3d17fbf7924e9f8d6ae005960fcc4c0 contrib/nagios/check_galaxy.py
--- /dev/null
+++ b/contrib/nagios/check_galaxy.py
@@ -0,0 +1,401 @@
+#!/usr/bin/env python
+"""
+check_galaxy can be run by hand, although it is meant to be run by nagios
+via the check_galaxy wrapper script that is installed next to it.
+
+usage: check_galaxy.py [-n] <server> <username> <password>
+
+It logs in to <server> (or reuses saved cookies), runs every tool listed in
+`tools` below and compares the output with a known-good file.  The last line
+printed is "OK" when everything worked.  -n additionally deletes the current
+history before each tool run; setting DEBUG in the environment is verbose.
+"""
+
+import socket, sys, os, time, tempfile, filecmp, htmllib, formatter, getopt
+from user import home
+
+import warnings
+with warnings.catch_warnings():
+    warnings.simplefilter('ignore')
+    import twill
+    import twill.commands as tc
+
+# options
+if os.environ.has_key( "DEBUG" ):
+    debug = os.environ["DEBUG"]
+else:
+    debug = False
+
+test_data_dir = os.path.join( os.path.dirname( __file__ ), 'check_galaxy_data' )
+# what tools to run - not so pretty
+tools = {
+    "Extract+genomic+DNA+1" :
+    [
+        {
+            "inputs" :
+            (
+                {
+                    "file_path" : os.path.join( test_data_dir, "1.bed" ),
+                    "dbkey" : "hg17",
+                },
+
+            )
+        },
+        { "check_file" : os.path.join( test_data_dir, "extract_genomic_dna_out1.fasta" ) },
+        {
+            "tool_run_options" :
+            {
+                "input" : "1.bed",
+                "interpret_features" : "yes",
+                "index_source" : "cached",
+                "out_format" : "fasta"
+            }
+        }
+    ]
+}
+
+# handle arg(s)
+def usage():
+    print "usage: check_galaxy.py <server><username><password>"
+    sys.exit(1)
+
+try:
+    opts, args = getopt.getopt( sys.argv[1:], 'n' )
+except getopt.GetoptError, e:
+    print str(e)
+    usage()
+# all three positional arguments are required
+if len( args ) < 3:
+    usage()
+server = args[0]
+username = args[1]
+password = args[2]
+
+if server.endswith(".g2.bx.psu.edu"):
+    if debug:
+        print "Checking a PSU Galaxy server, using maint file"
+    maint = "/errordocument/502/%s/maint" % args[0].split('.', 1)[0]
+else:
+    maint = None
+
+new_history = False
+for o, a in opts:
+    if o == "-n":
+        if debug:
+            print "Specified -n, will create a new history"
+        new_history = True
+    else:
+        usage()
+
+# state information
+var_dir = os.path.join( home, ".check_galaxy", server )
+if not os.access( var_dir, os.F_OK ):
+    os.makedirs( var_dir, 0700 )
+
+# default timeout for twill browser is never
+socket.setdefaulttimeout(300)
+
+# user-agent
+tc.agent("Mozilla/5.0 (compatible; check_galaxy/0.1)")
+tc.config('use_tidy', 0)
+
+class Browser:
+    """Drives a twill browser session against a single Galaxy server."""
+
+    def __init__(self):
+        self.server = server
+        self.maint = maint
+        self.tool = None
+        self.tool_opts = None
+        self.id = None
+        self.status = None
+        self.check_file = None
+        self.hid = None
+        self.cookie_jar = os.path.join( var_dir, "cookie_jar" )
+        dprint("cookie jar path: %s" % self.cookie_jar)
+        if not os.access(self.cookie_jar, os.R_OK):
+            dprint("no cookie jar at above path, creating")
+            tc.save_cookies(self.cookie_jar)
+        tc.load_cookies(self.cookie_jar)
+
+    def get(self, path):
+        tc.go("http://%s%s" % (self.server, path))
+        tc.code(200)
+
+    def reset(self):
+        self.tool = None
+        self.tool_opts = None
+        self.id = None
+        self.status = None
+        self.check_file = None
+        self.delete_datasets()
+        self.get("/root/history")
+        p = didParser()
+        p.feed(tc.browser.get_html())
+        if len(p.dids) > 0:
+            print "Remaining datasets ids:", " ".join( p.dids )
+            raise Exception, "History still contains datasets after attempting to delete them"
+        if new_history:
+            self.get("/history/delete_current")
+            tc.save_cookies(self.cookie_jar)
+
+    def check_redir(self, url):
+        try:
+            tc.get_browser()._browser.set_handle_redirect(False)
+            tc.go(url)
+            tc.code(302)
+            tc.get_browser()._browser.set_handle_redirect(True)
+            dprint( "%s is returning redirect (302)" % url )
+            return(True)
+        except twill.errors.TwillAssertionError, e:
+            tc.get_browser()._browser.set_handle_redirect(True)
+            dprint( "%s is not returning redirect (302): %s" % (url, e) )
+            code = tc.browser.get_code()
+            if code == 502:
+                is_maint = self.check_maint()
+                if is_maint:
+                    dprint( "Galaxy is down, but a maint file was found, so not sending alert" )
+                    sys.exit(0)
+                else:
+                    print "Galaxy is down (code 502)"
+                    sys.exit(1)
+            return(False)
+
+    # checks for a maint file
+    def check_maint(self):
+        if self.maint is None:
+            #dprint( "Warning: unable to check maint file for %s" % self.server )
+            return(False)
+        try:
+            self.get(self.maint)
+            return(True)
+        except twill.errors.TwillAssertionError, e:
+            return(False)
+
+    def login(self, user, pw):
+        self.get("/user/login")
+        tc.fv("1", "email", user)
+        tc.fv("1", "password", pw)
+        tc.submit("Login")
+        tc.code(200)
+        if len(tc.get_browser().get_all_forms()) > 0:
+            # uh ohs, fail
+            p = userParser()
+            p.feed(tc.browser.get_html())
+            if p.no_user:
+                dprint("user does not exist, will try creating")
+                self.create_user(user, pw)
+            elif p.bad_pw:
+                raise Exception, "Password is incorrect"
+            else:
+                raise Exception, "Unknown error logging in"
+        tc.save_cookies(self.cookie_jar)
+
+    def create_user(self, user, pw):
+        self.get("/user/create")
+        tc.fv("1", "email", user)
+        tc.fv("1", "password", pw)
+        tc.fv("1", "confirm", pw)
+        tc.submit("Submit")
+        tc.code(200)
+        if len(tc.get_browser().get_all_forms()) > 0:
+            p = userParser()
+            p.feed(tc.browser.get_html())
+            if p.already_exists:
+                raise Exception, 'The user you were trying to create already exists'
+
+    def upload(self, input):
+        self.get("/tool_runner/index?tool_id=upload1")
+        tc.fv("1","file_type", "bed")
+        tc.fv("1","dbkey", input.get('dbkey', '?'))
+        tc.formfile("1","file_data", input['file_path'])
+        tc.submit("runtool_btn")
+        tc.code(200)
+
+    def runtool(self):
+        self.get("/tool_runner/index?tool_id=%s" % self.tool)
+        for k, v in self.tool_opts.items():
+            tc.fv("1", k, v)
+        tc.submit("runtool_btn")
+        tc.code(200)
+
+    def wait(self):
+        # poll the history until no job is running, sleeping one second
+        # longer after each poll
+        sleep_amount = 1
+        maxiter = 16
+        for attempt in range( maxiter ):
+            self.get("/root/history")
+            page = tc.browser.get_html()
+            if page.find( '<!-- running: do not change this comment, used by TwillTestCase.wait -->' ) == -1:
+                # finishing on the final poll is still a success
+                return
+            time.sleep( sleep_amount )
+            sleep_amount += 1
+        raise Exception, "Tool never finished"
+
+    def check_status(self):
+        self.get("/root/history")
+        p = historyParser()
+        p.feed(tc.browser.get_html())
+        if p.status != "ok":
+            self.get("/datasets/%s/stderr" % p.id)
+            print tc.browser.get_html()
+            raise Exception, "HDA %s NOT OK: %s" % (p.id, p.status)
+        self.id = p.id
+        self.status = p.status
+        #return((p.id, p.status))
+
+    def diff(self):
+        self.get("/datasets/%s/display?to_ext=%s" % (self.id, self.tool_opts.get('out_format', 'fasta')))
+        data = tc.browser.get_html()
+        tmp = tempfile.mkstemp()
+        dprint("tmp file: %s" % tmp[1])
+        try:
+            tmpfh = os.fdopen(tmp[0], 'w')
+            tmpfh.write(data)
+            tmpfh.close()
+            if not filecmp.cmp(tmp[1], self.check_file):
+                raise Exception, "Tool output differs from expected"
+            dprint("Tool output is as expected")
+        finally:
+            # the temp file is kept for inspection only when debugging
+            if not debug:
+                os.remove(tmp[1])
+
+    def delete_datasets(self):
+        self.get("/root/history")
+        p = didParser()
+        p.feed(tc.browser.get_html())
+        dids = p.dids
+        for did in dids:
+            self.get("/datasets/%s/delete" % did)
+
+    def check_if_logged_in(self):
+        self.get("/user?cntrller=user")
+        p = loggedinParser()
+        p.feed(tc.browser.get_html())
+        return p.logged_in
+
+class userParser(htmllib.HTMLParser):
+    def __init__(self):
+        htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
+        self.in_span = False
+        self.in_div = False
+        self.no_user = False
+        self.bad_pw = False
+        self.already_exists = False
+    def start_span(self, attrs):
+        self.in_span = True
+    def start_div(self, attrs):
+        self.in_div = True
+    def end_span(self):
+        self.in_span = False
+    def end_div(self):
+        self.in_div = False
+    def handle_data(self, data):
+        if self.in_span or self.in_div:
+            if data == "No such user (please note that login is case sensitive)":
+                self.no_user = True
+            elif data == "Invalid password":
+                self.bad_pw = True
+            elif data == "User with that email already exists":
+                self.already_exists = True
+
+class historyParser(htmllib.HTMLParser):
+    def __init__(self):
+        htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
+        self.status = None
+        self.id = None
+    def start_div(self, attrs):
+        # find the top history item
+        for i in attrs:
+            if i[0] == "class" and i[1].startswith("historyItemWrapper historyItem historyItem-"):
+                self.status = i[1].rsplit("historyItemWrapper historyItem historyItem-", 1)[1]
+                dprint("status: %s" % self.status)
+            if i[0] == "id" and i[1].startswith("historyItem-"):
+                self.id = i[1].rsplit("historyItem-", 1)[1]
+                dprint("id: %s" % self.id)
+        if self.status is not None:
+            self.reset()
+
+class didParser(htmllib.HTMLParser):
+    def __init__(self):
+        htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
+        self.dids = []
+    def start_div(self, attrs):
+        for i in attrs:
+            if i[0] == "id" and i[1].startswith("historyItemContainer-"):
+                self.dids.append( i[1].rsplit("historyItemContainer-", 1)[1] )
+                dprint("got a dataset id: %s" % self.dids[-1])
+
+class loggedinParser(htmllib.HTMLParser):
+    def __init__(self):
+        htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
+        self.in_p = False
+        self.logged_in = False
+    def start_p(self, attrs):
+        self.in_p = True
+    def end_p(self):
+        self.in_p = False
+    def handle_data(self, data):
+        if self.in_p:
+            if data == "You are currently not logged in.":
+                self.logged_in = False
+            elif data.startswith( "You are currently logged in as " ):
+                self.logged_in = True
+
+def dprint(str):
+    if debug:
+        print str
+
+# do stuff here
+if __name__ == "__main__":
+
+    dprint("checking %s" % server)
+
+    b = Browser()
+
+    # login (or not)
+    if b.check_if_logged_in():
+        dprint("we are already logged in (via cookies), hooray!")
+    else:
+        dprint("not logged in... logging in")
+        b.login(username, password)
+
+    for tool, params in tools.iteritems():
+
+        check_file = ""
+
+        # make sure history and state is clean
+        b.reset()
+        b.tool = tool
+
+        # get all the tool run conditions
+        for dict in params:
+            for k, v in dict.items():
+                if k == 'inputs':
+                    for input in v:
+                        b.upload(input)
+                        b.wait()
+                elif k == 'check_file':
+                    b.check_file = v
+                elif k == 'tool_run_options':
+                    b.tool_opts = v
+                else:
+                    raise Exception, "Unknown key in tools dict: %s" % k
+
+        b.runtool()
+        b.wait()
+        b.check_status()
+        b.diff()
+        b.delete_datasets()
+
+    # by this point, everything else has succeeded. there should be no maint.
+    is_maint = b.check_maint()
+    if is_maint:
+        print "Galaxy is up and fully functional, but a maint file is in place."
+        sys.exit(1)
+
+    print "OK"
+    sys.exit(0)

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.