r99240 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r99239 | r99240 | r99241 >
Date:19:21, 7 October 2011
Author:ben
Status:deferred
Tags:
Comment:
importing version 1.2 that has many bugfixes and uses a newer xml library
Modified paths:
  • /trunk/debs/ganglios/debian/changelog (modified) (history)
  • /trunk/debs/ganglios/debian/rules (modified) (history)
  • /trunk/debs/ganglios/src/check_ganglios_disk (modified) (history)
  • /trunk/debs/ganglios/src/check_ganglios_generic_value (modified) (history)
  • /trunk/debs/ganglios/src/check_ganglios_memory_v2 (modified) (history)
  • /trunk/debs/ganglios/src/ganglia_parser (modified) (history)
  • /trunk/debs/ganglios/src/ganglios/ganglios.py (modified) (history)

Diff [purge]

Index: trunk/debs/ganglios/debian/changelog
@@ -1,8 +1,14 @@
2 -ganglios (1.1) stable; urgency=low
 2+ganglios (1.2) stable; urgency=low
33
4 - * changed listXMLSources to read the list from /etc/ganglia/gmetad.conf
 4+ * changed ganglia_parser to use gmetad.conf instead of nannybot#
 5+ * updated ganglios.py to use the native xml etree instead of (deprecated)
 6+ ElementTree
 7+ * fixed broken memory and disk checks
 8+ * added additional help text to the generic check
 9+ * configured the ganglios package to install the cronjob (activating the
 10+ parser)
511
6 - -- Ben Hartshorne <bhartshorne@wikimedia.org> Mon, 3 Oct 2011 16:35:00 -0700
 12+ -- Ben Hartshorne <bhartshorne@wikimedia.org> Fri, 7 Oct 2011 12:02:34 -0700
713
814 ganglios (1.0-1) stable; urgency=low
915
Index: trunk/debs/ganglios/debian/rules
@@ -72,7 +72,7 @@
7373 # dh_installmime
7474 dh_pysupport
7575 # dh_installinit
76 -# dh_installcron
 76+ dh_installcron
7777 # dh_installinfo
7878 dh_installman
7979 dh_link
Index: trunk/debs/ganglios/src/check_ganglios_memory_v2
@@ -64,7 +64,7 @@
6565 sys.stdout.write(output)
6666 ganglios.done(2)
6767
68 - if( options.free ):
 68+ if( options.avail ):
6969 # testing against free memory
7070 mem = mem_shared + mem_free + mem_cached + mem_buffers
7171 metric = 'mem_avail'
@@ -76,16 +76,15 @@
7777 op = 'gt'
7878
7979
80 - if getattr(operator, op)(cur_val, crit_value):
81 - if mem_used > crit_value:
 80+ if getattr(operator, op)(mem, crit_value):
8281 status = 2
83 - output = "CRITICAL: %s is %s (op %s)" % (metric, cur_val, op, crit_value)
84 - elif mem_used > warn_value:
 82+ output = "CRITICAL: %s is %s (%s %s)" % (metric, mem, op, crit_value)
 83+ elif getattr(operator, op)(mem, warn_value):
8584 status = 1
86 - output = "WARN: %s is %s (op %s)" % (metric, cur_val, op, warn_value)
 85+ output = "WARN: %s is %s (%s %s)" % (metric, mem, op, warn_value)
8786 else:
8887 status = 0
89 - output = "OK: %s is %s" % (metric, cur_val)
 88+ output = "OK: %s is %s" % (metric, mem)
9089
9190 sys.stdout.write(output)
9291 ganglios.done(status)
Index: trunk/debs/ganglios/src/ganglios/ganglios.py
@@ -29,7 +29,7 @@
3030 import glob
3131 import socket
3232
33 -import elementtree.ElementTree as ET
 33+import xml.etree.ElementTree as ET
3434 import xml.parsers.expat as expat
3535
3636 __revision__ = '0'
@@ -52,6 +52,8 @@
5353 status = 0 # ok
5454 bad = []
5555
 56+ # go_bad collects xml cache files that are old, broken or otherwise
 57+ # unparseable and stops us from parsing them again in the future
5658 def go_bad (xml_file, bad):
5759 """ change status to bad, and output the stale nannybot """
5860 bad_host = xml_file.replace ('.xml', '')
@@ -75,15 +77,19 @@
7678 f_hndl = open (filename)
7779 try:
7880 tree = ET.parse (f_hndl)
79 - ganglia_xml = tree.getroot()
80 - for cluster in ganglia_xml.getchildren ():
81 - for host in cluster.getchildren ():
82 - for metric in host.getchildren ():
83 - # found a metric we care about.
84 - if metric.get ('NAME') in metrics:
85 - thunk (host.get ('NAME'),
86 - metric.get ('NAME'),
87 - metric.get ('VAL'))
 81+ root = tree.getroot()
 82+ clusters = list(root)
 83+ for cluster in clusters:
 84+ for host in cluster.findall('HOST'):
 85+ for metric in host.findall('METRIC'):
 86+ if metric.attrib['NAME'] in metrics:
 87+ try:
 88+ thunk( host.attrib['NAME'],
 89+ metric.attrib['NAME'],
 90+ metric.attrib['VAL'])
 91+ except Exception, e:
 92+ print "thunk threw an exception: %s" % e
 93+ raise
8894 except expat.ExpatError:
8995 go_bad (xml_file, bad)
9096 status = 2
@@ -122,6 +128,9 @@
123129 filelist = glob.glob(os.path.join(_hostdir, "*.%s" % hostname))
124130 if len(filelist) == 0:
125131 filelist = glob.glob(os.path.join(_hostdir, "%s" % hostname))
 132+ # if there's still no match, complain host not found.
 133+ if len(filelist) == 0:
 134+ raise Exception("Host not found: %s." % hostname)
126135 ###
127136 ### for the VPNs, it's a valid state that there exist >1 files for each vpn
128137 ### (a tunnel address and a private interface). What's the right action to take
@@ -142,11 +151,10 @@
143152 f_hndl = open(filename)
144153 try:
145154 tree = ET.parse (f_hndl)
146 - host = tree.getroot()
147 - for metric in host.getchildren ():
 155+ for metric in tree.findall('METRIC'):
148156 # found a metric we care about.
149 - if metric.get ('NAME') == metricname:
150 - return metric.get('VAL')
 157+ if metric.attrib['NAME'] == metricname:
 158+ return metric.attrib['VAL']
151159 except expat.ExpatError:
152160 sys.stdout.write("XML parse error")
153161 done(2)
Index: trunk/debs/ganglios/src/check_ganglios_generic_value
@@ -36,7 +36,14 @@
3737 # actually, any valid function supplied by the 'operator' module that return a
3838 # boolean will probably work, but the above functions are what I expect to use
3939
40 - parser = OptionParser()
 40+ description = """check_ganglios_generic_value -H hostname -m metric -w val -c val -o op
 41+
 42+ Checks the value of a metric against the warning and crit values
 43+ passed in using the operator supplied. An example call that checks that
 44+ the load_one metric for foo.example.com. It will trigger WARN if load_one
 45+ is greater than 2 and CRIT when greater than 4:
 46+ ./check_ganglios_generic_value -H foo.example.com -m load_one -w 2 -c 4 -o gt"""
 47+ parser = OptionParser(usage=description)
4148
4249 parser.add_option('-H', dest='hostname', help='hostname')
4350 parser.add_option('-m', dest='metric', help='metric to check')
Index: trunk/debs/ganglios/src/check_ganglios_disk
@@ -101,12 +101,12 @@
102102 def build_metrics_include( host, metric, value):
103103 ''' callback from ganglios.parse_ganglia '''
104104 if host in incl_excl_hosts:
105 - hostmetrics[ incl_excl_hosts[host] ] = int(value)
 105+ hostmetrics[ incl_excl_hosts[host] ] = float(value)
106106
107107 def build_metrics_exclude( host, metric, value):
108108 ''' callback from ganglios.parse_ganglia '''
109109 if not host in incl_excl_hosts:
110 - hostmetrics[ host ] = int(value)
 110+ hostmetrics[ host ] = float(value)
111111
112112 status = 0 # OK
113113 if( inexcl_flag == 'include' ):
Index: trunk/debs/ganglios/src/ganglia_parser
@@ -31,7 +31,7 @@
3232 formatter = logging.Formatter('%(asctime)s %(levelname)-8s %(message)s')
3333 hdlr.setFormatter(formatter)
3434 logger.addHandler(hdlr)
35 -logger.setLevel(logging.WARNING)
 35+logger.setLevel(logging.DEBUG)
3636
3737 def unionMetrics( curMetrics, hostFilePath ):
3838 '''When a single host is present in more than one gmond collector node
@@ -110,13 +110,16 @@
111111 # so long as ganglios is running on the same host as the ganglia web ui, it
112112 # can use ganglia's gmetad.conf to get the list of sources.
113113 gmetadconf = open('/etc/ganglia/gmetad.conf')
114 - datasourcere = re.compile('^data_source "(?P<name>[^"]*)" (?P<hostlist>.*)')
 114+ # datasource is the string 'data_source' followed by a quoted string name followed by
 115+ # an optional polling interval followed by a list of hostnames
 116+ datasourcere = re.compile('^data_source "(?P<name>[^"]*)" (?P<pollint>\d+ )?(?P<hostlist>.*)')
115117 for line in gmetadconf.readlines():
116118 match = datasourcere.match(line)
117119 if match:
118 - for host in match.group(2).split():
 120+ for host in match.group('hostlist').split():
119121 nannybots.append(host)
120122
 123+ logger.info("nannybot list: %s" % nannybots)
121124 return nannybots
122125
123126 def storeXMLData(srcHosts, dataDir):

Status & tagging log