r69284 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r69283 | r69284 | r69285 >
Date:09:36, 12 July 2010
Author:tstarling
Status:deferred
Tags:
Comment:
* Added Linux software RAID support
* Added ZFS pool support (on Solaris)
Modified paths:
  • /trunk/debs/wikimedia-raid-utils/check-raid.py (modified) (history)
  • /trunk/debs/wikimedia-raid-utils/debian/changelog (modified) (history)

Diff [purge]

Index: trunk/debs/wikimedia-raid-utils/check-raid.py
@@ -1,8 +1,42 @@
22 #!/usr/bin/python
33
4 -import sys, os, re, subprocess
 4+import sys, os, os.path, re, subprocess
55
66 def main():
 7+ osName = os.uname()[0]
 8+ if osName == 'SunOS':
 9+ utility = 'zpool'
 10+ elif osName == 'Linux':
 11+ utility = getLinuxUtility()
 12+ else:
 13+ print 'WARNING: Operating system "%s" is not supported by this check script' % (osName)
 14+ sys.exit(1)
 15+
 16+ try:
 17+ if utility == None:
 18+ print 'OK: no RAID installed'
 19+ status = 0
 20+ elif utility == 'arcconf':
 21+ status = checkAdaptec()
 22+ elif utility == 'tw_cli':
 23+ status = check3ware()
 24+ elif utility == 'MegaCli':
 25+ status = checkMegaSas()
 26+ elif utility == 'zpool':
 27+ status = checkZfs()
 28+ elif utility == 'mdadm':
 29+ status = checkSoftwareRaid()
 30+ else:
 31+ print 'WARNING: %s is not yet supported by this check script' % (utility)
 32+ status = 1
 33+ except:
 34+ error = sys.exc_info()[1]
 35+ print 'WARNING: check-raid.py encountered exception: ' + str(error)
 36+ status = 1
 37+
 38+ sys.exit(status)
 39+
 40+def getLinuxUtility():
741 f = open("/proc/devices", "r")
842 regex = re.compile('^\s*\d+\s+(\w+)')
943 utility = None
@@ -23,29 +57,39 @@
2458 break
2559 elif name == 'megaraid_sas_ioctl':
2660 utility = 'MegaCli'
27 -
 61+ break
 62+
2863 f.close()
 64+ if utility != None:
 65+ return utility
2966
 67+ # Try mdadm
 68+ devices = getSoftwareRaidDevices()
 69+ if len(devices):
 70+ return 'mdadm'
 71+
 72+ return None
 73+
 74+def getSoftwareRaidDevices():
 75+ if not os.path.exists('/sbin/mdadm'):
 76+ return []
 77+
3078 try:
31 - if utility == None:
32 - print 'OK: no RAID installed'
33 - status = 0
34 - elif utility == 'arcconf':
35 - status = checkAdaptec()
36 - elif utility == 'tw_cli':
37 - status = check3ware()
38 - elif utility == 'MegaCli':
39 - status = checkMegaSas()
40 - else:
41 - print 'WARNING: %s is not yet supported by this check script' % (utility)
42 - status = 1
 79+ proc = subprocess.Popen(['/sbin/mdadm', '--detail', '--scan'],
 80+ stdout=subprocess.PIPE)
4381 except:
44 - error = sys.exc_info()[1]
45 - print 'WARNING: check-raid.py encountered exception: ' + str(error)
46 - status = 1
47 -
48 - sys.exit(status)
 82+ return []
4983
 84+ regex = re.compile('^ARRAY\s+([^ ]*) ')
 85+ devices = []
 86+ for line in proc.stdout:
 87+ m = regex.match(line)
 88+ if m != None:
 89+ devices.append(m.group(1))
 90+ proc.wait()
 91+
 92+ return devices
 93+
5094 def checkAdaptec():
5195 # Need to change directory so that the log file goes to the right place
5296 oldDir = os.getcwd()
@@ -195,4 +239,96 @@
196240 print 'OK: State is %s, checked %d logical device(s)' % (state, numDrives)
197241 return 0
198242
 243+def checkZfs():
 244+ try:
 245+ proc = subprocess.Popen(['/sbin/zpool', 'list', '-Honame,health'],
 246+ stdout=subprocess.PIPE)
 247+ except:
 248+ error = sys.exc_info()[1]
 249+ print 'WARNING: error executing zpool: %s' % str(error)
 250+ return 1
 251+
 252+ regex = re.compile('^(\S+)\s+(\S+)')
 253+ status = 0
 254+ msg = ''
 255+ for line in proc.stdout:
 256+ m = regex.match(line)
 257+ if m != None:
 258+ name = m.group(1)
 259+ health = m.group(2)
 260+ if health != 'ONLINE':
 261+ status = 2
 262+
 263+ if msg != '':
 264+ msg += ', '
 265+ msg += name + ': ' + health
 266+
 267+ ret = proc.wait()
 268+ if ret != 0:
 269+ print 'WARNING: zpool returned exit status %d' % (ret)
 270+ return 1
 271+
 272+ if status:
 273+ print 'CRITICAL: ' + msg
 274+ else:
 275+ print 'OK: ' + msg
 276+ return status
 277+
 278+def checkSoftwareRaid():
 279+ devices = getSoftwareRaidDevices()
 280+ if len(devices) == 0:
 281+ print 'WARNING: Unexpectedly checked no devices'
 282+ return 1
 283+
 284+ args = ['/sbin/mdadm', '--detail']
 285+ args.extend(devices)
 286+ try:
 287+ proc = subprocess.Popen(args, stdout = subprocess.PIPE)
 288+ except:
 289+ error = sys.exc_info()[1]
 290+ print 'WARNING: error executing mdadm: %s' % str(error)
 291+ return 1
 292+
 293+ deviceRegex = re.compile('^(/[^ ]*):$')
 294+ statRegex = re.compile('^ *(Active|Working|Failed|Spare) Devices *: *(\d+)')
 295+ currentDevice = None
 296+ stats = {
 297+ 'Active': 0,
 298+ 'Working': 0,
 299+ 'Failed': 0,
 300+ 'Spare': 0
 301+ }
 302+ for line in proc.stdout:
 303+ m = deviceRegex.match(line)
 304+ if m == None:
 305+ if currentDevice == None:
 306+ continue
 307+ else:
 308+ currentDevice = m.group(1)
 309+ continue
 310+
 311+ m = statRegex.match(line)
 312+ if m == None:
 313+ continue
 314+
 315+ stats[m.group(1)] += int(m.group(2))
 316+
 317+ ret = proc.wait()
 318+ if ret != 0:
 319+ print 'WARNING: mdadm returned exit status %d' % (ret)
 320+ return 1
 321+
 322+ msg = ''
 323+ for name in ('Active', 'Working', 'Failed', 'Spare'):
 324+ if msg != '':
 325+ msg += ', '
 326+ msg += name + ': ' + str(stats[name])
 327+
 328+ if stats['Failed'] > 0:
 329+ print 'CRITICAL: ' + msg
 330+ return 2
 331+ else:
 332+ print 'OK: ' + msg
 333+ return 0
 334+
199335 main()
Index: trunk/debs/wikimedia-raid-utils/debian/changelog
@@ -1,3 +1,10 @@
 2+wikimedia-raid-utils (1.8) lucid; urgency=low
 3+
 4+ * Added Linux software RAID support
 5+ * Added ZFS pool support (on Solaris)
 6+
 7+ -- Tim Starling <tstarling@wikimedia.org> Mon, 12 Jul 2010 19:34:16 +1000
 8+
29 wikimedia-raid-utils (1.7) hardy; urgency=low
310
411 * Removed files erroneously installed in the root directory

Status & tagging log