nagios-nrpe: add ceph checks
gitea/ansible-roles/pipeline/head This commit looks good Details

This commit is contained in:
Alexis Ben Miloud--Josselin 2022-11-15 11:06:47 +01:00
parent 83138f0a0b
commit b797a5059a
10 changed files with 1664 additions and 0 deletions

View File

@ -0,0 +1,232 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 SWITCH http://www.switch.ch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import print_function
import argparse
import os
import subprocess
import sys
# plugin version, printed by -V/--version
__version__ = '1.7.1'
# default path of the ceph executable; -e/--exe overrides it
CEPH_COMMAND = '/usr/bin/ceph'
# nagios exit codes: main() returns one of these and it becomes the
# process exit status
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_ERROR = 2
STATUS_UNKNOWN = 3
def _normalize_global_values(fields):
    """Reduce a split GLOBAL row of 'ceph df' to a common column layout.

    Returns a list whose items 0, 1 and 3 hold the total size, the
    available space and the percentage of raw space used. ``fields`` is
    returned unchanged when its length matches none of the known layouts.
    """
    if len(fields) == 7:
        # Luminous vs Mimic output (27.3TiB vs 27.3 TiB): glue every number
        # back onto its unit
        return [fields[0] + fields[1], fields[2] + fields[3],
                fields[4] + fields[5], fields[6]]
    if len(fields) == 10:
        # Nautilus output: one leading column, percentage in the last one
        return [fields[1] + fields[2], fields[3] + fields[4],
                fields[5] + fields[6], fields[9]]
    return fields


def _normalize_pool_values(fields):
    """Reduce a split pool row to [NAME, ID, USED, %USED, MAX AVAIL].

    ``fields`` is returned unchanged when its length matches none of the
    known layouts.
    """
    if len(fields) == 8:
        # Luminous vs Mimic output (27.3TiB vs 27.3 TiB)
        return [fields[0], fields[1], fields[2] + fields[3],
                fields[4], fields[5] + fields[6]]
    if len(fields) == 10:
        # Nautilus output
        return [fields[0], fields[1], fields[2] + fields[3],
                fields[7], fields[8] + fields[9]]
    if len(fields) == 11:
        # Octopus >= v15.2.8 (pgs added to ceph-df); PGS column is skipped
        return [fields[0], fields[1], fields[3] + fields[4],
                fields[8], fields[9] + fields[10]]
    return fields


def _check_pool(args, poollines):
    """Compare the %USED of pool ``args.pool`` with the thresholds.

    Returns a Nagios status; STATUS_UNKNOWN when the pool has no usable row.
    """
    unparsable = None
    for line in poollines:
        fields = line.split()
        # Match the NAME column exactly: a substring test would also select
        # pools whose name merely contains args.pool.
        if not fields or fields[0] != args.pool:
            continue
        poolvals = _normalize_pool_values(fields)
        try:
            pool_used = poolvals[2]
            pool_usage_percent = float(poolvals[3])
        except (IndexError, ValueError):
            # e.g. a header row; remember it and keep looking for a data row
            unparsable = line
            continue
        if pool_usage_percent > args.critical:
            print('CRITICAL: %s%% usage in Pool \'%s\' is above %s%% (%s used) | Usage=%s%%;%s;%s;;' % (pool_usage_percent, args.pool, args.critical, pool_used, pool_usage_percent, args.warn, args.critical))
            return STATUS_ERROR
        if pool_usage_percent > args.warn:
            print('WARNING: %s%% usage in Pool \'%s\' is above %s%% (%s used) | Usage=%s%%;%s;%s;;' % (pool_usage_percent, args.pool, args.warn, pool_used, pool_usage_percent, args.warn, args.critical))
            return STATUS_WARNING
        print('%s%% usage in Pool \'%s\' | Usage=%s%%;%s;%s;;' % (pool_usage_percent, args.pool, pool_usage_percent, args.warn, args.critical))
        return STATUS_OK
    if unparsable is not None:
        print("ERROR: cannot parse 'ceph df' line for pool '%s': %s" % (args.pool, unparsable.strip()))
    else:
        print("ERROR: pool '%s' not found in 'ceph df' output" % args.pool)
    return STATUS_UNKNOWN


def _check_global(args, globalvals, poollines):
    """Compare the global raw usage percentage with the thresholds.

    With ``args.detail`` the pool rows are appended to WARNING/CRITICAL
    messages. Returns a Nagios status.
    """
    try:
        global_usage_percent = float(globalvals[3])
        global_available_space = globalvals[1]
        global_total_space = globalvals[0]
    except (IndexError, ValueError):
        print("ERROR: cannot parse global usage from 'ceph df' output")
        return STATUS_UNKNOWN
    if global_usage_percent > args.critical:
        label, limit, status = 'CRITICAL', args.critical, STATUS_ERROR
    elif global_usage_percent > args.warn:
        label, limit, status = 'WARNING', args.warn, STATUS_WARNING
    else:
        print('RAW usage %s%% | Usage=%s%%;%s;%s;;' % (global_usage_percent, global_usage_percent, args.warn, args.critical))
        return STATUS_OK
    poolout = '\n '.join(['\n'] + poollines) if args.detail else ''
    print('%s: global RAW usage of %s%% is above %s%% (%s of %s free)%s | Usage=%s%%;%s;%s;;' % (label, global_usage_percent, limit, global_available_space, global_total_space, poolout, global_usage_percent, args.warn, args.critical))
    return status


def main():
    """Nagios check of the usage reported by 'ceph df'.

    Parses the command line, runs ``ceph ... df`` and compares either the
    global raw usage or, with -p/--pool, the usage of one pool with the
    -W/--warn and -C/--critical percentages.

    Returns STATUS_OK, STATUS_WARNING, STATUS_ERROR or STATUS_UNKNOWN; one
    status line is printed in every case.
    """
    # parse args
    parser = argparse.ArgumentParser(description="'ceph df' nagios plugin.")
    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_COMMAND)
    parser.add_argument('-c','--conf', help='alternative ceph conf file')
    parser.add_argument('-m','--monaddress', help='ceph monitor address[:port]')
    parser.add_argument('-i','--id', help='ceph client id')
    parser.add_argument('-n','--name', help='ceph client name')
    parser.add_argument('-k','--keyring', help='ceph client keyring file')
    parser.add_argument('-p','--pool', help='ceph pool name')
    parser.add_argument('-d','--detail', help="show pool details on warn and critical", action='store_true')
    parser.add_argument('-W','--warn', help="warn above this percent RAW USED", type=float)
    parser.add_argument('-C','--critical', help="critical alert above this percent RAW USED", type=float)
    parser.add_argument('-V','--version', help='show version and exit', action='store_true')
    args = parser.parse_args()

    # answer --version before any validation so it works on hosts without
    # a ceph executable
    if args.version:
        print('version %s' % __version__)
        return STATUS_OK

    # validate args
    ceph_exec = args.exe if args.exe else CEPH_COMMAND
    if not os.path.exists(ceph_exec):
        print("ERROR: ceph executable '%s' doesn't exist" % ceph_exec)
        return STATUS_UNKNOWN
    if args.conf and not os.path.exists(args.conf):
        print("ERROR: ceph conf file '%s' doesn't exist" % args.conf)
        return STATUS_UNKNOWN
    if args.keyring and not os.path.exists(args.keyring):
        print("ERROR: keyring file '%s' doesn't exist" % args.keyring)
        return STATUS_UNKNOWN
    # compare with None: a threshold of 0 is a legitimate value
    if args.warn is None or args.critical is None or args.warn > args.critical:
        print("ERROR: warn and critical level must be set and critical must be greater than warn")
        return STATUS_UNKNOWN

    # build command
    ceph_df = [ceph_exec]
    for flag, value in (('-m', args.monaddress), ('-c', args.conf),
                        ('--id', args.id), ('--name', args.name),
                        ('--keyring', args.keyring)):
        if value:
            ceph_df.extend([flag, value])
    ceph_df.append('df')

    # exec command
    p = subprocess.Popen(ceph_df,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    output, err = p.communicate()

    if output:
        result = output.decode('utf-8', 'replace').splitlines()
        # values for GLOBAL are in 3rd line of output
        if len(result) < 3:
            print("ERROR: unexpected 'ceph df' output: %s" % ' '.join(result))
            return STATUS_UNKNOWN
        globalvals = _normalize_global_values(result[2].split())
        # the pool section follows the global section
        poollines = result[3:]
        if args.pool:
            return _check_pool(args, poollines)
        return _check_global(args, globalvals, poollines)

    if err:
        # read only first line of error; stderr is bytes and must be decoded
        one_line = err.decode('utf-8', 'replace').split('\n')[0]
        if '-1 ' in one_line:
            idx = one_line.rfind('-1 ')
            print('ERROR: %s: %s' % (ceph_exec, one_line[idx+len('-1 '):]))
        else:
            print(one_line)
    else:
        print("ERROR: '%s' produced no output" % ' '.join(ceph_df))
    return STATUS_UNKNOWN
if __name__ == "__main__":
    # main() returns the nagios status; use it as the process exit code
    exit_status = main()
    sys.exit(exit_status)

View File

@ -0,0 +1,200 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013-2016 SWITCH http://www.switch.ch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import print_function
import argparse
import os
import subprocess
import sys
import re
import json
# plugin version, printed by -V/--version
__version__ = '1.7.0'
# default ceph values
# cephadm wrapper, only used with -a/--cephadm; -A/--admexe overrides it
CEPH_ADM_COMMAND = '/usr/sbin/cephadm'
# ceph executable; -e/--exe overrides it
CEPH_COMMAND = '/usr/bin/ceph'
# nagios exit codes: main() returns one of these and it becomes the
# process exit status
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_ERROR = 2
STATUS_UNKNOWN = 3
def main():
    """Nagios check built on 'ceph health --format json'.

    Parses the command line, runs ``ceph ... health [detail]`` (optionally
    through ``cephadm shell``) and maps the reported checks to a status:
    any HEALTH_ERR check is CRITICAL, every other reported check is a
    WARNING unless its message matches -w/--whitelist. The most severe
    message is printed first, the remaining ones below it.

    Returns STATUS_OK, STATUS_WARNING, STATUS_ERROR or STATUS_UNKNOWN.
    """
    # parse args
    parser = argparse.ArgumentParser(description="'ceph health' nagios plugin.")
    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_COMMAND)
    parser.add_argument('-A','--admexe', help='cephadm executable [%s]' % CEPH_ADM_COMMAND)
    parser.add_argument('--cluster', help='ceph cluster name')
    parser.add_argument('-c','--conf', help='alternative ceph conf file')
    parser.add_argument('-m','--monaddress', help='ceph monitor address[:port]')
    parser.add_argument('-i','--id', help='ceph client id')
    parser.add_argument('-n','--name', help='ceph client name')
    parser.add_argument('-k','--keyring', help='ceph client keyring file')
    parser.add_argument('--check', help='regexp of which check(s) to check (luminous+) '
                        "Can be inverted, e.g. '^((?!(PG_DEGRADED|OBJECT_MISPLACED)$).)*$'")
    parser.add_argument('-w','--whitelist', help='whitelist regexp for ceph health warnings')
    parser.add_argument('-d','--detail', help="exec 'ceph health detail'", action='store_true')
    parser.add_argument('-V','--version', help='show version and exit', action='store_true')
    parser.add_argument('-a','--cephadm', help='uses cephadm to execute the command', action='store_true')
    parser.add_argument('-s','--skip-muted', help='skip muted checks', action='store_true')
    args = parser.parse_args()

    # answer --version before any validation so it works on hosts without
    # a ceph executable
    if args.version:
        print('version %s' % __version__)
        return STATUS_OK

    # validate args
    cephadm_exec = args.admexe if args.admexe else CEPH_ADM_COMMAND
    ceph_exec = args.exe if args.exe else CEPH_COMMAND
    if args.cephadm:
        if not os.path.exists(cephadm_exec):
            print("ERROR: cephadm executable '%s' doesn't exist" % cephadm_exec)
            return STATUS_UNKNOWN
    elif not os.path.exists(ceph_exec):
        print("ERROR: ceph executable '%s' doesn't exist" % ceph_exec)
        return STATUS_UNKNOWN
    if args.conf and not os.path.exists(args.conf):
        print("ERROR: ceph conf file '%s' doesn't exist" % args.conf)
        return STATUS_UNKNOWN
    if args.keyring and not os.path.exists(args.keyring):
        print("ERROR: keyring file '%s' doesn't exist" % args.keyring)
        return STATUS_UNKNOWN

    # build command
    ceph_health = [ceph_exec]
    if args.cephadm:
        # Prepend the command with the cephadm binary and the shell command
        ceph_health = [cephadm_exec, 'shell'] + ceph_health
    for flag, value in (('-m', args.monaddress), ('--cluster', args.cluster),
                        ('-c', args.conf), ('--id', args.id),
                        ('--name', args.name), ('--keyring', args.keyring)):
        if value:
            ceph_health.extend([flag, value])
    ceph_health.append('health')
    if args.detail:
        ceph_health.append('detail')
    ceph_health.extend(['--format', 'json'])

    # exec command
    p = subprocess.Popen(ceph_health,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    output, err = p.communicate()
    try:
        # decode first: not every supported interpreter lets json.loads
        # read bytes (UnicodeDecodeError is a ValueError as well)
        output = json.loads(output.decode('utf-8'))
    except ValueError:
        output = dict()
    if not isinstance(output, dict):
        output = dict()

    if output:
        ret = STATUS_OK
        msg = ""
        extended = []
        if 'checks' in output:
            #luminous
            for check,status in output['checks'].items():
                # skip check if not selected
                if args.check and not re.search(args.check, check):
                    continue
                if args.skip_muted and ('muted' in status and status['muted']):
                    continue
                check_detail = "%s( %s )" % (check, status['summary']['message'])
                if status["severity"] == "HEALTH_ERR":
                    # demote the previous headline, but never queue an
                    # empty one (it would print a blank line)
                    if msg:
                        extended.append(msg)
                    msg = "CRITICAL: %s" % check_detail
                    ret = STATUS_ERROR
                    continue
                if args.whitelist and re.search(args.whitelist,status['summary']['message']):
                    continue
                check_msg = "WARNING: %s" % check_detail
                if not msg:
                    msg = check_msg
                    ret = STATUS_WARNING
                else:
                    extended.append(check_msg)
        else:
            #pre-luminous
            for status in output.get("summary", []):
                # every entry is a mapping (its 'summary' text is read
                # below), so the severity has to be read from a field of it
                # rather than comparing the entry itself with a string
                # NOTE(review): assumes entries carry a 'severity' key - confirm
                severity = status.get('severity')
                if severity == "HEALTH_OK":
                    continue
                if severity in ("HEALTH_ERR", "HEALTH_ERROR"):
                    if msg:
                        extended.append(msg)
                    msg = "CRITICAL: %s" % status['summary']
                    ret = STATUS_ERROR
                    continue
                if args.whitelist and re.search(args.whitelist,status['summary']):
                    continue
                if not msg:
                    msg = "WARNING: %s" % status['summary']
                    ret = STATUS_WARNING
                else:
                    extended.append("WARNING: %s" % status['summary'])
        if msg:
            print(msg)
        else:
            print("HEALTH OK")
        if extended:
            print('\n'.join(extended))
        return ret

    if err:
        # read only first line of error; stderr is bytes and must be decoded
        one_line = err.decode('utf-8', 'replace').split('\n')[0]
        if '-1 ' in one_line:
            idx = one_line.rfind('-1 ')
            print('ERROR: %s: %s' % (ceph_exec, one_line[idx+len('-1 '):]))
        else:
            print(one_line)
    else:
        print("ERROR: no usable output from '%s'" % ' '.join(ceph_health))
    return STATUS_UNKNOWN
if __name__ == "__main__":
    # main() returns the nagios status; use it as the process exit code
    exit_status = main()
    sys.exit(exit_status)

View File

@ -0,0 +1,188 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 Catalyst IT http://www.catalyst.net.nz
# Copyright (c) 2015 SWITCH http://www.switch.ch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import argparse
import socket
import os
import re
import subprocess
import sys
import json
# plugin version, printed by -V/--version
__version__ = '1.6.0'
# default ceph values
# ceph executable; -e/--exe overrides it
CEPH_EXEC = '/usr/bin/ceph'
# sub-command appended to the ceph command line (split on spaces)
CEPH_COMMAND = 'mds stat -f json'
# nagios exit codes: main() returns one of these and it becomes the
# process exit status
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_ERROR = 2
STATUS_UNKNOWN = 3
def main():
    """Nagios check of one MDS daemon, based on 'ceph mds stat -f json'.

    Parses the command line, runs the ceph command and hands the decoded
    JSON to check_target_mds() together with the -f/--filesystem and
    -n/--name arguments.

    Returns the status from check_target_mds(), STATUS_ERROR when the
    command fails or prints nothing, STATUS_UNKNOWN for invalid arguments
    or unparsable output. -V/--version prints the version and exits with
    status 0 while the arguments are being parsed.
    """
    # parse args
    parser = argparse.ArgumentParser(description="'ceph mds stat' nagios plugin.")
    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_EXEC)
    parser.add_argument('-c','--conf', help='alternative ceph conf file')
    parser.add_argument('-m','--monaddress', help='ceph monitor to use for queries (address[:port])')
    parser.add_argument('-i','--id', help='ceph client id')
    parser.add_argument('-k','--keyring', help='ceph client keyring file')
    # action='version' is handled during parsing, so -V also works without
    # the required -n/-f options (a store_true flag would be rejected)
    parser.add_argument('-V','--version', help='show version and exit', action='version',
                        version='version %s' % __version__)
    parser.add_argument('-n','--name', help='mds daemon name', required=True)
    parser.add_argument('-f','--filesystem', help='mds filesystem name', required=True)
    args = parser.parse_args()

    # validate args
    ceph_exec = args.exe if args.exe else CEPH_EXEC
    if not os.path.exists(ceph_exec):
        print("MDS ERROR: ceph executable '%s' doesn't exist" % ceph_exec)
        return STATUS_UNKNOWN
    if args.conf and not os.path.exists(args.conf):
        print("MDS ERROR: ceph conf file '%s' doesn't exist" % args.conf)
        return STATUS_UNKNOWN
    if args.keyring and not os.path.exists(args.keyring):
        print("MDS ERROR: keyring file '%s' doesn't exist" % args.keyring)
        return STATUS_UNKNOWN

    # build command
    ceph_cmd = [ceph_exec]
    for flag, value in (('-m', args.monaddress), ('-c', args.conf),
                        ('--id', args.id), ('--keyring', args.keyring)):
        if value:
            ceph_cmd.extend([flag, value])
    ceph_cmd.extend(CEPH_COMMAND.split(' '))

    # exec command
    p = subprocess.Popen(ceph_cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    output, err = p.communicate()
    # the pipes deliver bytes; decode them so messages show text, not b'...'
    output = output.decode('utf-8', 'replace')
    if p.returncode != 0 or not output:
        print("MDS ERROR: %s" % err.decode('utf-8', 'replace'))
        return STATUS_ERROR

    # load json output and parse
    try:
        mds_stat = json.loads(output)
    except ValueError as e:
        print("MDS ERROR: could not parse '%s' output: %s: %s" % (CEPH_COMMAND,output,e))
        return STATUS_UNKNOWN
    return check_target_mds(mds_stat, args.filesystem, args.name)
def check_target_mds(mds_stat, fs_name, name):
    """Report the state of MDS ``name`` and return the nagios status.

    mds_stat -- decoded 'ceph mds stat -f json' output
    fs_name  -- filesystem whose MDS list is searched
    name     -- MDS daemon name to look for

    A standby daemon is OK. A daemon listed for ``fs_name`` is OK, or
    WARNING when it is laggy. STATUS_ERROR is returned when the filesystem
    is not in the fsmap or the daemon is in neither list.
    """
    # find mds from standby list
    for mds in _get_standby_mds(mds_stat):
        if mds.get_name() == name:
            print("MDS OK: %s" % (mds))
            return STATUS_OK

    # find mds from active list
    active_mdss = _get_active_mds(mds_stat, fs_name)
    if active_mdss is None:
        # Only None means the filesystem is missing; an empty list is an
        # existing filesystem without any MDS and must not be reported as
        # a wrong filesystem name.
        # fs not found in map, perhaps user input error
        print("MDS ERROR: FS '%s' is not found in fsmap" % (fs_name))
        return STATUS_ERROR
    for mds in active_mdss:
        if mds.get_name() != name:
            continue
        # target mds in active list
        print("MDS %s: %s" % ("WARN" if mds.is_laggy() else "OK", mds))
        return STATUS_WARNING if mds.is_laggy() else STATUS_OK

    # mds not found
    print("MDS ERROR: MDS '%s' is not found (offline?)" % (name))
    return STATUS_ERROR
def _get_standby_mds(mds_stat):
    """Return an MDS object for every entry of mds_stat['fsmap']['standbys'].

    Only the name and state of each standby are carried over, so the
    returned objects never report themselves as laggy.
    """
    # the former 'laggy_since' local was computed but never used; dropped
    return [MDS(mds['name'], mds['state'])
            for mds in mds_stat['fsmap']['standbys']]
def _get_active_mds(mds_stat, fs_name):
    """Return the MDS objects of filesystem ``fs_name``.

    Walks mds_stat['fsmap']['filesystems'] and builds one MDS per entry of
    the matching mdsmap's 'info' mapping (possibly none). Returns None when
    no filesystem carries that name.
    """
    for filesystem in mds_stat['fsmap']['filesystems']:
        mdsmap = filesystem['mdsmap']
        if mdsmap['fs_name'] == fs_name:
            # 'laggy_since' is optional in each entry; absent means None
            return [MDS(entry['name'], entry['state'], entry.get('laggy_since'))
                    for entry in mdsmap['info'].values()]
    # no fs found
    return None
class MDS(object):
    """Name, state and optional laggy marker of one MDS daemon."""

    def __init__(self, name, state, laggy_since=None):
        self.name, self.state, self.laggy_since = name, state, laggy_since

    def get_name(self):
        """Return the daemon name."""
        return self.name

    def get_state(self):
        """Return the daemon state."""
        return self.state

    def is_laggy(self):
        """Return True when a laggy_since value was recorded."""
        return not (self.laggy_since is None)

    def __str__(self):
        suffix = " (laggy or crashed)" if self.is_laggy() else ""
        return "MDS '%s' is %s%s" % (self.name, self.state, suffix)
# script entry point
if __name__ == "__main__":
    # main() returns the nagios status; use it as the process exit code
    exit_status = main()
    sys.exit(exit_status)

View File

@ -0,0 +1,188 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2018 SWITCH http://www.switch.ch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import print_function
import argparse
import os
import subprocess
import sys
import json
# plugin version, printed by -V/--version
__version__ = '1.0.0'
# default ceph values
# ceph executable; -e/--exe overrides it
CEPH_EXEC = '/usr/bin/ceph'
# sub-command appended to the ceph command line (split on spaces)
CEPH_COMMAND = 'mgr dump -f json'
# Sample 'ceph mgr dump' session kept as reference documentation; nothing
# in this plugin reads the constant. Of this structure main() only uses
# 'active_name' and the 'name' of each entry in 'standbys'.
CEPH_MGR_DUMP_EXAMPLE = '''
$ ceph --version
ceph version 12.2.7 (3ec878d1e53e1aeb47a9f619c49d9e7c0aa384d5) luminous (stable)
$ ceph mgr dump -f json|jq .
{
"epoch": 165,
"active_gid": 248001409,
"active_name": "zhdk0013",
"active_addr": "10.10.10.9:6800/810408",
"available": true,
"standbys": [
{
"gid": 247991934,
"name": "zhdk0009",
"available_modules": [
"balancer",
"dashboard",
"influx",
"localpool",
"prometheus",
"restful",
"selftest",
"status",
"zabbix"
]
},
{
"gid": 248011196,
"name": "zhdk0025",
"available_modules": [
"balancer",
"dashboard",
"influx",
"localpool",
"prometheus",
"restful",
"selftest",
"status",
"zabbix"
]
}
],
"modules": [
"balancer",
"restful",
"status"
],
"available_modules": [
"balancer",
"dashboard",
"influx",
"localpool",
"prometheus",
"restful",
"selftest",
"status",
"zabbix"
],
"services": {}
}
'''
# nagios exit codes: main() returns one of these and it becomes the
# process exit status
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_ERROR = 2
STATUS_UNKNOWN = 3
def main():
    """Nagios check of the ceph manager daemons ('ceph mgr dump -f json').

    Returns STATUS_OK when an active mgr and at least one standby exist,
    STATUS_WARNING when there is an active mgr but no standby,
    STATUS_ERROR when no active mgr is reported, and STATUS_UNKNOWN for
    invalid arguments, a failing command or unparsable output.
    """
    # parse args
    parser = argparse.ArgumentParser(description="'ceph mgr dump' nagios plugin.")
    parser.add_argument('-e', '--exe', help='ceph executable [%s]' % CEPH_EXEC)
    parser.add_argument('-c', '--conf', help='alternative ceph conf file')
    parser.add_argument('-m', '--monaddress', help='ceph monitor to use for queries (address[:port])')
    parser.add_argument('-i', '--id', help='ceph client id')
    parser.add_argument('-n', '--name', help='ceph client name')
    parser.add_argument('-k', '--keyring', help='ceph client keyring file')
    parser.add_argument('-V', '--version', help='show version and exit', action='store_true')
    args = parser.parse_args()

    if args.version:
        print("version {}".format(__version__))
        return STATUS_OK

    # validate args
    ceph_exec = args.exe if args.exe else CEPH_EXEC
    if not os.path.exists(ceph_exec):
        print("MGR ERROR: ceph executable '{}' doesn't exist".format(ceph_exec))
        return STATUS_UNKNOWN
    if args.conf and not os.path.exists(args.conf):
        print("MGR ERROR: ceph conf file '{}' doesn't exist".format(args.conf))
        return STATUS_UNKNOWN
    if args.keyring and not os.path.exists(args.keyring):
        print("MGR ERROR: keyring file '{}' doesn't exist".format(args.keyring))
        return STATUS_UNKNOWN

    # build command
    ceph_cmd = [ceph_exec]
    for flag, value in (('-m', args.monaddress), ('-c', args.conf),
                        ('--id', args.id), ('--name', args.name),
                        ('--keyring', args.keyring)):
        if value:
            ceph_cmd.extend([flag, value])
    ceph_cmd.extend(CEPH_COMMAND.split(' '))

    # exec command
    p = subprocess.Popen(ceph_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, err = p.communicate()
    # the pipes deliver bytes; decode them so messages show text, not b'...'
    output = output.decode('utf-8', 'replace')
    if p.returncode != 0 or not output:
        print("MGR ERROR: {}".format(err.decode('utf-8', 'replace')))
        return STATUS_UNKNOWN

    # load json output and parse
    try:
        mgr_dump = json.loads(output)
    except ValueError as e:
        print("MGR ERROR: could not parse '{}' output: {}: {}".format(ceph_cmd, output, e))
        return STATUS_UNKNOWN

    # check active: a missing key and an empty name both mean "no active mgr"
    active_mgr_name = mgr_dump.get('active_name')
    if not active_mgr_name:
        print("MGR CRITICAL: not active mgr found")
        print("JSON: {}".format(json.dumps(mgr_dump)))
        return STATUS_ERROR

    # check standby; a dump without a 'standbys' list counts as none
    standby_mgr_names = [standby_mgr['name']
                         for standby_mgr in mgr_dump.get('standbys', [])]
    if not standby_mgr_names:
        print("MGR WARN: active: {} but no standbys".format(active_mgr_name))
        return STATUS_WARNING
    print("MGR OK: active: {}, standbys: {}".format(active_mgr_name,
                                                    ", ".join(standby_mgr_names)))
    return STATUS_OK
# script entry point
if __name__ == "__main__":
    # main() returns the nagios status; use it as the process exit code
    exit_status = main()
    sys.exit(exit_status)

View File

@ -0,0 +1,163 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 Catalyst IT http://www.catalyst.net.nz
# Copyright (c) 2015 SWITCH http://www.switch.ch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import print_function
import argparse
import socket
import os
import re
import subprocess
import sys
import json
# plugin version, printed by -V/--version
__version__ = '1.5.0'
# default ceph values
# ceph executable; -e/--exe overrides it
CEPH_EXEC = '/usr/bin/ceph'
# sub-command appended to the ceph command line
CEPH_COMMAND = 'quorum_status'
# nagios exit codes: main() returns one of these and it becomes the
# process exit status
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_ERROR = 2
STATUS_UNKNOWN = 3
##
# ceph quorum_status output example, kept as reference documentation;
# nothing in this plugin reads the variable. Of this structure main() only
# uses the 'name' of each entry in monmap.mons and the 'quorum_names' list.
##
ceph_quorum_status_output_example = '''{
"quorum_leader_name" : "s0001",
"monmap" : {
"mons" : [
{
"name" : "s0001",
"addr" : "[2001:620:5ca1:8000::1001]:6789/0",
"rank" : 0
},
{
"name" : "s0003",
"addr" : "[2001:620:5ca1:8000::1003]:6789/0",
"rank" : 1
}
],
"created" : "2014-12-15 08:28:35.153650",
"epoch" : 2,
"modified" : "2014-12-15 08:28:40.371878",
"fsid" : "22348d2b-b69d-46cc-9a79-ca93cd6bae84"
},
"quorum_names" : [
"s0001",
"s0003"
],
"quorum" : [
0,
1
],
"election_epoch" : 24
}'''
def main():
    """Nagios check that one monitor is part of the ceph quorum.

    Runs 'ceph quorum_status' and looks for the -I/--monid name in the
    monmap and in the list of quorum members.

    Returns STATUS_OK when the monitor is in quorum, STATUS_WARNING when it
    is missing from the monmap or from the quorum, STATUS_ERROR when the
    command fails or prints nothing, and STATUS_UNKNOWN for invalid
    arguments or unparsable output.
    """
    # parse args
    parser = argparse.ArgumentParser(description="'ceph quorum_status' nagios plugin.")
    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_EXEC)
    parser.add_argument('-c','--conf', help='alternative ceph conf file')
    parser.add_argument('-m','--monaddress', help='ceph monitor to use for queries (address[:port])')
    parser.add_argument('-i','--id', help='ceph client id')
    parser.add_argument('-k','--keyring', help='ceph client keyring file')
    parser.add_argument('-V','--version', help='show version and exit', action='store_true')
    parser.add_argument('-I','--monid', help='mon ID to be checked for availability')
    args = parser.parse_args()

    if args.version:
        print('version %s' % __version__)
        return STATUS_OK

    # validate args
    ceph_exec = args.exe if args.exe else CEPH_EXEC
    if not os.path.exists(ceph_exec):
        print("MON ERROR: ceph executable '%s' doesn't exist" % ceph_exec)
        return STATUS_UNKNOWN
    if args.conf and not os.path.exists(args.conf):
        print("MON ERROR: ceph conf file '%s' doesn't exist" % args.conf)
        return STATUS_UNKNOWN
    if args.keyring and not os.path.exists(args.keyring):
        print("MON ERROR: keyring file '%s' doesn't exist" % args.keyring)
        return STATUS_UNKNOWN
    if not args.monid:
        print("MON ERROR: no MON ID given, use -I/--monid parameter")
        return STATUS_UNKNOWN

    # build command
    ceph_cmd = [ceph_exec]
    for flag, value in (('-m', args.monaddress), ('-c', args.conf),
                        ('--id', args.id), ('--keyring', args.keyring)):
        if value:
            ceph_cmd.extend([flag, value])
    ceph_cmd.append(CEPH_COMMAND)

    # exec command
    p = subprocess.Popen(ceph_cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    output, err = p.communicate()
    # the pipes deliver bytes; decode them so messages show text, not b'...'
    output = output.decode('utf-8', 'replace')
    if p.returncode != 0 or not output:
        print("MON ERROR: %s" % err.decode('utf-8', 'replace'))
        return STATUS_ERROR

    # load json output and parse
    try:
        quorum_status = json.loads(output)
    except ValueError as e:
        print("MON ERROR: could not parse '%s' output: %s: %s" % (CEPH_COMMAND,output,e))
        return STATUS_UNKNOWN

    # do our checks
    mons = quorum_status['monmap']['mons']
    if not any(mon['name'] == args.monid for mon in mons):
        print("MON WARN: mon '%s' is not in monmap: %s" % (args.monid,mons))
        return STATUS_WARNING
    if args.monid in quorum_status['quorum_names']:
        print("MON OK")
        return STATUS_OK
    print("MON WARN: no MON '%s' found in quorum" % args.monid)
    return STATUS_WARNING
# script entry point
if __name__ == "__main__":
    # main() returns the nagios status; use it as the process exit code
    exit_status = main()
    sys.exit(exit_status)

View File

@ -0,0 +1,154 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 Catalyst IT http://www.catalyst.net.nz
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# 1.5.2 (2019-06-16) Martin Seener: fixed regex to work with Ceph Nautilus (14.2.x)
from __future__ import print_function
import argparse
import os
import re
import subprocess
import sys
import socket
# plugin version, printed by -V/--version
__version__ = '1.5.2'
# default ceph values
# ceph executable; -e/--exe overrides it
CEPH_COMMAND = '/usr/bin/ceph'
# nagios exit codes: main() returns one of these and it becomes the
# process exit status
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_ERROR = 2
STATUS_UNKNOWN = 3
def main():
    """Nagios check of the OSDs of one host, based on 'ceph osd dump'.

    The -H/--host name is resolved to an address and the dump is searched
    for OSD lines carrying that address. Without -o/--out only OSDs that
    are down and still "in" count as failed; with it every down OSD does.

    Returns STATUS_ERROR when at least -C/--crit OSDs are failed or the
    command fails, STATUS_WARNING for fewer failed OSDs or when no OSD of
    the host is found, STATUS_OK when OSDs are up and none is failed, and
    STATUS_UNKNOWN for invalid arguments. -V/--version prints the version
    and exits with status 0 while the arguments are being parsed.
    """
    # parse args
    parser = argparse.ArgumentParser(description="'ceph osd' nagios plugin.")
    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_COMMAND)
    parser.add_argument('-c','--conf', help='alternative ceph conf file')
    parser.add_argument('-m','--monaddress', help='ceph monitor address[:port]')
    parser.add_argument('-i','--id', help='ceph client id')
    parser.add_argument('-k','--keyring', help='ceph client keyring file')
    # action='version' is handled during parsing, so -V also works without
    # the required -H option (a store_true flag would be rejected)
    parser.add_argument('-V','--version', help='show version and exit', action='version',
                        version='version %s' % __version__)
    parser.add_argument('-H','--host', help='osd host', required=True)
    parser.add_argument('-I','--osdid', help='osd id', required=False)
    parser.add_argument('-C','--crit', help='Number of failed OSDs to trigger critical (default=2)',type=int,default=2, required=False)
    parser.add_argument('-o','--out', help='check osds that are set OUT', default=False, action='store_true', required=False)
    args = parser.parse_args()

    # validate args
    ceph_exec = args.exe if args.exe else CEPH_COMMAND
    if not os.path.exists(ceph_exec):
        print("OSD ERROR: ceph executable '%s' doesn't exist" % ceph_exec)
        return STATUS_UNKNOWN
    if args.conf and not os.path.exists(args.conf):
        print("OSD ERROR: ceph conf file '%s' doesn't exist" % args.conf)
        return STATUS_UNKNOWN
    if args.keyring and not os.path.exists(args.keyring):
        print("OSD ERROR: keyring file '%s' doesn't exist" % args.keyring)
        return STATUS_UNKNOWN
    if not args.osdid:
        # no id given: match any osd id
        args.osdid = '[^ ]*'
    if not args.host:
        print("OSD ERROR: no OSD hostname given")
        return STATUS_UNKNOWN
    try:
        addrinfo = socket.getaddrinfo(args.host, None, 0, socket.SOCK_STREAM)
        args.host = addrinfo[0][-1][0]
        if addrinfo[0][0] == socket.AF_INET6:
            args.host = "[%s]" % args.host
    except (socket.error, UnicodeError, IndexError):
        # only resolution failures are expected here; anything else should
        # surface instead of being reported as a DNS problem
        print('OSD ERROR: could not resolve %s' % args.host)
        return STATUS_UNKNOWN

    # build command
    ceph_cmd = [ceph_exec]
    for flag, value in (('-m', args.monaddress), ('-c', args.conf),
                        ('--id', args.id), ('--keyring', args.keyring)):
        if value:
            ceph_cmd.extend([flag, value])
    ceph_cmd.extend(['osd', 'dump'])

    # exec command
    p = subprocess.Popen(ceph_cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    output, err = p.communicate()
    output = output.decode('utf8')
    # judge by the exit status like the sibling plugins do: text on stderr
    # alone does not mean the command failed
    if p.returncode != 0 or not output:
        print("OSD ERROR: %s" % err.decode('utf8', 'replace'))
        return STATUS_ERROR

    # escape the (IPv4 or bracketed IPv6) address for use in a pattern
    osd_host = re.escape(args.host)

    def find_osds(state):
        # The look-behind stops the address from matching the tail of a
        # longer one (10.0.0.1 inside 110.0.0.1).
        pattern = r"^(osd\.%s) %s.*(?<![\w.])%s:" % (args.osdid, state, osd_host)
        return re.findall(pattern, output, re.MULTILINE)

    up = find_osds('up')
    down_in = find_osds('down[ ]+in')
    down_out = find_osds('down[ ]+out')
    # with --out every down OSD counts as failed, otherwise only down+in
    down = find_osds('down') if args.out else down_in

    details = ["Up OSDs: " + " ".join(up),
               "Down+In OSDs: " + " ".join(down_in),
               "Down+Out OSDs: " + " ".join(down_out),
               "| 'osd_up'=%d 'osd_down_in'=%d;;%d 'osd_down_out'=%d;;%d" % (len(up), len(down_in), args.crit, len(down_out), args.crit)]
    if down:
        critical = len(down) >= args.crit
        print("OSD %s: Down OSD%s on %s: %s" % ('CRITICAL' if critical else 'WARNING' ,'s' if len(down)>1 else '', args.host, " ".join(down)))
        print("\n".join(details))
        return STATUS_ERROR if critical else STATUS_WARNING
    if up:
        print("OSD OK")
        print("\n".join(details))
        return STATUS_OK
    print("OSD WARN: no OSD.%s found on host %s" % (args.osdid, args.host))
    return STATUS_WARNING
if __name__ == "__main__":
    # main() returns the nagios status; use it as the process exit code
    exit_status = main()
    sys.exit(exit_status)

View File

@ -0,0 +1,152 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2020 Binero AB https://binero.com
# Copyright (c) 2013 Catalyst IT http://www.catalyst.net.nz
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import re
import subprocess
import sys
import socket
import json
# default ceph values
CEPH_COMMAND = '/usr/bin/ceph'

# nagios exit codes
STATUS_OK = 0
STATUS_CRITICAL = 2
STATUS_UNKNOWN = 3


def _run_command(cmd):
    """Run *cmd* and return its ``(stdout, stderr)`` as decoded text."""
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    # communicate() yields bytes; decode once here so that the regex matching
    # and the printed messages below always work on text.
    return out.decode('utf-8', 'replace'), err.decode('utf-8', 'replace')


def main():
    """Report the BlueFS DB usage of every OSD that is up on a given host.

    The plugin runs ``<ceph> osd dump``, selects the ``osd.N`` entries that
    are ``up`` and whose line mentions the resolved address of ``--host``,
    then runs ``<ceph> --format json daemon osd.N perf dump`` for each of
    them and compares ``bluefs.db_used_bytes / bluefs.db_total_bytes``
    (as a percentage) with ``--critical``.

    Returns:
        STATUS_OK when no OSD reaches the critical threshold,
        STATUS_CRITICAL when at least one does or when a ceph command
        fails, STATUS_UNKNOWN when the arguments cannot be used (missing
        executable/conf/keyring, unresolvable host).
    """
    parser = argparse.ArgumentParser(description="'ceph osd' nagios plugin.")
    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_COMMAND)
    parser.add_argument('-c','--conf', help='alternative ceph conf file')
    parser.add_argument('-m','--monaddress', help='ceph monitor address[:port]')
    parser.add_argument('-i','--id', help='ceph client id')
    parser.add_argument('-k','--keyring', help='ceph client keyring file')
    parser.add_argument('-H','--host', help='osd host', required=True)
    # type=float: without it a threshold given on the command line stays a
    # string and cannot be compared with the computed percentage.
    parser.add_argument('-C','--critical', help='critical threshold', type=float, default=60)
    args = parser.parse_args()

    # validate args
    ceph_exec = args.exe if args.exe else CEPH_COMMAND
    if not os.path.exists(ceph_exec):
        print("UNKNOWN: ceph executable '%s' doesn't exist" % ceph_exec)
        return STATUS_UNKNOWN
    if args.conf and not os.path.exists(args.conf):
        print("UNKNOWN: ceph conf file '%s' doesn't exist" % args.conf)
        return STATUS_UNKNOWN
    if args.keyring and not os.path.exists(args.keyring):
        print("UNKNOWN: keyring file '%s' doesn't exist" % args.keyring)
        return STATUS_UNKNOWN
    if not args.host:
        print("UNKNOWN: no OSD hostname given")
        return STATUS_UNKNOWN

    # Resolve the host to the address that is looked up in the osd dump;
    # IPv6 addresses are wrapped in brackets before matching.
    try:
        addrinfo = socket.getaddrinfo(args.host, None, 0, socket.SOCK_STREAM)
        args.host = addrinfo[0][-1][0]
        if addrinfo[0][0] == socket.AF_INET6:
            args.host = "[%s]" % args.host
    except Exception:
        print('UNKNOWN: could not resolve %s' % args.host)
        return STATUS_UNKNOWN

    # build command
    ceph_cmd = [ceph_exec]
    if args.monaddress:
        ceph_cmd += ['-m', args.monaddress]
    if args.conf:
        ceph_cmd += ['-c', args.conf]
    if args.id:
        ceph_cmd += ['--id', args.id]
    if args.keyring:
        ceph_cmd += ['--keyring', args.keyring]
    ceph_cmd += ['osd', 'dump']

    osd_dump, err = _run_command(ceph_cmd)
    if err or not osd_dump:
        print("CRITICAL: %s" % err)
        return STATUS_CRITICAL

    # re.escape() neutralises the dots / brackets of the address so that it
    # is matched literally.
    osds_up = re.findall(r"^(osd\.[^ ]*) up.*%s:" % re.escape(args.host),
                         osd_dump, re.MULTILINE)

    final_status = STATUS_OK
    lines = []
    for osd in osds_up:
        perf_dump, err = _run_command(
            [ceph_exec, '--format', 'json', 'daemon', osd, 'perf', 'dump'])
        if err or not perf_dump:
            print("CRITICAL: %s" % err)
            return STATUS_CRITICAL
        try:
            data = json.loads(perf_dump)
        except ValueError:
            print("CRITICAL: failed to load json")
            return STATUS_CRITICAL

        # OSDs whose perf dump has no 'bluefs' section are skipped.
        bluefs = data.get('bluefs') if isinstance(data, dict) else None
        if not bluefs:
            continue
        try:
            db_total_bytes = float(bluefs.get('db_total_bytes'))
            db_used_bytes = float(bluefs.get('db_used_bytes'))
        except (TypeError, ValueError):
            # counters missing or not numeric: nothing to compute
            continue
        if db_total_bytes <= 0:
            # a zero-sized DB would otherwise divide by zero
            continue

        perc = db_used_bytes / db_total_bytes * 100
        if perc >= args.critical:
            final_status = STATUS_CRITICAL
        lines.append("%s=%.2f%%" % (osd, perc))

    if final_status == STATUS_OK:
        print("OK: %s" % (' '.join(lines)))
    else:
        print("CRITICAL: %s" % (' '.join(lines)))
    return final_status


if __name__ == "__main__":
    sys.exit(main())

View File

@ -0,0 +1,153 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# check_ceph_osd_df - Check OSD DF output
# Copyright (c) 2020 noris network AG https://www.noris.de
#
# This plugin will not output perfdata as there is likely a lot of output
# which should be gathered using other tools.
#
# Parts based on code from check_ceph_df which is
# Copyright (c) 2013 SWITCH http://www.switch.ch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import print_function
import argparse
import os
import subprocess
import sys
import json
from operator import itemgetter
# Semver
__version__ = '1.0.0'

# default ceph values
CEPH_COMMAND = '/usr/bin/ceph'

# nagios exit code
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_ERROR = 2
STATUS_UNKNOWN = 3


def main():
    """Check the per-OSD utilization reported by ``ceph osd df``.

    Runs ``<ceph> osd df --format=json`` and compares the ``utilization``
    of every entry in ``nodes`` with the ``--warn`` / ``--critical``
    percentages. Every OSD at or above the warn threshold is listed once,
    highest utilization first.

    Returns:
        STATUS_OK when all OSDs are below ``--warn``, STATUS_WARNING when at
        least one reaches ``--warn``, STATUS_ERROR when at least one reaches
        ``--critical``, STATUS_UNKNOWN for invalid arguments, unparsable
        output or a failing ceph command.
    """
    # parse args
    parser = argparse.ArgumentParser(description="'ceph osd df' nagios plugin.")
    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_COMMAND)
    parser.add_argument('-c','--conf', help='alternative ceph conf file')
    parser.add_argument('-m','--monaddress', help='ceph monitor address[:port]')
    parser.add_argument('-i','--id', help='ceph client id')
    parser.add_argument('-n','--name', help='ceph client name')
    parser.add_argument('-k','--keyring', help='ceph client keyring file')
    parser.add_argument('-W','--warn', help="warn above this percent USED", type=float)
    parser.add_argument('-C','--critical', help="critical alert above this percent USED", type=float)
    parser.add_argument('-V','--version', help='show version and exit', action='store_true')
    args = parser.parse_args()

    # validate args
    ceph_exec = args.exe if args.exe else CEPH_COMMAND
    if not os.path.exists(ceph_exec):
        print("ERROR: ceph executable '%s' doesn't exist" % ceph_exec)
        return STATUS_UNKNOWN
    if args.version:
        print('version %s' % __version__)
        return STATUS_OK
    if args.conf and not os.path.exists(args.conf):
        print("ERROR: ceph conf file '%s' doesn't exist" % args.conf)
        return STATUS_UNKNOWN
    if args.keyring and not os.path.exists(args.keyring):
        print("ERROR: keyring file '%s' doesn't exist" % args.keyring)
        return STATUS_UNKNOWN
    # 'is None' rather than truthiness, so that a threshold of 0 is accepted.
    if args.warn is None or args.critical is None or args.warn > args.critical:
        print("ERROR: warn and critical level must be set and critical must be greater than warn")
        return STATUS_UNKNOWN

    # build command
    ceph_osd_df = [ceph_exec]
    if args.monaddress:
        ceph_osd_df += ['-m', args.monaddress]
    if args.conf:
        ceph_osd_df += ['-c', args.conf]
    if args.id:
        ceph_osd_df += ['--id', args.id]
    if args.name:
        ceph_osd_df += ['--name', args.name]
    if args.keyring:
        ceph_osd_df += ['--keyring', args.keyring]
    ceph_osd_df += ['osd', 'df', '--format=json']

    # exec command
    proc = subprocess.Popen(ceph_osd_df, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, err = proc.communicate()

    if output:
        # parse output
        try:
            result = json.loads(output.decode('utf-8'))
            nodes_sorted = sorted(result["nodes"],
                                  key=itemgetter('utilization', 'id'),
                                  reverse=True)
            # warn <= critical was validated above, so every critical OSD is
            # part of this list; each OSD is reported exactly once.
            over_warn = [node for node in nodes_sorted
                         if node["utilization"] >= args.warn]
            any_critical = any(node["utilization"] >= args.critical
                               for node in over_warn)
            warn_crit_osds = ["{}={:04.2f}".format(node["name"], node["utilization"])
                              for node in over_warn]
        except Exception:
            print("ERROR: {}".format(sys.exc_info()[0]))
            return STATUS_UNKNOWN

        if not over_warn:
            print("OK: All OSDs within limits")
            return STATUS_OK
        # The precision in the format spec truncates the OSD list.
        if any_critical:
            print("CRITICAL: OSD usage above critical or warn threshold: {:.4041}".format(", ".join(warn_crit_osds)))
            return STATUS_ERROR
        print("WARNING: OSD usage above warn threshold: {:.4054}".format(", ".join(warn_crit_osds)))
        return STATUS_WARNING

    if err:
        # read only first line of error (stderr is bytes, decode it first)
        one_line = err.decode('utf-8', 'replace').split('\n')[0]
        if '-1 ' in one_line:
            idx = one_line.rfind('-1 ')
            print('ERROR: %s: %s' % (ceph_exec, one_line[idx+len('-1 '):]))
        else:
            print(one_line)
        return STATUS_UNKNOWN

    # neither stdout nor stderr: still tell the operator what happened
    print("ERROR: %s returned no output" % ceph_exec)
    return STATUS_UNKNOWN


if __name__ == "__main__":
    sys.exit(main())

View File

@ -0,0 +1,118 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2014 Catalyst IT http://www.catalyst.net.nz
# Copyright (c) 2015 SWITCH http://www.switch.ch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import argparse
import os
import re
import subprocess
import sys
import json
__version__ = '1.5.1'

# default ceph values
RGW_COMMAND = '/usr/bin/radosgw-admin'

# nagios exit code
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_ERROR = 2
STATUS_UNKNOWN = 3


def main():
    """Report bucket usage as returned by ``radosgw-admin bucket stats``.

    Runs ``<radosgw-admin> bucket stats``, reads ``usage['rgw.main']
    ['size_kb_actual']`` of every bucket (0 when that section is absent) and
    prints the bucket count and total size, followed by perf data: the total
    under the label ``/`` and, with ``--detail``, one entry per bucket.
    ``--byte`` switches the perf data from KB to bytes.

    Returns:
        STATUS_OK on success, STATUS_ERROR when the command fails or prints
        nothing, STATUS_UNKNOWN for unusable arguments or output that is not
        valid JSON.
    """
    # parse args
    parser = argparse.ArgumentParser(description="'radosgw-admin bucket stats' nagios plugin.")
    parser.add_argument('-d','--detail', help='output perf data for all buckets', action='store_true')
    parser.add_argument('-B','--byte', help='output perf data in Byte instead of KB', action='store_true')
    parser.add_argument('-e','--exe', help='radosgw-admin executable [%s]' % RGW_COMMAND)
    parser.add_argument('-c','--conf', help='alternative ceph conf file')
    parser.add_argument('-i','--id', help='ceph client id')
    parser.add_argument('-n','--name', help='ceph client name (type.id)')
    parser.add_argument('-V','--version', help='show version and exit', action='store_true')
    args = parser.parse_args()

    # validate args
    rgw_exec = args.exe if args.exe else RGW_COMMAND
    if not os.path.exists(rgw_exec):
        print("RGW ERROR: radosgw-admin executable '%s' doesn't exist" % rgw_exec)
        return STATUS_UNKNOWN
    if args.version:
        print('version %s' % __version__)
        return STATUS_OK
    if args.conf and not os.path.exists(args.conf):
        print("RGW ERROR: ceph conf file '%s' doesn't exist" % args.conf)
        return STATUS_UNKNOWN

    # build command
    rgw_cmd = [rgw_exec]
    if args.conf:
        rgw_cmd += ['-c', args.conf]
    if args.id:
        rgw_cmd += ['--id', args.id]
    if args.name:
        rgw_cmd += ['-n', args.name]
    rgw_cmd += ['bucket', 'stats']

    # exec command
    proc = subprocess.Popen(rgw_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    raw_out, raw_err = proc.communicate()
    # communicate() yields bytes; decode so messages are printed as text
    output = raw_out.decode('utf-8', 'replace')
    err = raw_err.decode('utf-8', 'replace')
    if proc.returncode != 0 or not output:
        print("RGW ERROR: %s :: %s" % (output, err))
        return STATUS_ERROR

    # An uncaught parse error would end the plugin with a traceback and
    # Python's generic exit status 1; report it explicitly instead.
    try:
        bucket_stats = json.loads(output)
    except ValueError as exc:
        print("RGW ERROR: could not parse radosgw-admin output: %s" % exc)
        return STATUS_UNKNOWN

    # collect (name, usage in KB) for every bucket entry; non-dict entries
    # are ignored
    buckets = []
    for entry in bucket_stats:
        if not isinstance(entry, dict):
            continue
        usage_dict = entry['usage']
        if usage_dict and 'rgw.main' in usage_dict:
            bucket_usage_kb = usage_dict['rgw.main']['size_kb_actual']
        else:
            bucket_usage_kb = 0
        buckets.append((entry['bucket'], bucket_usage_kb))
    buckets_total_kb = sum(size_kb for _, size_kb in buckets)

    # perf data is emitted either in bytes (KB * 1024) or in KB
    factor, unit = (1024, "B") if args.byte else (1, "KB")
    status = "RGW OK: {} buckets, {} KB total | /={}{} ".format(
        len(buckets), buckets_total_kb, buckets_total_kb * factor, unit)
    if buckets and args.detail:
        status += " ".join("{}={}{}".format(name, size_kb * factor, unit)
                           for name, size_kb in buckets)
    print(status)
    return STATUS_OK


if __name__ == "__main__":
    sys.exit(main())

View File

@ -0,0 +1,116 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2014 Catalyst IT http://www.catalyst.net.nz
# Copyright (c) 2015 SWITCH http://www.switch.ch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import requests
import warnings
import json
import argparse
import sys
from awsauth import S3Auth
__version__ = '1.7.2'

# nagios exit code
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_CRITICAL = 2
STATUS_UNKNOWN = 3


def main():
    """Report bucket usage fetched from the radosgw admin API.

    Requests ``<host>/<admin_entry>/bucket?format=json&stats=True`` with S3
    authentication, reads ``usage['rgw.main']['size_kb_actual']`` of every
    bucket (0 when that section is absent) and prints the bucket count and
    total size, followed by perf data: the total under the label ``/`` and,
    with ``--detail``, one entry per bucket. ``--byte`` switches the perf
    data from KB to bytes.

    Returns:
        STATUS_OK on success, STATUS_WARNING when the server answers with a
        non-OK HTTP status, STATUS_UNKNOWN when the request itself fails or
        the body is not valid JSON. Missing mandatory options end the
        process through ``parser.error()``.
    """
    # parse args
    # host / access_key / secret_key are validated by hand further down so
    # that '--version' can be used on its own.
    parser = argparse.ArgumentParser(description="'radosgw api bucket stats' nagios plugin.")
    parser.add_argument('-H', '--host', help="Server URL for the radosgw api (example: http://objects.dreamhost.com/)")
    parser.add_argument('-k', '--insecure', help="Allow insecure server connections when using SSL", action="store_false")
    parser.add_argument('-e', '--admin_entry', help="The entry point for an admin request URL [default is '%(default)s']", default="admin")
    parser.add_argument('-a', '--access_key', help="S3 access key")
    parser.add_argument('-s', '--secret_key', help="S3 secret key")
    parser.add_argument('-d', '--detail', help="output perf data for all buckets", action="store_true")
    parser.add_argument('-b', '--byte', help="output perf data in Byte instead of KB", action="store_true")
    parser.add_argument('-v', '--version', help='show version and exit', action="store_true")
    args = parser.parse_args()

    if args.version:
        print("version {0}".format(__version__))
        return STATUS_OK

    mandatory = (('-H/--host', args.host),
                 ('-a/--access_key', args.access_key),
                 ('-s/--secret_key', args.secret_key))
    missing = [flag for flag, value in mandatory if not value]
    if missing:
        # prints the usage on stderr and exits with status 2
        parser.error("the following arguments are required: {0}".format(", ".join(missing)))

    # helpers for default schema
    if not args.host.startswith("http"):
        args.host = "http://{0}".format(args.host)
    # and for request_uri
    if not args.host.endswith("/"):
        args.host = "{0}/".format(args.host)

    url = "{0}{1}/bucket?format=json&stats=True".format(args.host,
                                                        args.admin_entry)

    # '--insecure' is a store_false flag: args.insecure is True by default
    # and False when the flag is given, so it is used directly as 'verify'.
    verify_tls = args.insecure
    try:
        if not verify_tls:
            # silence the warning emitted for unverified HTTPS requests
            warnings.filterwarnings('ignore', message='Unverified HTTPS request')
        response = requests.get(url, verify=verify_tls,
                                auth=S3Auth(args.access_key, args.secret_key,
                                            args.host))
    # DNS, connection errors, etc
    except requests.exceptions.RequestException as e:
        print("RGW ERROR: {0}".format(e))
        return STATUS_UNKNOWN

    if response.status_code != requests.codes.ok:
        # no usage caps or wrong admin entry
        print("RGW ERROR [{0}]: {1}".format(response.status_code,
                                            response.content.decode('utf-8')))
        return STATUS_WARNING

    try:
        bucket_stats = response.json()
    except ValueError as e:
        # a 200 answer whose body is not JSON
        print("RGW ERROR: {0}".format(e))
        return STATUS_UNKNOWN

    # collect (name, usage in KB) for every bucket entry; non-dict entries
    # are ignored
    buckets = []
    for entry in bucket_stats:
        if not isinstance(entry, dict):
            continue
        usage_dict = entry['usage']
        if usage_dict and 'rgw.main' in usage_dict:
            bucket_usage_kb = usage_dict['rgw.main']['size_kb_actual']
        else:
            bucket_usage_kb = 0
        buckets.append((entry['bucket'], bucket_usage_kb))
    buckets_total_kb = sum(size_kb for _, size_kb in buckets)

    # perf data is emitted either in bytes (KB * 1024) or in KB
    factor, unit = (1024, "B") if args.byte else (1, "KB")
    status = "RGW OK: {0} buckets, {1} KB total | /={2}{3} ".format(
        len(buckets), buckets_total_kb, buckets_total_kb * factor, unit)
    if buckets and args.detail:
        status += " ".join("{}={}{}".format(name, size_kb * factor, unit)
                           for name, size_kb in buckets)
    print(status)
    return STATUS_OK


if __name__ == "__main__":
    sys.exit(main())