#!/usr/bin/env python # -*- coding: utf-8 -*- # # check_ceph_osd_df - Check OSD DF output # Copyright (c) 2020 noris network AG https://www.noris.de # # This plugin will not output perfdata as there is likely a lot of output # which should be gathered using other tools. # # Parts based on code from check_ceph_df which is # Copyright (c) 2013 SWITCH http://www.switch.ch # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # from __future__ import print_function import argparse import os import subprocess import sys import json from operator import itemgetter # Semver __version__ = '1.0.0' # default ceph values CEPH_COMMAND = '/usr/bin/ceph' # nagios exit code STATUS_OK = 0 STATUS_WARNING = 1 STATUS_ERROR = 2 STATUS_UNKNOWN = 3 def main(): # parse args parser = argparse.ArgumentParser(description="'ceph osd df' nagios plugin.") parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_COMMAND) parser.add_argument('-c','--conf', help='alternative ceph conf file') parser.add_argument('-m','--monaddress', help='ceph monitor address[:port]') parser.add_argument('-i','--id', help='ceph client id') parser.add_argument('-n','--name', help='ceph client name') parser.add_argument('-k','--keyring', help='ceph client keyring file') parser.add_argument('-W','--warn', help="warn above this percent USED", type=float) parser.add_argument('-C','--critical', help="critical alert above this percent USED", type=float) parser.add_argument('-V','--version', help='show version and exit', action='store_true') args = parser.parse_args() # validate args ceph_exec = args.exe if args.exe else CEPH_COMMAND if not os.path.exists(ceph_exec): print("ERROR: ceph executable '%s' doesn't exist" % ceph_exec) return STATUS_UNKNOWN if args.version: print('version %s' % __version__) return STATUS_OK if args.conf and not os.path.exists(args.conf): print("ERROR: ceph conf file '%s' doesn't exist" % args.conf) return STATUS_UNKNOWN if args.keyring and not os.path.exists(args.keyring): print("ERROR: keyring file '%s' doesn't exist" % args.keyring) return STATUS_UNKNOWN if not args.warn or not args.critical or args.warn > args.critical: print("ERROR: warn and critical level must be set and critical must be greater than warn") return STATUS_UNKNOWN # build command ceph_osd_df = [ceph_exec] if args.monaddress: ceph_osd_df.append('-m') ceph_osd_df.append(args.monaddress) if args.conf: ceph_osd_df.append('-c') ceph_osd_df.append(args.conf) if args.id: ceph_osd_df.append('--id') ceph_osd_df.append(args.id) if args.name: ceph_osd_df.append('--name') ceph_osd_df.append(args.name) if args.keyring: ceph_osd_df.append('--keyring') ceph_osd_df.append(args.keyring) ceph_osd_df.append('osd') ceph_osd_df.append('df') ceph_osd_df.append('--format=json') # exec command p = subprocess.Popen(ceph_osd_df,stdout=subprocess.PIPE,stderr=subprocess.PIPE) output, err = p.communicate() # parse output # print "DEBUG: output:", output # print "DEBUG: err:", err if output: # parse output try: result = json.loads(output) check_return_value = STATUS_OK nodes_sorted = sorted(result["nodes"], key=itemgetter('utilization','id')) warn_crit_osds = [] for node in reversed(nodes_sorted): if node["utilization"] >= args.warn and check_return_value is not STATUS_ERROR: check_return_value = STATUS_WARNING warn_crit_osds.append("{}={:04.2f}".format(node["name"], node["utilization"])) if node["utilization"] >= args.critical: check_return_value = STATUS_ERROR warn_crit_osds.append("{}={:04.2f}".format(node["name"], node["utilization"])) if check_return_value == STATUS_OK: print("OK: All OSDs within limits") return STATUS_OK elif check_return_value == STATUS_WARNING: print("WARNING: OSD usage above warn threshold: {:.4054}".format(", ".join(warn_crit_osds))) return STATUS_WARNING elif check_return_value == STATUS_ERROR: print("CRITICAL: OSD usage above critical or warn threshold: {:.4041}".format(", ".join(warn_crit_osds))) return STATUS_ERROR except: print("ERROR: {}".format(sys.exc_info()[0])) return STATUS_UNKNOWN elif err: # read only first line of error one_line = err.split('\n')[0] if '-1 ' in one_line: idx = one_line.rfind('-1 ') print('ERROR: %s: %s' % (ceph_exec, one_line[idx+len('-1 '):])) else: print(one_line) return STATUS_UNKNOWN if __name__ == "__main__": sys.exit(main())