From 644793d2ecf4e202fc522ff80c7d184671ddb31e Mon Sep 17 00:00:00 2001
From: Alexis Ben Miloud--Josselin
Date: Mon, 1 Aug 2022 17:43:00 +0200
Subject: [PATCH] Ajouter check_rabbitmq pour Python 3

---
 rabbitmq/files/check_rabbitmq.python3 | 265 ++++++++++++++++++++++++++
 rabbitmq/tasks/nrpe.yml               |  11 ++
 2 files changed, 276 insertions(+)
 create mode 100644 rabbitmq/files/check_rabbitmq.python3

diff --git a/rabbitmq/files/check_rabbitmq.python3 b/rabbitmq/files/check_rabbitmq.python3
new file mode 100644
index 00000000..0a941dd4
--- /dev/null
+++ b/rabbitmq/files/check_rabbitmq.python3
@@ -0,0 +1,265 @@
+#!/usr/bin/env python3
+"""Nagios/NRPE plugin checking the health of a RabbitMQ server.
+
+Each check prints a one-line status and exits with the usual plugin codes:
+0 (OK), 1 (WARNING), 2 (CRITICAL) or 3 (UNKNOWN).
+"""
+from optparse import OptionParser
+import shlex
+import subprocess
+import sys
+import requests
+import json
+
+
+class RabbitCmdWrapper(object):
+    """Run rabbitmqctl commands and return their parsed output.
+
+    The commands are run through sudo, so the calling user needs the
+    matching sudo rights.  This is its own class so that it can be reused
+    by other monitoring tools if desired.
+    """
+
+    @classmethod
+    def _run(cls, command):
+        """Run a command line and return its output as a list of rows."""
+        args = shlex.split(command)
+        cmd_result = subprocess.check_output(args, text=True).strip()
+        return cls._parse_list_results(cmd_result)
+
+    @classmethod
+    def list_connections(cls):
+        """Return the rows printed by ``rabbitmqctl list_connections``."""
+        return cls._run("sudo rabbitmqctl list_connections")
+
+    @classmethod
+    def list_queues(cls):
+        """Return the rows printed by ``rabbitmqctl list_queues``."""
+        return cls._run("sudo rabbitmqctl list_queues")
+
+    @classmethod
+    def status(cls):
+        """Return the rows printed by ``rabbitmqctl status``."""
+        return cls._run("sudo rabbitmqctl status")
+
+    @classmethod
+    def _parse_list_results(cls, result_string):
+        """Split raw rabbitmqctl output into rows of tab-separated fields.
+
+        Blank lines and informational lines ending in "..." (such as
+        "Listing queues ...") are dropped, so empty output gives an empty
+        list rather than one bogus row.
+        """
+        return_data = []
+        for row in result_string.strip().split('\n'):
+            # remove text fluff
+            if not row.strip() or row.rstrip().endswith("..."):
+                continue
+            return_data.append(row.split('\t'))
+        return return_data
+
+
+def check_connection_count(critical=0, warning=0):
+    """Check that the number of connections stays below the thresholds.
+
+    Exits 2 when the count reaches ``critical``, 1 when it reaches
+    ``warning`` and 0 otherwise; failing to list connections is critical.
+    """
+    try:
+        count = len(RabbitCmdWrapper.list_connections())
+        if count >= critical:
+            print("CRITICAL - Connection Count %d" % count)
+            sys.exit(2)
+        elif count >= warning:
+            print("WARNING - Connection Count %d" % count)
+            sys.exit(1)
+        else:
+            print("OK - Connection Count %d" % count)
+            sys.exit(0)
+    except Exception as err:
+        print("CRITICAL - %s" % err)
+        # Without an explicit exit the failure would be reported as OK
+        sys.exit(2)
+
+
+def check_queues_count(critical=1000, warning=1000):
+    """Check that no queue holds more messages than the thresholds allow.
+
+    TODO: Possibly break this out so test can be done on individual queues.
+    """
+    try:
+        critical_q = []
+        warning_q = []
+        results = RabbitCmdWrapper.list_queues()
+        for queue in results:
+            # Keep two-field (name, count) rows only; a non-numeric second
+            # field means a header or banner line, not a queue.
+            if len(queue) != 2 or not queue[1].strip().isdigit():
+                continue
+            count = int(queue[1])
+            if count >= critical:
+                critical_q.append("%s: %s" % (queue[0], count))
+            elif count >= warning:
+                warning_q.append("%s: %s" % (queue[0], count))
+        if critical_q:
+            print("CRITICAL - %s" % ", ".join(critical_q))
+            sys.exit(2)
+        elif warning_q:
+            print("WARNING - %s" % ", ".join(warning_q))
+            sys.exit(1)
+        else:
+            print("OK - NO QUEUES EXCEED THRESHOLDS")
+            sys.exit(0)
+    except Exception as err:
+        print("CRITICAL - %s" % err)
+        sys.exit(2)
+
+
+def check_mem_usage(critical=75, warning=50):
+    """Check the RAM used by RabbitMQ as a percentage of its memory limit.
+
+    The figures are scraped from ``rabbitmqctl status``: the row after the
+    one containing "memory," is read as the usage and the row containing
+    "vm_memory_limit" as the limit.
+    """
+    try:
+        results = RabbitCmdWrapper.status()
+
+        mem_used_raw = None
+        mem_limit_raw = None
+        for idx, val in enumerate(results):
+            if "memory," in str(val):
+                mem_used_raw = str(results[idx + 1])
+            if "vm_memory_limit" in str(val):
+                mem_limit_raw = str(val)
+        if mem_used_raw is None or mem_limit_raw is None:
+            raise ValueError("memory figures not found in rabbitmqctl status")
+
+        # filter() returns an iterator on Python 3, so the digits have to
+        # be joined back into a string before the conversion.
+        memory_used = float("".join(filter(str.isdigit, mem_used_raw)))
+        memory_limit = float("".join(filter(str.isdigit, mem_limit_raw)))
+        percent_usage = int(memory_used / memory_limit * 100)
+
+        if percent_usage > critical:
+            print("CRITICAL - RABBITMQ RAM USAGE at %s%% of max" % percent_usage)
+            sys.exit(2)
+        elif percent_usage > warning:
+            print("WARNING - RABBITMQ RAM USAGE at %s%% of max" % percent_usage)
+            sys.exit(1)
+        else:
+            print("OK - RABBITMQ RAM USAGE OK at %s%% of max" % percent_usage)
+            sys.exit(0)
+    except Exception as err:
+        print("Critical - %s" % err)
+        sys.exit(2)
+
+
+def check_aliveness(username, password, timeout, cluster):
+    """Run the management API aliveness test on the "/" vhost.
+
+    The API declares a test queue, then publishes and consumes a message.
+    If everything is working correctly it returns HTTP status 200.
+    """
+    try:
+        r = requests.get("http://%s:15672/api/aliveness-test/%%2F" % cluster,
+                         auth=(username, password), timeout=timeout)
+    except requests.exceptions.RequestException as e:  # rabbitmq is down
+        print("Critical - %s" % e)
+        sys.exit(2)
+    if r.status_code == 200:
+        print("OK - RABBITMQ Aliveness Test Returns: %s" % r)
+        sys.exit(0)
+    # Use r.text: r.content is bytes and would print as b'...'
+    print("CRITICAL - RabbitMQ Error: %s" % r.text)
+    sys.exit(2)
+
+
+def check_cluster(username, password, timeout, cluster):
+    """Check the health of a cluster from the management API node list.
+
+    One node not running is a warning, two or more are critical.
+    """
+    try:
+        url = "http://%s:15672/api/nodes" % cluster
+        r = requests.get(url, auth=(username, password), timeout=timeout)
+        # An HTTP error answer (e.g. bad credentials) is not a node list
+        r.raise_for_status()
+        nodes = json.loads(r.text)
+    except (requests.exceptions.RequestException, ValueError) as e:
+        print("Critical - %s" % e)
+        sys.exit(2)
+
+    running_nodes = []
+    failed_nodes = []
+    for node in nodes:
+        if node['running']:
+            running_nodes.append(node['name'])
+        else:
+            failed_nodes.append(node['name'])
+    if len(failed_nodes) == 1:
+        print("WARNING: RabbitMQ cluster is degraded: Not running %s" % failed_nodes[0])
+        sys.exit(1)
+    elif len(failed_nodes) >= 2:
+        print("CRITICAL: RabbitMQ cluster is critical: Not running %s" % failed_nodes)
+        sys.exit(2)
+    else:
+        print("OK: RabbitMQ cluster members: %s" % (" ".join(running_nodes)))
+        sys.exit(0)
+
+
+USAGE = """Usage: ./check_rabbitmq -a [action] -C [critical] -W [warning]
+    Actions:
+    - connection_count
+    checks the number of connection in rabbitmq's list_connections
+    - queues_count
+    checks the count in each of the queues in rabbitmq's list_queues
+    - mem_usage
+    checks to ensure mem usage of rabbitmq process does not exceed 50%
+    - aliveness
+    Use the /api/aliveness-test API to send/receive a message.
+    (requires -u username -p password args)
+    - cluster_status
+    Parse /api/nodes to check the cluster status. (requires -u username -p password args)"""
+
+if __name__ == "__main__":
+    parser = OptionParser(USAGE)
+    parser.add_option("-a", "--action", dest="action",
+                      help="Action to Check")
+    parser.add_option("-C", "--critical", dest="critical",
+                      type="int", help="Critical Threshold")
+    parser.add_option("-W", "--warning", dest="warning",
+                      type="int", help="Warning Threshold")
+    parser.add_option("-u", "--username", dest="username", default="guest",
+                      type="string", help="RabbitMQ username, Default guest")
+    parser.add_option("-p", "--password", dest="password", default="guest",
+                      type="string", help="RabbitMQ password, Default guest")
+    parser.add_option("-t", "--timeout", dest="timeout", default=1,
+                      type="int", help="Request Timeout, defaults to 1 second")
+    parser.add_option("-c", "--cluster", dest="cluster", default="localhost",
+                      type="string", help="Cluster IP/DNS name, defaults to localhost")
+    (options, args) = parser.parse_args()
+
+    # Forward only the thresholds given on the command line, so that each
+    # check falls back to its own defaults instead of comparing with None.
+    thresholds = {}
+    if options.critical is not None:
+        thresholds["critical"] = options.critical
+    if options.warning is not None:
+        thresholds["warning"] = options.warning
+
+    if options.action == "connection_count":
+        check_connection_count(**thresholds)
+    elif options.action == "queues_count":
+        check_queues_count(**thresholds)
+    elif options.action == "mem_usage":
+        check_mem_usage(**thresholds)
+    elif options.action == "aliveness":
+        check_aliveness(options.username, options.password, options.timeout, options.cluster)
+    elif options.action == "cluster_status":
+        check_cluster(options.username, options.password, options.timeout, options.cluster)
+    else:
+        print("Invalid action: %s" % options.action)
+        print(USAGE)
+        # An unusable invocation must not be reported as OK (status 0)
+        sys.exit(3)
diff --git a/rabbitmq/tasks/nrpe.yml b/rabbitmq/tasks/nrpe.yml
index 4272f57b..ba6b8d47 100644
--- a/rabbitmq/tasks/nrpe.yml
+++ b/rabbitmq/tasks/nrpe.yml
@@ -24,6 +24,17 @@
     group: root
     mode: "0755"
     force: yes
+  when: ansible_distribution_major_version is version('11', '<')
+
+- name: check_rabbitmq (Python 3 version) is installed
+  copy:
+    src: check_rabbitmq.python3
+    dest: /usr/local/lib/nagios/plugins/check_rabbitmq
+    owner: root
+    group: root
+    mode: "0755"
+    force: yes
+  when: ansible_distribution_major_version is version('11', '>=')
 
 - name: check_rabbitmq is available for NRPE
   lineinfile: