Ajouter check_rabbitmq pour Python 3
gitea/ansible-roles/pipeline/head Something is wrong with the build of this commit
Details
gitea/ansible-roles/pipeline/head Something is wrong with the build of this commit
Details
This commit is contained in:
parent
70225180eb
commit
644793d2ec
|
@ -0,0 +1,226 @@
|
|||
#!/usr/bin/env python3
|
||||
from optparse import OptionParser
|
||||
import shlex
|
||||
import subprocess
|
||||
import sys
|
||||
import requests
|
||||
import json
|
||||
|
||||
if "check_output" not in dir( subprocess ): # duck punch it in!
|
||||
def f(*popenargs, **kwargs):
|
||||
if 'stdout' in kwargs:
|
||||
raise ValueError('stdout argument not allowed, it will be overridden.')
|
||||
process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
|
||||
output, unused_err = process.communicate()
|
||||
retcode = process.poll()
|
||||
if retcode:
|
||||
cmd = kwargs.get("args")
|
||||
if cmd is None:
|
||||
cmd = popenargs[0]
|
||||
raise subprocess.CalledProcessError(retcode, cmd)
|
||||
return output
|
||||
subprocess.check_output = f
|
||||
|
||||
|
||||
class RabbitCmdWrapper(object):
|
||||
"""So basically this just runs rabbitmqctl commands and returns parsed output.
|
||||
Typically this means you need root privs for this to work.
|
||||
Made this it's own class so it could be used in other monitoring tools
|
||||
if desired."""
|
||||
|
||||
@classmethod
|
||||
def list_connections(cls):
|
||||
args = shlex.split("sudo rabbitmqctl list_connections")
|
||||
cmd_result = subprocess.check_output(args, text=True).strip()
|
||||
results = cls._parse_list_results(cmd_result)
|
||||
return results
|
||||
|
||||
@classmethod
|
||||
def list_queues(cls):
|
||||
args = shlex.split('sudo rabbitmqctl list_queues')
|
||||
cmd_result = subprocess.check_output(args, text=True).strip()
|
||||
results = cls._parse_list_results(cmd_result)
|
||||
return results
|
||||
|
||||
@classmethod
|
||||
def status(cls):
|
||||
args = shlex.split('sudo rabbitmqctl status')
|
||||
cmd_result = subprocess.check_output(args, text=True).strip()
|
||||
results = cls._parse_list_results(cmd_result)
|
||||
return results
|
||||
|
||||
@classmethod
|
||||
def _parse_list_results(cls, result_string):
|
||||
results = result_string.strip().split('\n')
|
||||
#remove text fluff
|
||||
if "Listing connections ..." in results: results.remove("Listing connections ...")
|
||||
if "Listing queues ..." in results: results.remove("Listing queues ...")
|
||||
return_data = []
|
||||
for row in results:
|
||||
return_data.append(row.split('\t'))
|
||||
return return_data
|
||||
|
||||
|
||||
def check_connection_count(critical=0, warning=0):
|
||||
"""Checks to make sure the numbers of connections are within parameters."""
|
||||
try:
|
||||
count = len(RabbitCmdWrapper.list_connections())
|
||||
if count >= critical:
|
||||
print("CRITICAL - Connection Count %d" % count)
|
||||
sys.exit(2)
|
||||
elif count >= warning:
|
||||
print("WARNING - Connection Count %d" % count)
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("OK - Connection Count %d" % count)
|
||||
except Exception as err:
|
||||
print("CRITICAL - %s" % err)
|
||||
|
||||
|
||||
def check_queues_count(critical=1000, warning=1000):
|
||||
"""
|
||||
A blanket check to make sure all queues are within count parameters.
|
||||
TODO: Possibly break this out so test can be done on individual queues.
|
||||
"""
|
||||
try:
|
||||
critical_q = []
|
||||
warning_q = []
|
||||
results = RabbitCmdWrapper.list_queues()
|
||||
for queue in results:
|
||||
if queue.count == 2:
|
||||
count = int(queue[1])
|
||||
if count >= critical:
|
||||
critical_q.append("%s: %s" % (queue[0], count))
|
||||
elif count >= warning:
|
||||
warning_q.append("%s: %s" % (queue[0], count))
|
||||
if critical_q:
|
||||
print("CRITICAL - %s" % ", ".join(critical_q))
|
||||
sys.exit(2)
|
||||
elif warning_q:
|
||||
print("WARNING - %s" % ", ".join(warning_q))
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("OK - NO QUEUES EXCEED THRESHOLDS")
|
||||
sys.exit(0)
|
||||
except Exception as err:
|
||||
print("CRITICAL - %s" % err)
|
||||
sys.exit(2)
|
||||
|
||||
def check_mem_usage(critical=75, warning=50):
|
||||
"""Check to make sure the RAM usage of rabbitmq process does not exceed 50%% of its max"""
|
||||
try:
|
||||
results = RabbitCmdWrapper.status()
|
||||
|
||||
for idx,val in enumerate(results):
|
||||
if "memory," in str(val):
|
||||
mem_used_raw = str(results[idx + 1])
|
||||
if "vm_memory_limit" in str(val):
|
||||
mem_limit_raw = str(val)
|
||||
|
||||
memory_used = float(filter(str.isdigit, mem_used_raw))
|
||||
memory_limit = float(filter(str.isdigit, mem_limit_raw))
|
||||
percent_usage = int(memory_used/memory_limit * 100)
|
||||
|
||||
if percent_usage > critical:
|
||||
print("CRITICAL - RABBITMQ RAM USAGE at %s%% of max" % percent_usage)
|
||||
sys.exit(2)
|
||||
elif percent_usage > warning:
|
||||
print("WARNING - RABBITMQ RAM USAGE at %s%% of max" % percent_usage)
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("OK - RABBITMQ RAM USAGE OK at %s%% of max" % percent_usage)
|
||||
sys.exit(0)
|
||||
except Exception as err:
|
||||
print("Critical - %s" % err)
|
||||
sys.exit(2)
|
||||
|
||||
def check_aliveness(username, password, timeout, cluster):
|
||||
"""Declares a test queue, then publishes and consumes a message. Intended for use by monitoring tools. If everything is working correctly, will return HTTP status 200 with body"""
|
||||
try:
|
||||
r = requests.get("http://%s:15672/api/aliveness-test/%%2F" % cluster, auth=(username, password), timeout=timeout)
|
||||
except requests.exceptions.RequestException as e: # Throw error if rabbitmq is down
|
||||
print("Critical - %s" % e)
|
||||
sys.exit(2)
|
||||
if r.status_code == 200:
|
||||
print("OK - RABBITMQ Aliveness Test Returns: %s" % r)
|
||||
sys.exit(0)
|
||||
elif r.status_code != 200:
|
||||
print("CRITICAL - RabbitMQ Error: %s" % r.content)
|
||||
sys.exit(2)
|
||||
else:
|
||||
print("UNKNOWN - RABBITMQ Aliveness Test")
|
||||
sys.ext(1)
|
||||
|
||||
def check_cluster(username, password, timeout, cluster):
|
||||
"""Checks the health of a cluster, if a node is not running mark as offline """
|
||||
try:
|
||||
url = "http://%s:15672/api/nodes" % cluster
|
||||
r = requests.get(url, auth=(username, password), timeout=timeout)
|
||||
except requests.exceptions.RequestException as e: # Throw error if no response
|
||||
print("Critical - %s" % e)
|
||||
sys.exit(2)
|
||||
text = r.text
|
||||
nodes = json.loads(text)
|
||||
|
||||
running_nodes = []
|
||||
failed_nodes = []
|
||||
for node in nodes:
|
||||
if not node['running']:
|
||||
failed_nodes.append(node['name'])
|
||||
if node['running']:
|
||||
running_nodes.append(node['name'])
|
||||
if len(failed_nodes) == 1:
|
||||
print("WARNING: RabbitMQ cluster is degraged: Not running %s" % failed_nodes[0])
|
||||
sys.exit(1)
|
||||
elif len(failed_nodes) >= 2:
|
||||
print("CRITICAL: RabbitMQ cluster is critical: Not running %s" % failed_nodes)
|
||||
sys.exit(2)
|
||||
else:
|
||||
print("OK: RabbitMQ cluster members: %s" % (" ".join(running_nodes)))
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
USAGE = """Usage: ./check_rabbitmq -a [action] -C [critical] -W [warning]
|
||||
Actions:
|
||||
- connection_count
|
||||
checks the number of connection in rabbitmq's list_connections
|
||||
- queues_count
|
||||
checks the count in each of the queues in rabbitmq's list_queues
|
||||
- mem_usage
|
||||
checks to ensure mem usage of rabbitmq process does not exceed 50%
|
||||
- aliveness
|
||||
Use the /api/aliveness-test API to send/receive a message. (requires -u username -p password args)
|
||||
- cluster_status
|
||||
Parse /api/nodes to check the cluster status. (requires -u username -p password"""
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = OptionParser(USAGE)
|
||||
parser.add_option("-a", "--action", dest="action",
|
||||
help="Action to Check")
|
||||
parser.add_option("-C", "--critical", dest="critical",
|
||||
type="int", help="Critical Threshold")
|
||||
parser.add_option("-W", "--warning", dest="warning",
|
||||
type="int", help="Warning Threshold")
|
||||
parser.add_option("-u", "--username", dest="username", default="guest",
|
||||
type="string", help="RabbitMQ username, Default guest")
|
||||
parser.add_option("-p", "--password", dest="password", default="guest",
|
||||
type="string", help="RabbitMQ password, Default guest")
|
||||
parser.add_option("-t", "--timeout", dest="timeout", default=1,
|
||||
type="int", help="Request Timeout, defaults to 1 second")
|
||||
parser.add_option("-c", "--cluster", dest="cluster", default="localhost",
|
||||
type="string", help="Cluster IP/DNS name, defaults to localhost")
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
if options.action == "connection_count":
|
||||
check_connection_count(options.critical, options.warning)
|
||||
elif options.action == "queues_count":
|
||||
check_queues_count(options.critical, options.warning)
|
||||
elif options.action == "mem_usage":
|
||||
check_mem_usage(options.critical, options.warning)
|
||||
elif options.action == "aliveness":
|
||||
check_aliveness(options.username, options.password, options.timeout, options.cluster)
|
||||
elif options.action == "cluster_status":
|
||||
check_cluster(options.username, options.password, options.timeout, options.cluster)
|
||||
else:
|
||||
print("Invalid action: %s" % options.action)
|
||||
print(USAGE)
|
|
@ -24,6 +24,17 @@
|
|||
group: root
|
||||
mode: "0755"
|
||||
force: yes
|
||||
when: ansible_distribution_major_version is version('11', '<=')
|
||||
|
||||
- name: check_rabbitmq (Python 3 version) is installed
|
||||
copy:
|
||||
src: check_rabbitmq.python3
|
||||
dest: /usr/local/lib/nagios/plugins/check_rabbitmq
|
||||
owner: root
|
||||
group: root
|
||||
mode: "0755"
|
||||
force: yes
|
||||
when: ansible_distribution_major_version is version('11', '==')
|
||||
|
||||
- name: check_rabbitmq is available for NRPE
|
||||
lineinfile:
|
||||
|
|
Loading…
Reference in New Issue