From 1e6adc6a1abafbeaae38c7c1c6d48027ad78b2cc Mon Sep 17 00:00:00 2001 From: benoit Date: Wed, 11 Aug 2021 19:09:14 +0200 Subject: [PATCH] First working version --- .flake8 | 10 + .gitignore | 2 + README.md | 0 check_patroni/cli.py | 526 +++++++++++++++++- check_patroni/cluster.py | 164 ++++++ check_patroni/node.py | 205 +++++++ check_patroni/types.py | 63 +++ config.ini | 9 + mypy.ini | 5 + setup.py | 31 +- .../test.cpython-39-pytest-6.2.4.pyc | Bin 0 -> 3375 bytes ...ig_has_changed.cpython-39-pytest-6.2.4.pyc | Bin 0 -> 3647 bytes ...ter_has_leader.cpython-39-pytest-6.2.4.pyc | Bin 0 -> 1568 bytes ...er_has_replica.cpython-39-pytest-6.2.4.pyc | Bin 0 -> 1643 bytes ...ter_node_count.cpython-39-pytest-6.2.4.pyc | Bin 0 -> 3326 bytes ..._node_is_alive.cpython-39-pytest-6.2.4.pyc | Bin 0 -> 1504 bytes ...ending_restart.cpython-39-pytest-6.2.4.pyc | Bin 0 -> 1598 bytes ...ode_is_primary.cpython-39-pytest-6.2.4.pyc | Bin 0 -> 1542 bytes ...ode_is_replica.cpython-39-pytest-6.2.4.pyc | Bin 0 -> 1928 bytes ...atroni_version.cpython-39-pytest-6.2.4.pyc | Bin 0 -> 1594 bytes ...tl_has_changed.cpython-39-pytest-6.2.4.pyc | Bin 0 -> 3489 bytes test/__pycache__/tools.cpython-39.pyc | Bin 0 -> 1317 bytes test/json/cluster_config_has_changed.json | 16 + test/json/cluster_has_leader_ko.json | 33 ++ test/json/cluster_has_leader_ok.json | 33 ++ test/json/cluster_has_replica_ok.json | 33 ++ test/json/cluster_node_count.json | 32 ++ test/json/cluster_node_count_critical.json | 13 + test/json/cluster_node_count_ok.json | 33 ++ .../cluster_node_count_running_critical.json | 31 ++ .../cluster_node_count_running_warning.json | 23 + test/json/cluster_node_count_warning.json | 23 + test/json/node_is_alive.json | 19 + test/json/node_is_pending_restart_ko.json | 27 + test/json/node_is_pending_restart_ok.json | 26 + test/json/node_is_primary_ko.json | 19 + test/json/node_is_primary_ok.json | 26 + test/json/node_is_replica_ko.json | 26 + test/json/node_is_replica_ok.json | 19 + 
test/json/node_patroni_version.json | 26 + test/json/node_tl_has_changed.json | 26 + test/test_cluster_config_has_changed.py | 103 ++++ test/test_cluster_has_leader.py | 29 + test/test_cluster_has_replica.py | 36 ++ test/test_cluster_node_count.py | 115 ++++ test/test_node_is_alive.py | 22 + test/test_node_is_pending_restart.py | 26 + test/test_node_is_primary.py | 22 + test/test_node_is_replica.py | 33 ++ test/test_node_patroni_version.py | 40 ++ test/test_node_tl_has_changed.py | 104 ++++ test/tools.py | 26 + 52 files changed, 2033 insertions(+), 22 deletions(-) create mode 100644 .flake8 create mode 100644 .gitignore create mode 100644 README.md create mode 100644 check_patroni/cluster.py create mode 100644 check_patroni/node.py create mode 100644 check_patroni/types.py create mode 100644 config.ini create mode 100644 mypy.ini create mode 100644 test/__pycache__/test.cpython-39-pytest-6.2.4.pyc create mode 100644 test/__pycache__/test_cluster_config_has_changed.cpython-39-pytest-6.2.4.pyc create mode 100644 test/__pycache__/test_cluster_has_leader.cpython-39-pytest-6.2.4.pyc create mode 100644 test/__pycache__/test_cluster_has_replica.cpython-39-pytest-6.2.4.pyc create mode 100644 test/__pycache__/test_cluster_node_count.cpython-39-pytest-6.2.4.pyc create mode 100644 test/__pycache__/test_node_is_alive.cpython-39-pytest-6.2.4.pyc create mode 100644 test/__pycache__/test_node_is_pending_restart.cpython-39-pytest-6.2.4.pyc create mode 100644 test/__pycache__/test_node_is_primary.cpython-39-pytest-6.2.4.pyc create mode 100644 test/__pycache__/test_node_is_replica.cpython-39-pytest-6.2.4.pyc create mode 100644 test/__pycache__/test_node_patroni_version.cpython-39-pytest-6.2.4.pyc create mode 100644 test/__pycache__/test_node_tl_has_changed.cpython-39-pytest-6.2.4.pyc create mode 100644 test/__pycache__/tools.cpython-39.pyc create mode 100644 test/json/cluster_config_has_changed.json create mode 100644 test/json/cluster_has_leader_ko.json create mode 100644 
test/json/cluster_has_leader_ok.json create mode 100644 test/json/cluster_has_replica_ok.json create mode 100644 test/json/cluster_node_count.json create mode 100644 test/json/cluster_node_count_critical.json create mode 100644 test/json/cluster_node_count_ok.json create mode 100644 test/json/cluster_node_count_running_critical.json create mode 100644 test/json/cluster_node_count_running_warning.json create mode 100644 test/json/cluster_node_count_warning.json create mode 100644 test/json/node_is_alive.json create mode 100644 test/json/node_is_pending_restart_ko.json create mode 100644 test/json/node_is_pending_restart_ok.json create mode 100644 test/json/node_is_primary_ko.json create mode 100644 test/json/node_is_primary_ok.json create mode 100644 test/json/node_is_replica_ko.json create mode 100644 test/json/node_is_replica_ok.json create mode 100644 test/json/node_patroni_version.json create mode 100644 test/json/node_tl_has_changed.json create mode 100644 test/test_cluster_config_has_changed.py create mode 100644 test/test_cluster_has_leader.py create mode 100644 test/test_cluster_has_replica.py create mode 100644 test/test_cluster_node_count.py create mode 100644 test/test_node_is_alive.py create mode 100644 test/test_node_is_pending_restart.py create mode 100644 test/test_node_is_primary.py create mode 100644 test/test_node_is_replica.py create mode 100644 test/test_node_patroni_version.py create mode 100644 test/test_node_tl_has_changed.py create mode 100644 test/tools.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..158aaae --- /dev/null +++ b/.flake8 @@ -0,0 +1,10 @@ +[flake8] +doctests = True +ignore = + E501, # line too long +exclude = + .git, + .mypy_cache, + .tox, + .venv, +mypy_config = mypy.ini diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4b9e47f --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +check_patroni/__pycache__/ +test/*.state_file diff --git a/README.md b/README.md new file mode 100644 index 
0000000..e69de29 diff --git a/check_patroni/cli.py b/check_patroni/cli.py index 036c39f..e6f009f 100644 --- a/check_patroni/cli.py +++ b/check_patroni/cli.py @@ -1,17 +1,521 @@ -import requests +import click +from configparser import ConfigParser +import nagiosplugin +import re +from typing import List + +from . import __version__ +from .cluster import ( + ClusterConfigHasChanged, + ClusterConfigHasChangedSummary, + ClusterHasLeader, + ClusterHasLeaderSummary, + ClusterHasReplica, + ClusterNodeCount, +) +from .node import ( + NodeIsAlive, + NodeIsAliveSummary, + NodeIsPendingRestart, + NodeIsPendingRestartSummary, + NodeIsPrimary, + NodeIsPrimarySummary, + NodeIsReplica, + NodeIsReplicaSummary, + NodePatroniVersion, + NodePatroniVersionSummary, + NodeTLHasChanged, + NodeTLHasChangedSummary, +) +from .types import ConnectionInfo -def check_is_master(address: str = "127.0.0.1", port: int = 8008): - r = requests.get(f"{address}:{int(port)}/leader") - return r.status_code == 200 +def print_version(ctx: click.Context, param: str, value: str) -> None: + if not value or ctx.resilient_parsing: + return + click.echo(f"Version {__version__}") + ctx.exit() -def check_is_replica(address: str = "127.0.0.1", port: int = 8008): - r = requests.get(f"{address}:{int(port)}/replica") - return r.status_code == 200 +DEFAULT_CFG = "config.ini" -def main() -> None: - print(check_is_master()) - print(check_is_replica()) - print("allgood") +def configure(ctx: click.Context, param: str, filename: str) -> None: + """Use a config file for the parameters + stolen from https://jwodder.github.io/kbits/posts/click-config/ + """ + # FIXME should use click-configfile / click-config-file ? 
+ cfg = ConfigParser() + cfg.read(filename) + ctx.default_map = {} + for sect in cfg.sections(): + command_path = sect.split(".") + if command_path[0] != "options": + continue + defaults = ctx.default_map + for cmdname in command_path[1:]: + defaults = defaults.setdefault(cmdname, {}) + defaults.update(cfg[sect]) + try: + # endpoints is an array of addresses separated by , + if isinstance(defaults["endpoints"], str): + defaults["endpoints"] = re.split(r"\s*,\s*", defaults["endpoints"]) + except KeyError: + pass + + +@click.group() +@click.option( + "--config", + type=click.Path(dir_okay=False), + default=DEFAULT_CFG, + callback=configure, + is_eager=True, + expose_value=False, + help="Read option defaults from the specified INI file", + show_default=True, +) +@click.option( + "-e", + "--endpoints", + "endpoints", + type=str, + multiple=True, + default=["http://127.0.0.1:8008"], + help="API endpoint. Can be specified multiple times.", +) +@click.option( + "--cert_file", + "cert_file", + type=str, + help="File with the client certificate.", +) +@click.option( + "--key_file", + "key_file", + type=str, + help="File with the client key.", +) +@click.option( + "--ca_file", + "ca_file", + type=str, + help="The CA certificate.", +) +@click.option( + "-v", + "--verbose", + "verbose", + count=True, + help="Increase verbosity -v (info)/-vv (warning)/-vvv (debug)", +) +@click.option( + "--version", is_flag=True, callback=print_version, expose_value=False, is_eager=True +) +@click.option( + "--timeout", + "timeout", + default=2, + type=int, + help="Timeout in seconds for the API queries (0 to disable)", +) +@click.pass_context +@nagiosplugin.guarded +def main( + ctx: click.Context, + endpoints: List[str], + cert_file: str, + key_file: str, + ca_file: str, + verbose: bool, + timeout: int, +) -> None: + """Nagios plugin for patroni.""" + ctx.obj = ConnectionInfo(endpoints, cert_file, key_file, ca_file) + + # TODO Not all "is/has" services have the same return code for ok. 
Check if it's ok + # Typing + + +@main.command(name="cluster_node_count") # required otherwise _ are converted to - +@click.option( + "-w", + "--warning", + "warning", + type=str, + help="Warning threshold for the number of nodes.", +) +@click.option( + "-c", + "--critical", + "critical", + type=str, + help="Critical threshold for the nimber of nodes.", +) +@click.option( + "--running-warning", + "running_warning", + type=str, + help="Warning threshold for the number of running nodes.", +) +@click.option( + "--running-critical", + "running_critical", + type=str, + help="Critical threshold for the nimber of running nodes.", +) +@click.pass_context +@nagiosplugin.guarded +def cluster_node_count( + ctx: click.Context, + warning: str, + critical: str, + running_warning: str, + running_critical: str, +) -> None: + """Count the number of nodes in the cluster. + + \b + Check: + * Compares the number of nodes against the normal and running node warning and critical thresholds. + * `OK`! If they are not provided. + + \b + Perfdata: + * `members`: the member count. + * all the roles of the nodes in the cluster with their number. + """ + check = nagiosplugin.Check() + check.add( + ClusterNodeCount(ctx.obj), + nagiosplugin.ScalarContext( + "members", + warning, + critical, + ), + nagiosplugin.ScalarContext( + "state_running", + running_warning, + running_critical, + ), + nagiosplugin.ScalarContext("members_roles"), + nagiosplugin.ScalarContext("members_statuses"), + ) + check.main( + verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"] + ) + + +@main.command(name="cluster_has_leader") +@click.pass_context +@nagiosplugin.guarded +def cluster_has_leader(ctx: click.Context) -> None: + """Check if the cluster has a leader. + + \b + Check: + * `OK`: if there is a leader node. + * `CRITICAL`: otherwise + + Perfdata : `has_leader` is 1 if there is a leader node, 0 otherwise + """ + # TODO: Manage primary or standby leader in the same place ? 
+ check = nagiosplugin.Check() + check.add( + ClusterHasLeader(ctx.obj), + nagiosplugin.ScalarContext("has_leader", None, "@0:0"), + ClusterHasLeaderSummary(), + ) + check.main( + verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"] + ) + + +@main.command(name="cluster_has_replica") +@click.option( + "-w", + "--warning", + "warning", + type=str, + help="Warning threshold for the number of nodes.", +) +@click.option( + "-c", + "--critical", + "critical", + type=str, + help="Critical threshold for the number of replica nodes.", +) +@click.option( + "--lag-warning", "lag_warning", type=str, help="Warning threshold for the lag." +) +# FIWME how do we manage maximum_lag_on_failover without doing many api calls +@click.option( + "--lag-critical", "lag_critical", type=str, help="Critical threshold for the lag." +) +@click.pass_context +@nagiosplugin.guarded +def cluster_has_replica( + ctx: click.Context, warning: str, critical: str, lag_warning: str, lag_critical: str +) -> None: + """Check if the cluster has replicas and their lag. + + \b + Check: + * `OK`: if the replica count and their lag are compatible with the replica count and lag thresholds. + * `WARNING` / `CRITICAL`: otherwise + + \b + Perfdata : + * replica count + * the lag of each replica labelled with "member name"_lag + """ + # FIXME the idea here would be to make sur we have a replica. 
+ # lag should be check to prune invalid replicas + check = nagiosplugin.Check() + check.add( + ClusterHasReplica(ctx.obj), + nagiosplugin.ScalarContext( + "replica_count", + warning, + critical, + ), + nagiosplugin.ScalarContext( + "replica_lag", + lag_warning, + lag_critical, + ), + ) + check.main( + verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"] + ) + + +@main.command(name="cluster_config_has_changed") +@click.option("--hash", "config_hash", type=str, help="A hash to compare with.") +@click.option( + "-s", + "--state-file", + "state_file", + type=str, + help="A state file to store the tl number into.", +) +@click.pass_context +@nagiosplugin.guarded +def cluster_config_has_changed( + ctx: click.Context, config_hash: str, state_file: str +) -> None: + """Check if the hash of the configuration has changed. + + Note: either a hash or a state file must be provided for this service to work. + + \b + Check: + * `OK`: The hash didn't change + * `CRITICAL`: The hash of the configuration has changed compared to the input (`--hash`) or last time (`--state_file`) + + \b + Perfdata : + * `is_configuration_changed` is 1 if the configuration has changed + """ + # FIXME hash in perfdata ? + if (config_hash is None and state_file is None) or ( + config_hash is not None and state_file is not None + ): + raise click.UsageError( + "Either --hash or --state-file should be provided for this service", ctx + ) + + check = nagiosplugin.Check() + check.add( + ClusterConfigHasChanged(ctx.obj, config_hash, state_file), + nagiosplugin.ScalarContext("is_configuration_changed", None, "@1:1"), + ClusterConfigHasChangedSummary(), + ) + check.main( + verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"] + ) + + +@main.command(name="node_is_primary") +@click.pass_context +@nagiosplugin.guarded +def node_is_primary(ctx: click.Context) -> None: + """Check if the node is the primary with the leader lock. 
+ + \b + Check: + * `OK`: if the node is a primary with the leader lock. + * `CRITICAL:` otherwise + + Perfdata: `is_primary` is 1 if the node is a primary with the leader lock, 0 otherwise. + """ + check = nagiosplugin.Check() + check.add( + NodeIsPrimary(ctx.obj), + nagiosplugin.ScalarContext("is_primary", None, "@0:0"), + NodeIsPrimarySummary(), + ) + check.main( + verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"] + ) + + +@main.command(name="node_is_replica") +@click.option("--lag", "lag", type=str, help="maximum allowed lag") +@click.pass_context +@nagiosplugin.guarded +def node_is_replica(ctx: click.Context, lag: str) -> None: + """Check if the node is a running replica with no noloadbalance tag. + + \b + Check: + * `OK`: if the node is a running replica with noloadbalance tag and the lag is under the maximum threshold. + * `CRITICAL`: otherwise + + Perfdata : `is_replica` is 1 if the node is a running replica with noloadbalance tag and the lag is under the maximum threshold, 0 otherwise. + """ + # add a lag check ?? + check = nagiosplugin.Check() + check.add( + NodeIsReplica(ctx.obj, lag), + nagiosplugin.ScalarContext("is_replica", None, "@0:0"), + NodeIsReplicaSummary(lag), + ) + check.main( + verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"] + ) + + +@main.command(name="node_is_pending_restart") +@click.pass_context +@nagiosplugin.guarded +def node_is_pending_restart(ctx: click.Context) -> None: + """Check if the node is in pending restart state. + + This situation can arise if the configuration has been modified but + requiers arestart of PostgreSQL. + + \b + Check: + * `OK`: if the node has pending restart tag. + * `CRITICAL`: otherwise + + Perfdata: `is_pending_restart` is 1 if the node has pending restart tag, 0 otherwise. 
+ """ + check = nagiosplugin.Check() + check.add( + NodeIsPendingRestart(ctx.obj), + nagiosplugin.ScalarContext("is_pending_restart", None, "@1:1"), + NodeIsPendingRestartSummary(), + ) + check.main( + verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"] + ) + + +@main.command(name="node_tl_has_changed") +@click.option( + "--timeline", "timeline", type=str, help="A timeline number to compare with." +) +@click.option( + "-s", + "--state-file", + "state_file", + type=str, + help="A state file to store the last tl number into.", +) +@click.pass_context +@nagiosplugin.guarded +def node_tl_has_changed(ctx: click.Context, timeline: str, state_file: str) -> None: + """Check if the timeline hash changed. + + Note: either a timeline or a state file must be provided for this service to work. + + \b + Check: + * `OK`: The timeline is the same as last time (`--state_file`) or the inputed timeline (`--timeline`) + * `CRITICAL`: The tl is not the same. + + \b + Perfdata : + * `is_configuration_changed` is 1 if the configuration has changed, 0 otherwise + """ + if (timeline is None and state_file is None) or ( + timeline is not None and state_file is not None + ): + raise click.UsageError( + "Either --timeline or --state-file should be provided for this service", ctx + ) + + check = nagiosplugin.Check() + check.add( + NodeTLHasChanged(ctx.obj, timeline, state_file), + nagiosplugin.ScalarContext("is_timeline_changed", None, "@1:1"), + nagiosplugin.ScalarContext("timeline"), + NodeTLHasChangedSummary(timeline), + ) + check.main( + verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"] + ) + + +@main.command(name="node_patroni_version") +@click.option( + "--patroni-version", + "patroni_version", + type=str, + help="Patroni version to compare to", + required=True, +) +@click.pass_context +@nagiosplugin.guarded +def node_patroni_version(ctx: click.Context, patroni_version: str) -> None: + """Check if the version is equal to the input + + \b + 
Check: + * `OK`: The version is the same as the input `--patroni-version` + * `CRITICAL`: otherwise. + + \b + Perfdata : + * `is_version_ok` is 1 if version is ok, 0 otherwise + """ + # TODO the version cannot be written in perfdata find something else ? + check = nagiosplugin.Check() + check.add( + NodePatroniVersion(ctx.obj, patroni_version), + nagiosplugin.ScalarContext("is_version_ok", None, "@0:0"), + nagiosplugin.ScalarContext("patroni_version"), + NodePatroniVersionSummary(patroni_version), + ) + check.main( + verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"] + ) + + +@main.command(name="node_is_alive") +@click.pass_context +@nagiosplugin.guarded +def node_is_alive(ctx: click.Context) -> None: + """Check if the node is alive ie patroni is running. + + \b + Check: + * `OK`: If patroni is running. + * `CRITICAL`: otherwise. + + \b + Perfdata : + * `is_running` is 1 if patroni is running, 0 otherwise + """ + check = nagiosplugin.Check() + check.add( + NodeIsAlive(ctx.obj), + nagiosplugin.ScalarContext("is_alive", None, "@0:0"), + NodeIsAliveSummary(), + ) + check.main( + verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"] + ) diff --git a/check_patroni/cluster.py b/check_patroni/cluster.py new file mode 100644 index 0000000..356b7ee --- /dev/null +++ b/check_patroni/cluster.py @@ -0,0 +1,164 @@ +from collections import Counter +import hashlib +import json +import logging +import nagiosplugin + +from .types import PatroniResource, ConnectionInfo, handle_unknown + +_log = logging.getLogger("nagiosplugin") + + +def replace_chars(text: str) -> str: + return text.replace("'", "").replace(" ", "_") + + +class ClusterNodeCount(PatroniResource): + def probe(self: "ClusterNodeCount") -> nagiosplugin.Metric: + r = self.rest_api("cluster") + # FIXME RC <> 200 ? 
+ _log.debug(f"api call status: {r.status}") + _log.debug(f"api call data: {r.data}") + + item_dict = json.loads(r.data) + role_counters = Counter() + roles = [] + status_counters = Counter() + statuses = [] + + for member in item_dict["members"]: + roles.append(replace_chars(member["role"])) + statuses.append(replace_chars(member["state"])) + role_counters.update(roles) + status_counters.update(statuses) + + # The actual check: members, running state + yield nagiosplugin.Metric("members", len(item_dict["members"])) + yield nagiosplugin.Metric("state_running", status_counters["running"]) + + # The performance data : role + for role in role_counters: + yield nagiosplugin.Metric( + f"role_{role}", role_counters[role], context="members_roles" + ) + + # The performance data : statuses (except running) + for state in status_counters: + if state != "running": + yield nagiosplugin.Metric( + f"state_{state}", status_counters[state], context="members_statuses" + ) + + +class ClusterHasLeader(PatroniResource): + def probe(self: "ClusterHasLeader") -> nagiosplugin.Metric: + r = self.rest_api("cluster") + # FIXME RC <> 200 ? + _log.debug(f"api call status: {r.status}") + _log.debug(f"api call data: {r.data}") + + item_dict = json.loads(r.data) + is_leader_found = False + for member in item_dict["members"]: + if member["role"] == "leader" and member["state"] == "running": + is_leader_found = True + break + + return [ + nagiosplugin.Metric( + "has_leader", + 1 if is_leader_found else 0, + ) + ] + + +class ClusterHasLeaderSummary(nagiosplugin.Summary): + def ok(self: "ClusterHasLeaderSummary", results: nagiosplugin.Result) -> str: + return "The cluster has a running leader." + + @handle_unknown + def problem(self: "ClusterHasLeaderSummary", results: nagiosplugin.Result) -> str: + return "The cluster has no running leader." 
+ + +class ClusterHasReplica(PatroniResource): + def probe(self: "ClusterHasReplica") -> nagiosplugin.Metric: + r = self.rest_api("cluster") + # FIXME RC <> 200 ? + _log.debug(f"api call status: {r.status}") + _log.debug(f"api call data: {r.data}") + + item_dict = json.loads(r.data) + replicas = [] + for member in item_dict["members"]: + # FIXME are there other acceptable states + if member["role"] == "replica" and member["state"] == "running": + # FIXME which lag ? + replicas.append({"name": member["name"], "lag": member["lag"]}) + break + + # The actual check + yield nagiosplugin.Metric("replica_count", len(replicas)) + + # The performance data : replicas lag + for replica in replicas: + yield nagiosplugin.Metric( + f"{replica['name']}_lag", replica["lag"], context="replica_lag" + ) + + +# FIXME is this needed ?? +# class ClusterHasReplicaSummary(nagiosplugin.Summary): +# def ok(self, results): +# def problem(self, results): + + +class ClusterConfigHasChanged(PatroniResource): + def __init__( + self: "ClusterConfigHasChanged", + connection_info: ConnectionInfo, + config_hash: str, + state_file: str, + ): + super().__init__(connection_info) + self.state_file = state_file + self.config_hash = config_hash + + def probe(self: "ClusterConfigHasChanged") -> nagiosplugin.Metric: + r = self.rest_api("config") + # FIXME RC <> 200 ? 
+ _log.debug(f"api call status: {r.status}") + _log.debug(f"api call data: {r.data}") + + new_hash = hashlib.md5(r.data).hexdigest() + + if self.state_file is not None: + _log.debug(f"Using state file / cookie {self.state_file}") + cookie = nagiosplugin.Cookie(self.state_file) + cookie.open() + old_hash = cookie.get("hash") + cookie["hash"] = new_hash + cookie.commit() + else: + _log.debug(f"Using input value {self.config_hash}") + old_hash = self.config_hash + + _log.debug(f"hash info: old hash {old_hash}, new hash {new_hash}") + + return [ + nagiosplugin.Metric( + "is_configuration_changed", + 1 if new_hash != old_hash else 0, + ) + ] + + +class ClusterConfigHasChangedSummary(nagiosplugin.Summary): + def ok(self: "ClusterConfigHasChangedSummary", results: nagiosplugin.Result) -> str: + return "The hash of patroni's dynamic configuration has not changed." + + @handle_unknown + def problem( + self: "ClusterConfigHasChangedSummary", results: nagiosplugin.Result + ) -> str: + return "The hash of patroni's dynamic configuration has changed." diff --git a/check_patroni/node.py b/check_patroni/node.py new file mode 100644 index 0000000..b5f7828 --- /dev/null +++ b/check_patroni/node.py @@ -0,0 +1,205 @@ +import json +import logging +import nagiosplugin + +from .types import ConnectionInfo, handle_unknown, PatroniResource + +_log = logging.getLogger("nagiosplugin") + + +class NodeIsPrimary(PatroniResource): + def probe(self: "NodeIsPrimary") -> nagiosplugin.Metric: + r = self.rest_api("primary") + _log.debug(f"api call status: {r.status}") + _log.debug(f"api call data: {r.data}") + + return [nagiosplugin.Metric("is_primary", 1 if r.status == 200 else 0)] + + +class NodeIsPrimarySummary(nagiosplugin.Summary): + def ok(self: "NodeIsPrimarySummary", results: nagiosplugin.Result) -> str: + return "This node is the primary with the leader lock." 
+ + @handle_unknown + def problem(self: "NodeIsPrimarySummary", results: nagiosplugin.Result) -> str: + return "This node is not the primary with the leader lock." + + +class NodeIsReplica(PatroniResource): + def __init__( + self: "NodeIsReplica", connection_info: ConnectionInfo, lag: str + ) -> None: + super().__init__(connection_info) + self.lag = lag + + def probe(self: "NodeIsReplica") -> nagiosplugin.Metric: + if self.lag is None: + r = self.rest_api("replica") + else: + r = self.rest_api(f"replica?lag={self.lag}") + _log.debug(f"api call status: {r.status}") + _log.debug(f"api call data: {r.data}") + + return [nagiosplugin.Metric("is_replica", 1 if r.status == 200 else 0)] + + +class NodeIsReplicaSummary(nagiosplugin.Summary): + def __init__(self: "NodeIsReplicaSummary", lag: str) -> None: + self.lag = lag + + def ok(self: "NodeIsReplicaSummary", results: nagiosplugin.Result) -> str: + if self.lag is None: + return "This node is a running replica with no noloadbalance tag." + return f"This node is a running replica with no noloadbalance tag and the lag is under {self.lag}." + + @handle_unknown + def problem(self: "NodeIsReplicaSummary", results: nagiosplugin.Result) -> str: + if self.lag is None: + return "This node is not a running replica with no noloadbalance tag." + return f"This node is not a running replica with no noloadbalance tag and a lag under {self.lag}." + + +class NodeIsPendingRestart(PatroniResource): + def probe(self: "NodeIsPendingRestart") -> nagiosplugin.Metric: + r = self.rest_api("patroni") + # FIXME RC <> 200 ? 
+ _log.debug(f"api call status: {r.status}") + _log.debug(f"api call data: {r.data}") + + item_dict = json.loads(r.data) + is_pending_restart = item_dict.get("pending_restart", False) + return [ + nagiosplugin.Metric( + "is_pending_restart", + 1 if is_pending_restart else 0, + ) + ] + + +class NodeIsPendingRestartSummary(nagiosplugin.Summary): + def ok(self: "NodeIsPendingRestartSummary", results: nagiosplugin.Result) -> str: + return "This node doesn't have the pending restart flag." + + @handle_unknown + def problem( + self: "NodeIsPendingRestartSummary", results: nagiosplugin.Result + ) -> str: + return "This node has the pending restart flag." + + +class NodeTLHasChanged(PatroniResource): + def __init__( + self: "NodeTLHasChanged", + connection_info: ConnectionInfo, + timeline: str, + state_file: str, + ) -> None: + super().__init__(connection_info) + self.state_file = state_file + self.timeline = timeline + + def probe(self: "NodeTLHasChanged") -> nagiosplugin.Metric: + r = self.rest_api("patroni") + # FIXME RC <> 200 ? 
+ _log.debug(f"api call status: {r.status}") + _log.debug(f"api call data: {r.data}") + + item_dict = json.loads(r.data) + new_tl = item_dict["timeline"] + + if self.state_file is not None: + _log.debug(f"Using state file / cookie {self.state_file}") + cookie = nagiosplugin.Cookie(self.state_file) + cookie.open() + old_tl = cookie.get("timeline") + cookie["timeline"] = new_tl + cookie.commit() + else: + _log.debug(f"Using input value {self.timeline}") + old_tl = self.timeline + + _log.debug(f"Tl data: old tl {old_tl}, new tl {new_tl}") + + # The actual check + yield nagiosplugin.Metric( + "is_timeline_changed", + 1 if str(new_tl) != str(old_tl) else 0, + ) + + # The performance data : the timeline number + yield nagiosplugin.Metric("timeline", new_tl) + + +class NodeTLHasChangedSummary(nagiosplugin.Summary): + def __init__(self: "NodeTLHasChangedSummary", timeline: str) -> None: + self.timeline = timeline + + def ok(self: "NodeTLHasChangedSummary", results: nagiosplugin.Result) -> str: + return f"The timeline is still {self.timeline}." + + @handle_unknown + def problem(self: "NodeTLHasChangedSummary", results: nagiosplugin.Result) -> str: + return f"The expected timeline was {self.timeline} got {results['timeline'].metric}." + + +class NodePatroniVersion(PatroniResource): + def __init__( + self: "NodePatroniVersion", + connection_info: ConnectionInfo, + patroni_version: str, + ) -> None: + super().__init__(connection_info) + self.patroni_version = patroni_version + + def probe(self: "NodePatroniVersion") -> nagiosplugin.Metric: + r = self.rest_api("patroni") + # FIXME RC <> 200 ? 
+ + _log.debug(f"api call status: {r.status}") + _log.debug(f"api call data: {r.data}") + + item_dict = json.loads(r.data) + version = item_dict["patroni"]["version"] + _log.debug( + f"Version data: patroni version {version} input version {self.patroni_version}" + ) + + # The actual check + return [ + nagiosplugin.Metric( + "is_version_ok", + 1 if version == self.patroni_version else 0, + ) + ] + + +class NodePatroniVersionSummary(nagiosplugin.Summary): + def __init__(self: "NodePatroniVersionSummary", patroni_version: str) -> None: + self.patroni_version = patroni_version + + def ok(self: "NodePatroniVersionSummary", results: nagiosplugin.Result) -> str: + return f"Patroni's version is {self.patroni_version}." + + @handle_unknown + def problem(self: "NodePatroniVersionSummary", results: nagiosplugin.Result) -> str: + # FIXME find a way to make the following work, check is perf data can be strings + # return f"The expected patroni version was {self.patroni_version} got {results['patroni_version'].metric}." + return f"Patroni's version is not {self.patroni_version}." + + +class NodeIsAlive(PatroniResource): + def probe(self: "NodeIsAlive") -> nagiosplugin.Metric: + r = self.rest_api("liveness") + _log.debug(f"api call status: {r.status}") + _log.debug(f"api call data: {r.data}") + + return [nagiosplugin.Metric("is_alive", 1 if r.status == 200 else 0)] + + +class NodeIsAliveSummary(nagiosplugin.Summary): + def ok(self: "NodeIsAliveSummary", results: nagiosplugin.Result) -> str: + return "This node is alive (patroni is running)." + + @handle_unknown + def problem(self: "NodeIsAliveSummary", results: nagiosplugin.Result) -> str: + return "This node is not alive (patroni is not running)." 
diff --git a/check_patroni/types.py b/check_patroni/types.py new file mode 100644 index 0000000..2b4b7e9 --- /dev/null +++ b/check_patroni/types.py @@ -0,0 +1,63 @@ +import attr +import logging +import nagiosplugin +import urllib3 +from typing import Any, Callable, List + +_log = logging.getLogger("nagiosplugin") + + +@attr.s(auto_attribs=True, frozen=True, slots=True) +class ConnectionInfo: + endpoints: List[str] = ["http://127.0.0.1:8008"] + cert_file: str = "./ssl/benoit-dalibo-cert.pem" + key_file: str = "./ssl/benoit-dalibo-key.pem" + ca_cert: str = "./ssl/CA-cert.pem" + + +@attr.s(auto_attribs=True, slots=True) +class PatroniResource(nagiosplugin.Resource): + conn_info: ConnectionInfo + + def rest_api( + self: "PatroniResource", service: str + ) -> urllib3.response.HTTPResponse: + """Try to connect to all the provided endpoints for the requested service""" + for endpoint in self.conn_info.endpoints: + try: + if endpoint[:5] == "https": + pool = urllib3.PoolManager( + cert_reqs="CERT_REQUIRED", + cert_file=self.conn_info.cert_file, + key_file=self.conn_info.key_file, + ca_certs=self.conn_info.ca_cert, + ) + else: + pool = urllib3.PoolManager() + + _log.debug(f"Trying to connect to {endpoint}/{service}") + return pool.request( + "GET", + f"{endpoint}/{service}", + ) + except nagiosplugin.Timeout as e: + raise e + except Exception as e: + _log.debug(e) + continue + raise nagiosplugin.CheckError("Connection failed for all provided endpoints") + + +HandleUnknown = Callable[[nagiosplugin.Summary, nagiosplugin.Result], Any] + + +def handle_unknown(action: HandleUnknown) -> HandleUnknown: + """decorator to handle the unknown state in Summary.problem""" + + def wrapper(summary: nagiosplugin.Summary, results: nagiosplugin.Result) -> Any: + if results.most_significant[0].state.code == 3: + """get the appropriate message for all unknown error""" + return results.most_significant[0].hint + return action(summary, results) + + return wrapper diff --git a/config.ini 
b/config.ini new file mode 100644 index 0000000..a3066ad --- /dev/null +++ b/config.ini @@ -0,0 +1,9 @@ +[options] +endpoints = https://10.20.199.3:8008, https://10.20.199.4:8008,https://10.20.199.5:8008 +cert_file = ./ssl/benoit-dalibo-cert.pem +key_file = ./ssl/benoit-dalibo-key.pem +ca_file = ./ssl/CA-cert.pem +timeout = 0 + +[options.node_is_replica] +lag=100 diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..07c0723 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,5 @@ +[mypy] +# nagiosplugin => Skipping analyzing "nagiosplugin": found module but no type hints or library stubs [import] +ignore_missing_imports = true +show_error_codes = true +strict = true diff --git a/setup.py b/setup.py index 2c2f93a..556aef8 100644 --- a/setup.py +++ b/setup.py @@ -19,24 +19,29 @@ def get_version() -> str: setup( name="check_patroni", version=get_version(), -# author="Dalibo", -# author_email="contact@dalibo.com", + # author="Dalibo", + # author_email="contact@dalibo.com", packages=find_packages("."), include_package_data=True, -# url="https://github.com/dalibo/pg_activity", + # url="https://github.com/dalibo/pg_activity", license="PostgreSQL", description="Nagios plugin to check on patroni", long_description=long_description, long_description_content_type="text/markdown", -# classifiers=[ -# "Development Status :: 5 - Production/Stable", -# "Environment :: Console", -# "License :: OSI Approved :: PostgreSQL License", -# "Programming Language :: Python :: 3", -# "Topic :: Database", -# ], - keywords="patroni nagios cehck", + # classifiers=[ + # "Development Status :: 5 - Production/Stable", + # "Environment :: Console", + # "License :: OSI Approved :: PostgreSQL License", + # "Programming Language :: Python :: 3", + # "Topic :: Database", + # ], + keywords="patroni nagios check", python_requires=">=3.6", + install_requires=[ + "urllib3 >= 1.26.6", + "nagiosplugin >= 1.3.2", + "click >= 8.0.1", + ], extras_require={ "dev": [ "black", @@ -44,6 +49,10 @@ setup( 
"flake8", "mypy", ], + "test": [ + "pytest", + "pytest-mock", + ], }, entry_points={ "console_scripts": [ diff --git a/test/__pycache__/test.cpython-39-pytest-6.2.4.pyc b/test/__pycache__/test.cpython-39-pytest-6.2.4.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b819608e89b1683c991083d034eee88190e0ac8c GIT binary patch literal 3375 zcmb_f&2Jnv6t_LIv)`Ld(vP%3C4fMTL^hl7(GRs%pyK0zBE*Lfa~Vx$yva1PGmGsl zP1wDVO0AF(y;Gzt;!=q-2M(M#^l$K$Q~v|BLcn|8O}hyt<&atV&GYlK-_OtX__sl| zS|afL{K-r}%7pxmmB}Xul~pM6ClC>dXhJ+{-mGmgLP^{9a&*+u)!7x$+5%z$2>^-l1U{AA?ZV^KaT+_Jq> z+_`=_Dw<#E8|z&Xm7~fn5>4$)rthrPduO;z^ z-8AI_Do#5Kw?JdsZU=FyXTqc_6&HS*McfawZmPg*>VrUuER8?pGV6+vqk9^(xpo9s zU6F`p2(j(tNHoj-`q)&Tvj}FMUbA8rhdu%{HB@A+A!*AE3 zu2XnFtMVJ{Jr zyc;Cr7E3Cjr+hz1(+oPsSt`NDic@7WGa>4LOq>KUi#Tl%C03xu5+eNl2V1b9%}nPu zxlLOvq7l1Q=vzIq2h-?T12$eZdo5)9ec_Edb~RCIpeUl~9kNUB&|Lm)#OdUDq`}^f=Qatbmv@q8!{AbH7+Mv z7$ov)CpP&;sJ@W|liy@qZT$gm>2a8wlS!SRV09t=1(Oj#R{-yi6J;o-S%_pQk8 z+dZpiZ?S%^my>%cx93ElwP@s^y~PH0lvnw^!hqZ$avj=>JvN{s0j-2u3Lh4uTLWs| z_JBr(d)6t^ciNzn5J+I2pTIuBnbeDbwpFNxOEhxFZ{X~@I|3-P2%J*TcBXEL8 z4Ll6eyHv~>T*rsCawwzK16XU583iPNJQeFN6Ig#cW~@_3Vyz#*%H)qv!OCRo5mqKT z$BcFQ5Z0L^@@mNso`ThqGmo%Z;+bQ{I(sD6gL$>&pHId5>jc&Z$A{HiekazvlmEV& z{cP*u1NdIJxglHR!?{hRI`Y!e(sHx0(rheWyx4g4(uHR8!cK8)JIgP=+}U1nW$DYYvT$VqwWcc@{AR4o4OHqcTp5{Pcj3l^&tW%Fa1&bK zhhhcA>pQhT!pB;f{^wokf$_Xs7B6DZmr!8%At3r95NVul4DHqCrR8C1aWNFJisAD- zWUDK~>BU6>ECn3)9Yqs6&OEm7`-=uNc+Wo}KjRVnaVR%`2!>cjKsc^gL4gB3#LRdv z95LE{+=qC(cxRvB`JeF?F^Y+|gqi_9|AYbdL%3CeLCkfbojARb0b9fCR~H|xi!QBp zwvoH|fzZ=FBrpNu&=FzQ?gT)PD$wk&cjH9GDXiP*2Gn!D+|0U3WL*7?BwG&>!zXmn zmq81dRY&OBn`SM6HoPrFCiJY|%0xR*KEK&Xg47TSw_t8T*bs(P3K!9pG3IC^EPE4` z*Cs2kj@(8=zAh1#_6w{3lGG)A4j?E2ludgcG< z;w>1*6Dm&W~;4Jod4X9D3C-iGg?^e%Q-q zHFX~ODG*SvJYG0tBPNU9^g$*9oy~HaL*vaJY&0}QAUD-Ekd;kjgvg74;W@bSK=SbK zjF?A(yj4><;#SB$%uFZpY-o#NEQQM>HhC3Kf;BtmCcsV$Dw4rd{(u zc;bcrfIh^1B5$a_g5R*ONJx1`Lc&OebHmJ(eBap0W>XSe z#b2kb8)-@U8J)>X0p}*%;WGd#QQ}H3ouq{7Pi1P*#J6gFYV0Rz3jF-oPty$eg|VNdIq=^Y`%^Rz 
z{xl_@OU1$-%u`w~k~n$Gb?$EZzRkeN-U*r=n|Ug5!;@1Uxfv--0`AK!i)%Qjc&F!NO2^TOm?Jc4V$lg{sh^QJorG zPz>f zkV8Bj$$i4U1}lS>FGCz{rBB3D?Gu`Ps=Oujbef}6k?s(%4W5N)E;9OLn|uSQQ>C5) zw+7!Y=DH!#{4;Xg%$|_&F1&xj+@Qw{^boUqLf8c{mw#n`RKkR=$LE^vX2@-3HUq!q zY?y5;G@EV9->~W4Pmqd%T(Spe+MIX8)k>vSEzeiWwUw3f!s>Fhx;)TJC3vG9&t6-s z($-3=I$x_bmsV;^^9!xoa;v#u(-qoitXv&rN~Mrn+%B~o*M4-iWp!*5P1CnLyDUP& z-@{$8O9t}V+AeuiyLh>~HD3%pk%p+VK{b%X66RaBaF|OVLRA#spx8M zxIx2m#WKW6GqhT;SGp|DeJW<_1paNt0v1o3t$=wJH|>XA*YYh6k=+C{#5|eUi);p+ zG=Assr`9H&#Ytyj=nRLvHt8(yN~{Fq4DM9gfoE45wjVgWvH>@>T&EFKnr*w;F}oIL zf$voC*9$O@ucUH!E50O_>G(b{AhWy>X(0RnKq9)FAu5q&nH+plzsoPw%y+U%5`U@+ zt3bcMu?Q}SV38leIF3bNXDZ{JsRC!Hhd5)0Ydbmx_5jY%$DCo{ZWWx-fOt}Hh6`3| z#2JRLku$IjIU_OR44~aHXQUyTc^PMjJ(*q?b;wXUirSItWLaosIRxZRRzR3Wm_c|G z;Vi;AfZgmtDYNs~ignE1LAZeMF2XFrdkAv~?<3&)vx^9q5U?DEPzEc$@~TV*9rJ<1 z+sGyDS$HGF36Jwh^cQN1)u822=5X`jZx)&R4UGNFaha0yAc5QR_vet|cR&I9gh*e4 zFskrDW$^PgDO{|AWa8&5xVZ|tp;B$68yeS9FQ6sCjp2x(85%IN5-Il*Jry)Vi_}O1 z&Cnw~`~}qkb0#zc+DDp^-pPpk{{ZnUssnotRt_zp8E8Y^MVmI(3`1y!Av7bwry!b- z5)+!Cf@bJFP2@knKDc=NE}0<8m^^rNRHz%Z}g zn%ALMaVAdeAFG6d#b?doL99GH`M}|*%xu9>adO-g8>2R%fAwtBb()UN@Y0NiIov+Bt$fE-3#cZ ZYu`W}5Ay(*NCvnlujJ3<3;CIR;V%Q>Zb|?E literal 0 HcmV?d00001 diff --git a/test/__pycache__/test_cluster_has_leader.cpython-39-pytest-6.2.4.pyc b/test/__pycache__/test_cluster_has_leader.cpython-39-pytest-6.2.4.pyc new file mode 100644 index 0000000000000000000000000000000000000000..982c1069b86fc2c2f07dd38aec7f447d8d3d72c6 GIT binary patch literal 1568 zcmd^AO>-MH7?yU|`{9qYC6oz0|)oF7Uo`$e79S3y?>T-Vf>3zF%@S7<37Y<)6FR$|d9vWVQ#$ z$V-^I0$_wuK@w_Lr*s`cN$DjWiW*1wWsn4xc12i5Nn~kHbjx1Svvf!F%Rw@rghz}r z|DD4E7XIQTJGXqqy1?IK^m{VwUBR8?&5*XyD^Yy6s4A|28GI}A8CTyFXJ(-|2)r^Y zDp-SZnU^a|*Yy10qsQ~*;ZS#fx*_(D_mLV8baB!adB{Y!=G*Se`OKos;b@`V`PzO`A*JzYY z{dzQ&B_B8eOynq^@_d%gGoxfxjL>@v>D`3I^JV+UhUcyh3L%1PbRU3F z-|172hJSgk8Up|SXoZpGD>>|96@DO1i0TZ-=$3XpejAs(h z+>P<4ARXRs!+%OsJwh`drki4pH`N0L7AyXvDhw3nt*8xdWNHu3402KAvlx?HRHwFM 
zlHN^E;Pa;NZEhd@t7c+2v+1Ocky2>eVm9vzKWxi8>!)=w{IZm6A^2DL259^{_?Y$` J___D{e*@B|uygsh1MphQ$S z<-t3~Ltg&X?`_@56l8MSl@?s0bIQ8K01%z6XD zz2E4Z&DfOv=+8Xv&-|%3^+#-0nwHG9EnNp3xKnTi^`~s^^U#LZvAw@YRL2iK=TN`Vh;4XpKPIHy+1~L={k&-~)Mg^>!Ye$O zhj4)Ke7O}HBeZqvU%*6AzF0IggY$F=pa|0Zt%7X%C{*sYFRCZw3oDF`VqH9pl2}Ug zR3|DM$66G1Fh~7KDs7s>@lwGIQ#^{yP)#IvQ=@@Y{a8kMoQa|mnfO%T4#n0t?ndjDBfM|?|t^r(D_xI0You_#|xMU{>C?`6#&_VLj zKj|EGZ(MC+?DQ4_re1F&e1PyF!Y;xm2$;|nG=VF+_oU>uJ)^VK4xx7@xsB`)8fK^@ zH@K<6gKjg$Xz-6;*Pnx?xm}cRYg~T}isEs!>WbE?453n6TAacCp~CM}MYz`K$GE1E zKv9fZc(XJgxYp~{*3E@4rN(Ezc=%rNwP4MvB}-N*XP>s}ZNKv4WLa*qUj;ho4H2sD_12MU|B?|p~j7$*iawnef?W=U~qF__Km)I zqz_X4RD3Q|=N3op09`@$1AS=nv`n98hT3y2-pBfEUmr5?QOl7QqQjJK44HbHL&h^N zjN974;^?}*ub$=YkRxCzO&VAZY3*MzXfv8h>i8#-4Xk)^}&Pg z=~^|;eLq!JcUMVkR^z%j@g$DNMQ*UVJ27umV{&xd^X14$3~WbYjl{-y$%$hDQhmRP z4biwcDuRyVK?+4;dExU2xiSf4=iMi{z3!^6?fstT zOE1LX{Rmsc$z40{L_MFYruLy9?K{35I)O;CcI+Go>LFSBTusS~!lwWcBo+H0g20is zIO+OM=t!&?Wx!Ii1GVj-L?BONPS(@93?5wu>k}~P^6_P`HPWDp{fwVBI#D1R`yz}y z**L_)9pBrJ8g56pM|Rhd5QScYR=x8%-^N~ zA0xhTp#T)mKnt$sfeu5+lc2?qv$fi>1ECilj?KO1PHUX2)m-qThnr=r@2!o?wHgtO z4Q36YK_9OCYoE1;8g)x?0!0$bBxqYenZzv;w@KU~ai0W{JrNI4OCB!f%RcsG$Ce$8 zqE6)VxPtBDB@{FMbY}f!mS7DnH;%#f{cHB{loor+pCWjaeu-dcqCXU4&?1c}x_A?c zK3YW4OVc4fukurX4Yc1pe%^ltKlAXQ2!wSaK%MQc86yw=kAT^Dy9*fDypastyM}>R zN5&^a2c2b5C!r3nN2I+bG4CxRW;%_pu8TCw(Kkic_uQj8 zO$2X9m9w?;)2xgVY@mM^N%=onOC7(Nw)MSiT3Je7H<%S)Y*KtT$$Gk_+0>7GV*_YYg!KDht@ literal 0 HcmV?d00001 diff --git a/test/__pycache__/test_node_is_alive.cpython-39-pytest-6.2.4.pyc b/test/__pycache__/test_node_is_alive.cpython-39-pytest-6.2.4.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3862da310f9c750b2701a2b8d456fcbae7c2f12f GIT binary patch literal 1504 zcmds1&2l3(5SBFK`LR9Oq_Rr|PPqWTH{W;+o?)(>a^{u;MN1ATr-~;)Q)+$Pt)J=cPs);{i=h4e*LL=fAoQnI z)|&^)5p-RFV2ELX#@LOd^a#SJ^v4}+Jwya$I1U~5L{!G(*kNCE%ig%>a7XmZWSn5c zV}@Dqp0JQbzxd;gdp%}d&>u1U0~+*h;7s&(fZOHwsvO&$?pg$62>lIo{W}PQu26#wt|2@|t~`utd`*mZ 
zf|&OkUXX^=0Oe~`Q^^3F*E&oINTeh`=fO4)vNT`>x0qgK-s{R&BPMllEcF# zzTErx>Fi>Epu0caf!)Jhi$(*T^7F#b98T#ryN4_D^MSS*)pKE%UhBa;EB38;e!G!r z&6RQMU&2H%zFZ~>NQG1fL>m;g<@?9omhr-7gig`bH_K+HHC) zRhb#e&u1d5G6SAvqyqPM%JEBWSfyvrT*b4uu6(hsJXTY!~#3g|x*Ih9-BdYH)1 zz%tluqkkEvKC-Jnq$`f5D^dV&QpMk?!T{gyO>KB*Q#*D|l8YjrrnZ1Zb>c3%)dlGoDTQ`7#}!xEA6pB`h36uRzAPo13;vb8+S(@S6Wk~8d7IHc D5K5Ju literal 0 HcmV?d00001 diff --git a/test/__pycache__/test_node_is_pending_restart.cpython-39-pytest-6.2.4.pyc b/test/__pycache__/test_node_is_pending_restart.cpython-39-pytest-6.2.4.pyc new file mode 100644 index 0000000000000000000000000000000000000000..093fd51590a20d90dd2fd9b4894565109dcd1919 GIT binary patch literal 1598 zcmd^AKXV*46qmGn`{(Y~NyZRNnz#XSlVR`d5aMJo6DC6nW=H|uHamMO*3p zzZ+)#bAWjcLsbA2QOr?-?MzCCAdE^k>0zTG+$;UWx3t59GE72CyS!fxl7Xdrd{{+d9;3-NtF$8EJb>Po3-DxkrIIfKZJ5fGk|S0awzRXrc6Jw-25+6asP!m>-gpHD+Sdcn4jX@g&+r>aw~JZ_Mz3`#GKqPW z+^Ym-X`#}ZRkWxMQ^}Ohq)x@`;#Y`i>70^}+3D7i*0p*z9`8)z-ATOj^lAL)+1_Nb z*S&Xx18GvV#Fu1$e~C|bK6p@{?2c6bhbv-xf7?)#k&4-Iq0=0W(Qmi+SK`MbWm2x@ zTrZu@0XqY`1|DB-WJ)oq?fNHR2*M}JNJ7;i*#qdjqIxah)?CAc*Y3Oi^ZGiF!%`%XlJ)N`#C^rlVmrM=YPEb*81Lim{2_LVClk#q~*d-{$BB z1E`A-HdI3Zggr9EE)M>59XSI2f7Bi`QTzlIE=@8nZ0QYvl92&(v+|aKO#|;3c-O$T z0drp~cmULYWP2uzi+^YPAxKBJyWsC~mG_LV_tRB@$E%8hqKhT_UKSdP^;*>VS2DR} z)y)|9NOP@Q8Kvd6Z7~<^Ky50{!vFUtXXwePYc17Z97{5(j06pL>sJ)Gx)3& Wbk5o5=3P+c&*DQoB=B=?_x}Q%pS_v@ literal 0 HcmV?d00001 diff --git a/test/__pycache__/test_node_is_primary.cpython-39-pytest-6.2.4.pyc b/test/__pycache__/test_node_is_primary.cpython-39-pytest-6.2.4.pyc new file mode 100644 index 0000000000000000000000000000000000000000..494adb022735a46a0c82f90bd23ed00f24fc4b0e GIT binary patch literal 1542 zcmd^9J#*wV7?xzOKlURZggXK>X@J{d@NRM(m%Aa%!3}g=0i7F<*HRL*vgMJqM>g2a zTyS!WU;+~Z|yk89B0Y)sO zn0jvr^=a_hjW=%fkaj`er1(cP>|Mc`=*1AX;d7q(85rCm z%}W^lVxAP>8tlfnFjt0e@JN1(qD7 zu8~Rz*vifV+tFQM8oY7tqsF5UdhO*HWMB8d+pqlvzQC^_-7abz=$*!;$Ry@P{7xw- zOLCP|GA~j&PsH@7*-vv;&IKN>2NVm0G>>9h{aGjL$w 
z(e*~E6qDMne+(1B_;@*xz%nE|0IipoF9lS|HA;B9ecOFj%{5axNoBj8WGUx~WR=WB zQKgc#eQ+m}SQu})wNsN5E+#2Y%Cul@n5guanH}1VukA18qWo4$ zA={ngSjZyP2|KHJTBaI2%RqYe@1!N1+%QVdoVki;e_i=}U3s`tR$G3tt{g6r`~>#7 z{$_M43O1UsQsjDc0-dHjpNLU*%Cc!vrCN$IADQSaBzLM6RrB^eo0~fhsDB6tC_`?9 zJ<`WpSPnt{A5ecP^51~P(j?%@(yIrO92hXwlbZ&147_XLo`F3BrY0+R0MdVCb}F{P z`F3VM19!Iy=gGtRy;_g$Gs|6|-I I@Htz-Kj>ViegFUf literal 0 HcmV?d00001 diff --git a/test/__pycache__/test_node_is_replica.cpython-39-pytest-6.2.4.pyc b/test/__pycache__/test_node_is_replica.cpython-39-pytest-6.2.4.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b100c3d1daca7e9dfb4359c6d5dad0db6fd6568 GIT binary patch literal 1928 zcmds2PjeeJ6qokD|D{el)27p5CNm5SE_iLHl%^fR6gqID2e|EJXKg9YV(rQ!$t5W^ z2f|0-*h9?CH$DcRVXvHW=i~$Mo*Xi1fD6a2M?d}Elk}wb^z@vyT6F?n_xD@bA1)!k zp>XvB9eD~(e-FS2qk<&V_DNedY3I8}f>S5=@mOe#kdEnR#O^dYoo(DW|=hMbZGHFOT{ zGUwE#WKPc<b6=ks|14m@4#73lA395qg(f_U zmgEJ|Lm02koh4NQSRGjj0kGU%Qrmk=%A!~9ZL$a$gkm1#6nJPFFxs4lOL|Iwfbe@{ z;X?Bl9wQK##`EfCDH%_5ohm*R`6x@}`Olz);UH=Fsg7VR-r5xB#`2barmoFsTl7~X=(tK5aIz2I5 zn>16^b~?(0Krhuu7Sl}estLpCaFz>`m$1E%Fhf^6sXmf3!R*v@EaWf~X_*zgs--$R z9oOPQPA7#hn&K9s%D5g$#`X=aox^B#I{A-#56@${s+ zV`Fn|0PepA8)!^!LIbBs+f;o3{EbWf@`_7Avf5=Qh2zfT=bD-k!RNa2w$c z!VbdQ2zVzOcpun*M0O(4$m@~qf#!{itit3`U4V|S8pxXnEr6GPw=1%7;tu+K6{Gfl zhWOsUChtV1VEr!G)wm@ju-l{_jsEmp)d#t=T}6Mtc=Z-2s=Mi?M#P&(3GX&m{9Bb9 zc<-00D!kCCd$^`Gf=W4w@dSA}wuLBZT$CF8zfkz+S8c!c6k}MkDNw$VQfOPb?88!+ j1NL-R(#R&1&86t$Lb93QpJ08pUcF134*cA9)QY}T7@nE^@@E>Wr4-adgaqn>ytY$H(?U>D1)S&sZoaJ6JCkJV*&SUODB?$p^&yj@`&e5kG)k%`@-Uyfg2|Oww*Q2z;OabvOCdA>>bF zwm*=O7cj~KfD=w78B#m5++l>0+#PxpHB9<>Fbpj1$XZ?>)-CPIM&2AYE$zux-X69o z5p_temCu1Xf>2O?#1<|OOmJ0OkW*5QKwqD+ z1=SL$22uqCY-wjf?d&cnuf1{Zley0!^qHSgpaatcZEF@R=q3Fb((RJD1H+rUoM2*J zRrdKx6!? 
zPPe;R+}$j`@b)7x)tIU!U9rQ%CA}DY_-JyzAC`@uHpJfH9#Z|VjKo=H;uMb2nC~5~ z#ZST#Q(jJ`Svr*ib_n|jPgXlgSqg3J`lnzB!l%o&X0V_304hH#P8C$mMW%k^zG}Rf zoEuS^IMG!nP7^6(Ehaiu`6SVzYJoZ)O*3h-0=Ab5EXwLZT%M?@_K|Bc=1i#NuoslOi#oS=O{~Pp54I>Rn`-xaT^a!8Y^h zHuHGRtX+P#%^WO=4#Cgr+un)FMQOVfD3N`ecr#e zeb+|m9Rw_^?jXE_a3A4agbxs~SZnwgsQ=3LSiKJxw`IEv@^H7Ry=_(f5M8|&ubVqs zcM~*Rq{WXqGtj6vqAJ+PG&--^sm#)GgeA|4V|#;!&CMObhfm|HU3LD|LlLZ5-z?84 nB};qdYzJsO*j9u#RvTvRi(K)k6kp<-P+~4WqAdo$v)lOxoMErx literal 0 HcmV?d00001 diff --git a/test/__pycache__/test_node_tl_has_changed.cpython-39-pytest-6.2.4.pyc b/test/__pycache__/test_node_tl_has_changed.cpython-39-pytest-6.2.4.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e4d51b6b9aa33089c33bed2ea54cc9b4434c5f2 GIT binary patch literal 3489 zcmeHK&2Jk;6yMoTf5#>P^#j^aXsIc&owOloXi!l_Z{$!86~3%Co^ibOeps`Uwn^6< z2q!M|59lH83Exov3jT(@A|Zt{Lc&Oe_r{Lngfwj>P6%83+xIa$JD&I6?~R>&KBK_r z*Wad{C6InbW%8qeaucrTDF9U{@sv7|ch%Qag($vWHwb!E&-AUjC1uS^`RRIE%DR{F zv-PZ$4KL^C>v_LWFA#;LDWT>Qm0C3QtzMrR>uH(+y*SphGzWTVtmkO~^f$)(6fJ^2 zP05!^xwHrCRMyHQN#F9^JDWkkI4Joq!bY3%+wMcL$r%`Q-*JPonppmp?Sn7TTMQg( zVxq=)VsZvUg9gF*MxPYib8y{)D_RGSPQNpuIEKVfY!Vg^RY)jcNsoLtMlvOcO{K-ZG#Af(I|p51aHyU}uj z4Mz8Vf*1_cIW{=e5~35WRIBr~%0jI&fBkyp>dJDhwmitp%?a0Mo*S@4U0fdI=H?>d z2sYPrJ+^(i>9m=Rq8&Ist4NpB_b}=EWT39D_R04A#mk+og>sa6G!kZ4XHlw^qY8WI z3cCTT%XDW~hvvm{gt-%KdZMo-8jM3&Kv*0UoG4;k$mf?}AQ+d*8qdP}I3^ZP1C*z@ zin0Nam~L=CgnZOd(IUaQiBfv}<|K2ovn5z0Y=y9V4AxYbj(}Ys!@TBmWT&)`hpK~7aWZG_q+;@b{ z9(Ft@a0GbzDQ?Mia#?D829+$nbNEx|CzWfH%GIGVobuyIWx22LIhbc~yV?qUR$XU7 z=!)tFT-5R0^|0D#u}0hOID&_PTg6{5!9FHYDxIz5yxgB-oG=A<{0MhFdLKX`rkW!< zQB{>3d^3Nj&-L7Qs!mdW=^C%XxPQ$-^!iG`DA1uT6I|%z>6DlQV zp`q=l-=9Ig-UUAI5u)6MHmAcEc|Iu4E77$YG(h}*1((pF3+U7sbpb<|$o^nS32Qhb zbN~Z*R*SWJsjdzkz=-wOfDT~BX7mekJqAp60I-ibKz27LgZ~5g^T_r58At_`WCuVQ zf-Y^wxC2i*ahA1L)8J%&sAWUtAlUJEk@!2r(fKwu_H0?C6}$$U@tW1%4i8uRZJ6S?YX9Wu#{XaK`_;}db@^3*?b4y5hNzuT)hqba$??2~ zLzWO;xv=)*c~Dq);rW*r)(sd{o=H;sCmY#PlGFC^EL0gDKHz*(;Y<=sk{+89Yh;rR 
zDakiHx6!U(>AS&(}shSen>Yx_9-^js0eV0=*$@O?H+AoRX9 z#fi3Gmyi6+Y7A_DmwRjK@_X&f%oaM~LDu<1huCV|QWro+uT0?B#(odG`v#rIoc7p) zJ;Pjn=X>l77H|nG_H_S@FZd;2u}h)E3Vvc7bClSYnC%00hq=Sg88q!@^a^8%90qyp zOg)eK>tW_cx4%qekYjCikY=juAg#_-h;>~Ur4wN#q6l)D`*DELb7p(E{`-r;*i7`` zNY|$HgNN8vUX@2?P>gkPlHqPjdEmA418+>_`qR_UdZ<0UOO(189fP-^19lr{KO`b_ z^O-kU?!7IKT!=^3%n?uID&t*2+pil{>MnM-k4`gUJLNwzWJoGFU^>E)A0X?Lcep{U zd^JT9sO0xBvmjOO^AJ0q`@;5kwSE8l8jKi0(PJAJDvkO{%bTJoG5B%p&tyJ}uwH zkZaz9`Tpb->sp-j4=PjSl{@Uev2}jy4e|8-=~TP^f4-jntBVt933d?9+Xi2VP<6!^ zS5V%;NJ9;S_k{fvSILbaNrIt8l1EL4%_(v28|ao&Nq7GT z=o*bgxMNZh5sJEqr@7TAjTXK93Avl(Nbv#Ox~_W^p{K8D=XE*T2Wu%F(%*z5DYc;u NU5-uc#9Ocv{R56!I*kAT literal 0 HcmV?d00001 diff --git a/test/json/cluster_config_has_changed.json b/test/json/cluster_config_has_changed.json new file mode 100644 index 0000000..b6c0015 --- /dev/null +++ b/test/json/cluster_config_has_changed.json @@ -0,0 +1,16 @@ +{ + "loop_wait": 10, + "master_start_timeout": 300, + "postgresql": { + "parameters": { + "archive_command": "pgbackrest --stanza=main archive-push %p", + "archive_mode": "on", + "max_connections": 300, + "restore_command": "pgbackrest --stanza=main archive-get %f \"%p\"" + }, + "use_pg_rewind": false, + "use_slot": true + }, + "retry_timeout": 10, + "ttl": 30 +} diff --git a/test/json/cluster_has_leader_ko.json b/test/json/cluster_has_leader_ko.json new file mode 100644 index 0000000..198fe14 --- /dev/null +++ b/test/json/cluster_has_leader_ko.json @@ -0,0 +1,33 @@ +{ + "members": [ + { + "name": "srv1", + "role": "replica", + "state": "running", + "api_url": "https://10.20.199.3:8008/patroni", + "host": "10.20.199.3", + "port": 5432, + "timeline": 51 + }, + { + "name": "srv2", + "role": "replica", + "state": "running", + "api_url": "https://10.20.199.4:8008/patroni", + "host": "10.20.199.4", + "port": 5432, + "timeline": 51, + "lag": 0 + }, + { + "name": "srv3", + "role": "replica", + "state": "running", + "api_url": "https://10.20.199.5:8008/patroni", + "host": "10.20.199.5", + 
"port": 5432, + "timeline": 51, + "lag": 0 + } + ] +} diff --git a/test/json/cluster_has_leader_ok.json b/test/json/cluster_has_leader_ok.json new file mode 100644 index 0000000..547d6c8 --- /dev/null +++ b/test/json/cluster_has_leader_ok.json @@ -0,0 +1,33 @@ +{ + "members": [ + { + "name": "srv1", + "role": "leader", + "state": "running", + "api_url": "https://10.20.199.3:8008/patroni", + "host": "10.20.199.3", + "port": 5432, + "timeline": 51 + }, + { + "name": "srv2", + "role": "replica", + "state": "running", + "api_url": "https://10.20.199.4:8008/patroni", + "host": "10.20.199.4", + "port": 5432, + "timeline": 51, + "lag": 0 + }, + { + "name": "srv3", + "role": "replica", + "state": "running", + "api_url": "https://10.20.199.5:8008/patroni", + "host": "10.20.199.5", + "port": 5432, + "timeline": 51, + "lag": 0 + } + ] +} diff --git a/test/json/cluster_has_replica_ok.json b/test/json/cluster_has_replica_ok.json new file mode 100644 index 0000000..547d6c8 --- /dev/null +++ b/test/json/cluster_has_replica_ok.json @@ -0,0 +1,33 @@ +{ + "members": [ + { + "name": "srv1", + "role": "leader", + "state": "running", + "api_url": "https://10.20.199.3:8008/patroni", + "host": "10.20.199.3", + "port": 5432, + "timeline": 51 + }, + { + "name": "srv2", + "role": "replica", + "state": "running", + "api_url": "https://10.20.199.4:8008/patroni", + "host": "10.20.199.4", + "port": 5432, + "timeline": 51, + "lag": 0 + }, + { + "name": "srv3", + "role": "replica", + "state": "running", + "api_url": "https://10.20.199.5:8008/patroni", + "host": "10.20.199.5", + "port": 5432, + "timeline": 51, + "lag": 0 + } + ] +} diff --git a/test/json/cluster_node_count.json b/test/json/cluster_node_count.json new file mode 100644 index 0000000..7c7b2e0 --- /dev/null +++ b/test/json/cluster_node_count.json @@ -0,0 +1,32 @@ +{ + "members": [ + { + "name": "srv1", + "role": "leader", + "state": "running", + "api_url": "https://10.20.199.3:8008/patroni", + "host": "10.20.199.3", + "port": 5432, + 
"timeline": 51 + }, + { + "name": "srv2", + "role": "replica", + "state": "start failed", + "api_url": "https://10.20.199.4:8008/patroni", + "host": "10.20.199.4", + "port": 5432, + "lag": "unknown" + }, + { + "name": "srv3", + "role": "replica", + "state": "running", + "api_url": "https://10.20.199.5:8008/patroni", + "host": "10.20.199.5", + "port": 5432, + "timeline": 51, + "lag": 0 + } + ] +} diff --git a/test/json/cluster_node_count_critical.json b/test/json/cluster_node_count_critical.json new file mode 100644 index 0000000..f35ccbd --- /dev/null +++ b/test/json/cluster_node_count_critical.json @@ -0,0 +1,13 @@ +{ + "members": [ + { + "name": "srv1", + "role": "leader", + "state": "running", + "api_url": "https://10.20.199.3:8008/patroni", + "host": "10.20.199.3", + "port": 5432, + "timeline": 51 + } + ] +} diff --git a/test/json/cluster_node_count_ok.json b/test/json/cluster_node_count_ok.json new file mode 100644 index 0000000..547d6c8 --- /dev/null +++ b/test/json/cluster_node_count_ok.json @@ -0,0 +1,33 @@ +{ + "members": [ + { + "name": "srv1", + "role": "leader", + "state": "running", + "api_url": "https://10.20.199.3:8008/patroni", + "host": "10.20.199.3", + "port": 5432, + "timeline": 51 + }, + { + "name": "srv2", + "role": "replica", + "state": "running", + "api_url": "https://10.20.199.4:8008/patroni", + "host": "10.20.199.4", + "port": 5432, + "timeline": 51, + "lag": 0 + }, + { + "name": "srv3", + "role": "replica", + "state": "running", + "api_url": "https://10.20.199.5:8008/patroni", + "host": "10.20.199.5", + "port": 5432, + "timeline": 51, + "lag": 0 + } + ] +} diff --git a/test/json/cluster_node_count_running_critical.json b/test/json/cluster_node_count_running_critical.json new file mode 100644 index 0000000..e6016fc --- /dev/null +++ b/test/json/cluster_node_count_running_critical.json @@ -0,0 +1,31 @@ +{ + "members": [ + { + "name": "srv1", + "role": "leader", + "state": "running", + "api_url": "https://10.20.199.3:8008/patroni", + "host": 
"10.20.199.3", + "port": 5432, + "timeline": 51 + }, + { + "name": "srv2", + "role": "replica", + "state": "start failed", + "api_url": "https://10.20.199.4:8008/patroni", + "host": "10.20.199.4", + "port": 5432, + "lag": "unknown" + }, + { + "name": "srv3", + "role": "replica", + "state": "start failed", + "api_url": "https://10.20.199.5:8008/patroni", + "host": "10.20.199.5", + "port": 5432, + "lag": "unknown" + } + ] +} diff --git a/test/json/cluster_node_count_running_warning.json b/test/json/cluster_node_count_running_warning.json new file mode 100644 index 0000000..a53124e --- /dev/null +++ b/test/json/cluster_node_count_running_warning.json @@ -0,0 +1,23 @@ +{ + "members": [ + { + "name": "srv1", + "role": "leader", + "state": "running", + "api_url": "https://10.20.199.3:8008/patroni", + "host": "10.20.199.3", + "port": 5432, + "timeline": 51 + }, + { + "name": "srv3", + "role": "replica", + "state": "running", + "api_url": "https://10.20.199.5:8008/patroni", + "host": "10.20.199.5", + "port": 5432, + "timeline": 51, + "lag": 0 + } + ] +} diff --git a/test/json/cluster_node_count_warning.json b/test/json/cluster_node_count_warning.json new file mode 100644 index 0000000..11b7383 --- /dev/null +++ b/test/json/cluster_node_count_warning.json @@ -0,0 +1,23 @@ +{ + "members": [ + { + "name": "srv1", + "role": "leader", + "state": "running", + "api_url": "https://10.20.199.3:8008/patroni", + "host": "10.20.199.3", + "port": 5432, + "timeline": 51 + }, + { + "name": "srv2", + "role": "replica", + "state": "running", + "api_url": "https://10.20.199.4:8008/patroni", + "host": "10.20.199.4", + "port": 5432, + "timeline": 51, + "lag": 0 + } + ] +} diff --git a/test/json/node_is_alive.json b/test/json/node_is_alive.json new file mode 100644 index 0000000..b697269 --- /dev/null +++ b/test/json/node_is_alive.json @@ -0,0 +1,19 @@ +{ + "state": "running", + "postmaster_start_time": "2021-08-11 07:57:51.693 UTC", + "role": "replica", + "server_version": 110012, + 
"cluster_unlocked": false, + "xlog": { + "received_location": 1174407088, + "replayed_location": 1174407088, + "replayed_timestamp": null, + "paused": false + }, + "timeline": 58, + "database_system_identifier": "6965971025273547206", + "patroni": { + "version": "2.0.2", + "scope": "patroni-demo" + } +} diff --git a/test/json/node_is_pending_restart_ko.json b/test/json/node_is_pending_restart_ko.json new file mode 100644 index 0000000..ea4d396 --- /dev/null +++ b/test/json/node_is_pending_restart_ko.json @@ -0,0 +1,27 @@ +{ + "state": "running", + "postmaster_start_time": "2021-08-11 07:02:20.732 UTC", + "role": "master", + "server_version": 110012, + "cluster_unlocked": false, + "xlog": { + "location": 1174407088 + }, + "timeline": 58, + "replication": [ + { + "usename": "replicator", + "application_name": "srv1", + "client_addr": "10.20.199.3", + "state": "streaming", + "sync_state": "async", + "sync_priority": 0 + } + ], + "pending_restart": true, + "database_system_identifier": "6965971025273547206", + "patroni": { + "version": "2.0.2", + "scope": "patroni-demo" + } +} diff --git a/test/json/node_is_pending_restart_ok.json b/test/json/node_is_pending_restart_ok.json new file mode 100644 index 0000000..d47b18b --- /dev/null +++ b/test/json/node_is_pending_restart_ok.json @@ -0,0 +1,26 @@ +{ + "state": "running", + "postmaster_start_time": "2021-08-11 07:02:20.732 UTC", + "role": "master", + "server_version": 110012, + "cluster_unlocked": false, + "xlog": { + "location": 1174407088 + }, + "timeline": 58, + "replication": [ + { + "usename": "replicator", + "application_name": "srv1", + "client_addr": "10.20.199.3", + "state": "streaming", + "sync_state": "async", + "sync_priority": 0 + } + ], + "database_system_identifier": "6965971025273547206", + "patroni": { + "version": "2.0.2", + "scope": "patroni-demo" + } +} diff --git a/test/json/node_is_primary_ko.json b/test/json/node_is_primary_ko.json new file mode 100644 index 0000000..b697269 --- /dev/null +++ 
b/test/json/node_is_primary_ko.json @@ -0,0 +1,19 @@ +{ + "state": "running", + "postmaster_start_time": "2021-08-11 07:57:51.693 UTC", + "role": "replica", + "server_version": 110012, + "cluster_unlocked": false, + "xlog": { + "received_location": 1174407088, + "replayed_location": 1174407088, + "replayed_timestamp": null, + "paused": false + }, + "timeline": 58, + "database_system_identifier": "6965971025273547206", + "patroni": { + "version": "2.0.2", + "scope": "patroni-demo" + } +} diff --git a/test/json/node_is_primary_ok.json b/test/json/node_is_primary_ok.json new file mode 100644 index 0000000..d47b18b --- /dev/null +++ b/test/json/node_is_primary_ok.json @@ -0,0 +1,26 @@ +{ + "state": "running", + "postmaster_start_time": "2021-08-11 07:02:20.732 UTC", + "role": "master", + "server_version": 110012, + "cluster_unlocked": false, + "xlog": { + "location": 1174407088 + }, + "timeline": 58, + "replication": [ + { + "usename": "replicator", + "application_name": "srv1", + "client_addr": "10.20.199.3", + "state": "streaming", + "sync_state": "async", + "sync_priority": 0 + } + ], + "database_system_identifier": "6965971025273547206", + "patroni": { + "version": "2.0.2", + "scope": "patroni-demo" + } +} diff --git a/test/json/node_is_replica_ko.json b/test/json/node_is_replica_ko.json new file mode 100644 index 0000000..d47b18b --- /dev/null +++ b/test/json/node_is_replica_ko.json @@ -0,0 +1,26 @@ +{ + "state": "running", + "postmaster_start_time": "2021-08-11 07:02:20.732 UTC", + "role": "master", + "server_version": 110012, + "cluster_unlocked": false, + "xlog": { + "location": 1174407088 + }, + "timeline": 58, + "replication": [ + { + "usename": "replicator", + "application_name": "srv1", + "client_addr": "10.20.199.3", + "state": "streaming", + "sync_state": "async", + "sync_priority": 0 + } + ], + "database_system_identifier": "6965971025273547206", + "patroni": { + "version": "2.0.2", + "scope": "patroni-demo" + } +} diff --git 
a/test/json/node_is_replica_ok.json b/test/json/node_is_replica_ok.json new file mode 100644 index 0000000..b697269 --- /dev/null +++ b/test/json/node_is_replica_ok.json @@ -0,0 +1,19 @@ +{ + "state": "running", + "postmaster_start_time": "2021-08-11 07:57:51.693 UTC", + "role": "replica", + "server_version": 110012, + "cluster_unlocked": false, + "xlog": { + "received_location": 1174407088, + "replayed_location": 1174407088, + "replayed_timestamp": null, + "paused": false + }, + "timeline": 58, + "database_system_identifier": "6965971025273547206", + "patroni": { + "version": "2.0.2", + "scope": "patroni-demo" + } +} diff --git a/test/json/node_patroni_version.json b/test/json/node_patroni_version.json new file mode 100644 index 0000000..d47b18b --- /dev/null +++ b/test/json/node_patroni_version.json @@ -0,0 +1,26 @@ +{ + "state": "running", + "postmaster_start_time": "2021-08-11 07:02:20.732 UTC", + "role": "master", + "server_version": 110012, + "cluster_unlocked": false, + "xlog": { + "location": 1174407088 + }, + "timeline": 58, + "replication": [ + { + "usename": "replicator", + "application_name": "srv1", + "client_addr": "10.20.199.3", + "state": "streaming", + "sync_state": "async", + "sync_priority": 0 + } + ], + "database_system_identifier": "6965971025273547206", + "patroni": { + "version": "2.0.2", + "scope": "patroni-demo" + } +} diff --git a/test/json/node_tl_has_changed.json b/test/json/node_tl_has_changed.json new file mode 100644 index 0000000..d47b18b --- /dev/null +++ b/test/json/node_tl_has_changed.json @@ -0,0 +1,26 @@ +{ + "state": "running", + "postmaster_start_time": "2021-08-11 07:02:20.732 UTC", + "role": "master", + "server_version": 110012, + "cluster_unlocked": false, + "xlog": { + "location": 1174407088 + }, + "timeline": 58, + "replication": [ + { + "usename": "replicator", + "application_name": "srv1", + "client_addr": "10.20.199.3", + "state": "streaming", + "sync_state": "async", + "sync_priority": 0 + } + ], + 
"database_system_identifier": "6965971025273547206", + "patroni": { + "version": "2.0.2", + "scope": "patroni-demo" + } +} diff --git a/test/test_cluster_config_has_changed.py b/test/test_cluster_config_has_changed.py new file mode 100644 index 0000000..c0c62e2 --- /dev/null +++ b/test/test_cluster_config_has_changed.py @@ -0,0 +1,103 @@ +from click.testing import CliRunner +from pytest_mock import MockerFixture + +from check_patroni.cli import main +from tools import my_mock, here + + +def test_cluster_config_has_changed_params(mocker: MockerFixture) -> None: + runner = CliRunner() + + my_mock(mocker, "cluster_config_has_changed", 200) + result = runner.invoke( + main, + [ + "-e", + "https://10.20.199.3:8008", + "cluster_config_has_changed", + "--hash", + "640df9f0211c791723f18fc3ed9dbb95", + "--state-file", + str(here / "fake_file_name.state_file"), + ], + ) + assert result.exit_code == 3 + + result = runner.invoke( + main, ["-e", "https://10.20.199.3:8008", "cluster_config_has_changed"] + ) + assert result.exit_code == 3 + + +def test_cluster_config_has_changed_ok_with_hash(mocker: MockerFixture) -> None: + runner = CliRunner() + + my_mock(mocker, "cluster_config_has_changed", 200) + result = runner.invoke( + main, + [ + "-e", + "https://10.20.199.3:8008", + "cluster_config_has_changed", + "--hash", + "640df9f0211c791723f18fc3ed9dbb95", + ], + ) + assert result.exit_code == 0 + + +def test_cluster_config_has_changed_ok_with_state_file(mocker: MockerFixture) -> None: + runner = CliRunner() + + with open(here / "cluster_config_has_changed.state_file", "w") as f: + f.write('{"hash": "640df9f0211c791723f18fc3ed9dbb95"}') + + my_mock(mocker, "cluster_config_has_changed", 200) + result = runner.invoke( + main, + [ + "-e", + "https://10.20.199.3:8008", + "cluster_config_has_changed", + "--state-file", + str(here / "cluster_config_has_changed.state_file"), + ], + ) + assert result.exit_code == 0 + + +def test_cluster_config_has_changed_ko_with_hash(mocker: 
MockerFixture) -> None: + runner = CliRunner() + + my_mock(mocker, "cluster_config_has_changed", 200) + result = runner.invoke( + main, + [ + "-e", + "https://10.20.199.3:8008", + "cluster_config_has_changed", + "--hash", + "640df9f0211c791723f18fc3edffffff", + ], + ) + assert result.exit_code == 2 + + +def test_cluster_config_has_changed_ko_with_state_file(mocker: MockerFixture) -> None: + runner = CliRunner() + + with open(here / "cluster_config_has_changed.state_file", "w") as f: + f.write('{"hash": "640df9f0211c791723f18fc3edffffff"}') + + my_mock(mocker, "cluster_config_has_changed", 200) + result = runner.invoke( + main, + [ + "-e", + "https://10.20.199.3:8008", + "cluster_config_has_changed", + "--state-file", + str(here / "cluster_config_has_changed.state_file"), + ], + ) + assert result.exit_code == 2 diff --git a/test/test_cluster_has_leader.py b/test/test_cluster_has_leader.py new file mode 100644 index 0000000..cf6aa13 --- /dev/null +++ b/test/test_cluster_has_leader.py @@ -0,0 +1,29 @@ +from click.testing import CliRunner +from pytest_mock import MockerFixture + +from check_patroni.cli import main + +from tools import my_mock + + +def test_cluster_has_leader_ok(mocker: MockerFixture) -> None: + runner = CliRunner() + + my_mock(mocker, "cluster_has_leader_ok", 200) + result = runner.invoke( + main, ["-e", "https://10.20.199.3:8008", "cluster_has_leader"] + ) + assert result.exit_code == 0 + # FIXME Not captured ??? 
+    # assert "CLUSTERHASLEADER OK - has_leader is 1 | has_leader=1;;@0" in result.output
+
+
+def test_cluster_has_leader_ko(mocker: MockerFixture) -> None:
+    runner = CliRunner()
+
+    my_mock(mocker, "cluster_has_leader_ko", 200)
+    result = runner.invoke(
+        main, ["-e", "https://10.20.199.3:8008", "cluster_has_leader"]
+    )
+    assert result.exit_code == 2
+    # assert "CLUSTERHASLEADER CRITICAL - has_leader is 0 (outside range @0:0) | has_leader=0;;@0" in result.output
diff --git a/test/test_cluster_has_replica.py b/test/test_cluster_has_replica.py
new file mode 100644
index 0000000..7d414a7
--- /dev/null
+++ b/test/test_cluster_has_replica.py
@@ -0,0 +1,44 @@
+from click.testing import CliRunner
+from pytest_mock import MockerFixture
+
+from check_patroni.cli import main
+
+from tools import my_mock
+
+
+# TODO Lag threshold tests
+def test_cluster_has_relica_ok(mocker: MockerFixture) -> None:
+    """The "cluster_has_replica_ok" mock data without thresholds must exit with 0."""
+    runner = CliRunner()
+
+    my_mock(mocker, "cluster_has_replica_ok", 200)
+    result = runner.invoke(
+        main, ["-e", "https://10.20.199.3:8008", "cluster_has_replica"]
+    )
+    assert result.exit_code == 0
+
+
+def test_cluster_has_replica_ok_with_count_thresholds(mocker: MockerFixture) -> None:
+    """The "cluster_has_replica_ko" mock data with count thresholds must exit with 2."""
+    # FIXME(review): "--warninng" below looks like a typo for "--warning", the
+    # spelling used by the cluster_node_count tests. If the command rejects the
+    # option, the expected exit code 2 presumably comes from click's usage error
+    # and not from the thresholds; confirm. Also verify that mock data named
+    # "cluster_has_replica_ko" exists: the only fixture this patch adds for
+    # this check is test/json/cluster_has_replica_ok.json.
+    runner = CliRunner()
+
+    my_mock(mocker, "cluster_has_replica_ko", 200)
+    result = runner.invoke(
+        main,
+        [
+            "-e",
+            "https://10.20.199.3:8008",
+            "cluster_has_replica",
+            "--warninng",
+            "@2",
+            "--critical",
+            "@0:1",
+        ],
+    )
+    assert result.exit_code == 2
diff --git a/test/test_cluster_node_count.py b/test/test_cluster_node_count.py
new file mode 100644
index 0000000..015e2f4
--- /dev/null
+++ b/test/test_cluster_node_count.py
@@ -0,0 +1,115 @@
+from click.testing import CliRunner
+from pytest_mock import MockerFixture
+
+from check_patroni.cli import main
+
+from tools import my_mock
+
+
+def test_cluster_node_count_ok(mocker: MockerFixture) -> None:
+    runner = CliRunner()
+
+    my_mock(mocker, "cluster_node_count_ok", 200)
+    result = runner.invoke(
+        main, ["-e",
"https://10.20.199.3:8008", "cluster_node_count"] + ) + assert result.exit_code == 0 + + +def test_cluster_node_count_ok_with_thresholds(mocker: MockerFixture) -> None: + runner = CliRunner() + + my_mock(mocker, "cluster_node_count_ok", 200) + result = runner.invoke( + main, + [ + "-e", + "https://10.20.199.3:8008", + "cluster_node_count", + "--warning", + "@0:1", + "--critical", + "@2", + "--running-warning", + "@2", + "--running-critical", + "@0:1", + ], + ) + assert result.exit_code == 0 + + +def test_cluster_node_count_running_warning(mocker: MockerFixture) -> None: + runner = CliRunner() + + my_mock(mocker, "cluster_node_count_running_warning", 200) + result = runner.invoke( + main, + [ + "-e", + "https://10.20.199.3:8008", + "cluster_node_count", + "--running-warning", + "@2", + "--running-critical", + "@0:1", + ], + ) + assert result.exit_code == 1 + + +def test_cluster_node_count_running_critical(mocker: MockerFixture) -> None: + runner = CliRunner() + + my_mock(mocker, "cluster_node_count_running_critical", 200) + result = runner.invoke( + main, + [ + "-e", + "https://10.20.199.3:8008", + "cluster_node_count", + "--running-warning", + "@2", + "--running-critical", + "@0:1", + ], + ) + assert result.exit_code == 2 + + +def test_cluster_node_count_warning(mocker: MockerFixture) -> None: + runner = CliRunner() + + my_mock(mocker, "cluster_node_count_warning", 200) + result = runner.invoke( + main, + [ + "-e", + "https://10.20.199.3:8008", + "cluster_node_count", + "--warning", + "@2", + "--critical", + "@0:1", + ], + ) + assert result.exit_code == 1 + + +def test_cluster_node_count_critical(mocker: MockerFixture) -> None: + runner = CliRunner() + + my_mock(mocker, "cluster_node_count_critical", 200) + result = runner.invoke( + main, + [ + "-e", + "https://10.20.199.3:8008", + "cluster_node_count", + "--warning", + "@2", + "--critical", + "@0:1", + ], + ) + assert result.exit_code == 2 diff --git a/test/test_node_is_alive.py b/test/test_node_is_alive.py new 
file mode 100644
index 0000000..6c74562
--- /dev/null
+++ b/test/test_node_is_alive.py
@@ -0,0 +1,22 @@
+from click.testing import CliRunner
+from pytest_mock import MockerFixture
+
+from check_patroni.cli import main
+
+from tools import my_mock
+
+
+def test_node_is_alive_ok(mocker: MockerFixture) -> None:
+    """Expect exit code 0 when the mocked API call returns status 200."""
+    my_mock(mocker, "node_is_alive", 200)
+    argv = ["-e", "https://10.20.199.3:8008", "node_is_alive"]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 0
+
+
+def test_node_is_alive_ko(mocker: MockerFixture) -> None:
+    """Expect exit code 2 when the mocked API call returns status 404."""
+    my_mock(mocker, "node_is_alive", 404)
+    argv = ["-e", "https://10.20.199.3:8008", "node_is_alive"]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 2
diff --git a/test/test_node_is_pending_restart.py b/test/test_node_is_pending_restart.py
new file mode 100644
index 0000000..bb47a7a
--- /dev/null
+++ b/test/test_node_is_pending_restart.py
@@ -0,0 +1,22 @@
+from click.testing import CliRunner
+from pytest_mock import MockerFixture
+
+from check_patroni.cli import main
+
+from tools import my_mock
+
+
+def test_node_is_pending_restart_ok(mocker: MockerFixture) -> None:
+    """Expect exit code 0 for node_is_pending_restart_ok.json with status 200."""
+    my_mock(mocker, "node_is_pending_restart_ok", 200)
+    argv = ["-e", "https://10.20.199.3:8008", "node_is_pending_restart"]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 0
+
+
+def test_node_is_pending_restart_ko(mocker: MockerFixture) -> None:
+    """Expect exit code 2 for node_is_pending_restart_ko.json with status 404."""
+    my_mock(mocker, "node_is_pending_restart_ko", 404)
+    argv = ["-e", "https://10.20.199.3:8008", "node_is_pending_restart"]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 2
diff --git a/test/test_node_is_primary.py b/test/test_node_is_primary.py
new file mode 100644
index 0000000..c81fc29
--- /dev/null
+++ b/test/test_node_is_primary.py
@@ -0,0 +1,22 @@
+from click.testing import CliRunner
+from pytest_mock import MockerFixture
+
+from check_patroni.cli import main
+
+from tools import my_mock
+
+
+def test_node_is_primary_ok(mocker: MockerFixture) -> None:
+    """Expect exit code 0 for node_is_primary_ok.json served with status 200."""
+    my_mock(mocker, "node_is_primary_ok", 200)
+    argv = ["-e", "https://10.20.199.3:8008", "node_is_primary"]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 0
+
+
+def test_node_is_primary_ko(mocker: MockerFixture) -> None:
+    """Expect exit code 2 for node_is_primary_ko.json served with status 404."""
+    my_mock(mocker, "node_is_primary_ko", 404)
+    argv = ["-e", "https://10.20.199.3:8008", "node_is_primary"]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 2
diff --git a/test/test_node_is_replica.py b/test/test_node_is_replica.py
new file mode 100644
index 0000000..e5f7254
--- /dev/null
+++ b/test/test_node_is_replica.py
@@ -0,0 +1,31 @@
+from click.testing import CliRunner
+from pytest_mock import MockerFixture
+
+from check_patroni.cli import main
+
+from tools import my_mock
+
+
+def test_node_is_replica_ok(mocker: MockerFixture) -> None:
+    """Expect exit code 0 for node_is_replica_ok.json served with status 200."""
+    my_mock(mocker, "node_is_replica_ok", 200)
+    argv = ["-e", "https://10.20.199.3:8008", "node_is_replica"]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 0
+
+
+def test_node_is_replica_ko(mocker: MockerFixture) -> None:
+    """Expect exit code 2 for node_is_replica_ko.json served with status 404."""
+    my_mock(mocker, "node_is_replica_ko", 404)
+    argv = ["-e", "https://10.20.199.3:8008", "node_is_replica"]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 2
+
+
+def test_node_is_replica_ko_lag(mocker: MockerFixture) -> None:
+    """Expect exit code 2 with `--lag 100` when the mocked status is 404."""
+    # We don't do the check ourselves, patroni does it and changes the return code
+    my_mock(mocker, "node_is_replica_ok", 404)
+    argv = ["-e", "https://10.20.199.3:8008", "node_is_replica", "--lag", "100"]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 2
diff --git a/test/test_node_patroni_version.py b/test/test_node_patroni_version.py
new file mode 100644
index 0000000..7e62dc9
--- /dev/null
+++ b/test/test_node_patroni_version.py
@@ -0,0 +1,40 @@
+from click.testing import CliRunner
+from pytest_mock import MockerFixture
+
+from check_patroni.cli import main
+
+from tools import my_mock
+
+
+def test_node_patroni_version_ok(mocker: MockerFixture) -> None:
+    """Expect exit code 0 when `--patroni-version 2.0.2` is requested.
+
+    NOTE(review): presumably node_patroni_version.json reports 2.0.2 - confirm.
+    """
+    my_mock(mocker, "node_patroni_version", 200)
+    argv = [
+        "-e",
+        "https://10.20.199.3:8008",
+        "node_patroni_version",
+        "--patroni-version",
+        "2.0.2",
+    ]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 0
+
+
+def test_node_patroni_version_ko(mocker: MockerFixture) -> None:
+    """Expect exit code 2 when `--patroni-version 1.0.0` is requested.
+
+    The fixture is the same one the `ok` test uses; only the version differs.
+    """
+    my_mock(mocker, "node_patroni_version", 200)
+    argv = [
+        "-e",
+        "https://10.20.199.3:8008",
+        "node_patroni_version",
+        "--patroni-version",
+        "1.0.0",
+    ]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 2
diff --git a/test/test_node_tl_has_changed.py b/test/test_node_tl_has_changed.py
new file mode 100644
index 0000000..e85fc68
--- /dev/null
+++ b/test/test_node_tl_has_changed.py
@@ -0,0 +1,88 @@
+from click.testing import CliRunner
+from pytest_mock import MockerFixture
+
+from check_patroni.cli import main
+
+from tools import my_mock, here
+
+
+def test_node_tl_has_changed_params(mocker: MockerFixture) -> None:
+    """Expect exit code 3 with both --timeline and --state-file, or with neither."""
+    my_mock(mocker, "node_tl_has_changed", 200)
+    runner = CliRunner()
+
+    argv = [
+        "-e",
+        "https://10.20.199.3:8008",
+        "node_tl_has_changed",
+        "--timeline",
+        "58",
+        "--state-file",
+        str(here / "fake_file_name.state_file"),
+    ]
+    result = runner.invoke(main, argv)
+    assert result.exit_code == 3
+
+    argv = ["-e", "https://10.20.199.3:8008", "node_tl_has_changed"]
+    result = runner.invoke(main, argv)
+    assert result.exit_code == 3
+
+
+def test_node_tl_has_changed_ok_with_timeline(mocker: MockerFixture) -> None:
+    """Expect exit code 0 when `--timeline 58` is passed."""
+    my_mock(mocker, "node_tl_has_changed", 200)
+    argv = [
+        "-e",
+        "https://10.20.199.3:8008",
+        "node_tl_has_changed",
+        "--timeline",
+        "58",
+    ]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 0
+
+
+def test_node_tl_has_changed_ok_with_state_file(mocker: MockerFixture) -> None:
+    """Expect exit code 0 when the state file records timeline 58."""
+    state_file = here / "node_tl_has_changed.state_file"
+    state_file.write_text('{"timeline": 58}')
+    my_mock(mocker, "node_tl_has_changed", 200)
+    argv = [
+        "-e",
+        "https://10.20.199.3:8008",
+        "node_tl_has_changed",
+        "--state-file",
+        str(state_file),
+    ]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 0
+
+
+def test_node_tl_has_changed_ko_with_timeline(mocker: MockerFixture) -> None:
+    """Expect exit code 2 when `--timeline 700` is passed."""
+    my_mock(mocker, "node_tl_has_changed", 200)
+    argv = [
+        "-e",
+        "https://10.20.199.3:8008",
+        "node_tl_has_changed",
+        "--timeline",
+        "700",
+    ]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 2
+
+
+def test_node_tl_has_changed_ko_with_state_file(mocker: MockerFixture) -> None:
+    """Expect exit code 2 when the state file records timeline 700."""
+    state_file = here / "node_tl_has_changed.state_file"
+    state_file.write_text('{"timeline": 700}')
+    my_mock(mocker, "node_tl_has_changed", 200)
+    argv = [
+        "-e",
+        "https://10.20.199.3:8008",
+        "node_tl_has_changed",
+        "--state-file",
+        str(state_file),
+    ]
+    result = CliRunner().invoke(main, argv)
+    assert result.exit_code == 2
diff --git a/test/tools.py b/test/tools.py
new file mode 100644
index 0000000..8bef76b
--- /dev/null
+++ b/test/tools.py
@@ -0,0 +1,41 @@
+import attr
+import pathlib
+from pytest_mock import MockerFixture
+
+from check_patroni.types import PatroniResource
+
+# Directory containing this module; fixtures live in its "json" subdirectory.
+here = pathlib.Path(__file__).parent
+
+
+def getjson(name: str) -> bytes:
+    """Return the raw bytes of the fixture ``json/<name>.json``.
+
+    The file is read in binary mode: a text-mode read would first decode it
+    with the platform's default encoding before re-encoding it as UTF-8.
+    """
+    return (here / "json" / f"{name}.json").read_bytes()
+
+
+@attr.s(auto_attribs=True, frozen=True, slots=True)
+class MockApiReturnCode:
+    """Frozen value object returned by the patched ``rest_api`` method."""
+
+    data: bytes  # fixture content, as returned by getjson()
+    status: int  # status code given to my_mock()
+
+
+def my_mock(mocker: MockerFixture, json_file: str, status: int) -> None:
+    """Patch ``PatroniResource.rest_api`` so that it serves a JSON fixture.
+
+    Args:
+        mocker: pytest-mock fixture used to install the patch.
+        json_file: fixture name, without the ``.json`` suffix (see getjson).
+        status: value stored in the ``status`` field of the returned object.
+    """
+
+    def mock_rest_api(self: PatroniResource, service: str) -> MockApiReturnCode:
+        # The fixture is only read when the patched method is actually called.
+        return MockApiReturnCode(getjson(json_file), status)
+
+    mocker.patch("check_patroni.types.PatroniResource.rest_api", mock_rest_api)