First working version
This commit is contained in:
parent
aa17162871
commit
1e6adc6a1a
10
.flake8
Normal file
10
.flake8
Normal file
|
@ -0,0 +1,10 @@
|
|||
[flake8]
|
||||
doctests = True
|
||||
ignore =
|
||||
E501, # line too long
|
||||
exclude =
|
||||
.git,
|
||||
.mypy_cache,
|
||||
.tox,
|
||||
.venv,
|
||||
mypy_config = mypy.ini
|
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
check_patroni/__pycache__/
|
||||
test/*.state_file
|
|
@ -1,17 +1,521 @@
|
|||
import requests
|
||||
import click
|
||||
from configparser import ConfigParser
|
||||
import nagiosplugin
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
from . import __version__
|
||||
from .cluster import (
|
||||
ClusterConfigHasChanged,
|
||||
ClusterConfigHasChangedSummary,
|
||||
ClusterHasLeader,
|
||||
ClusterHasLeaderSummary,
|
||||
ClusterHasReplica,
|
||||
ClusterNodeCount,
|
||||
)
|
||||
from .node import (
|
||||
NodeIsAlive,
|
||||
NodeIsAliveSummary,
|
||||
NodeIsPendingRestart,
|
||||
NodeIsPendingRestartSummary,
|
||||
NodeIsPrimary,
|
||||
NodeIsPrimarySummary,
|
||||
NodeIsReplica,
|
||||
NodeIsReplicaSummary,
|
||||
NodePatroniVersion,
|
||||
NodePatroniVersionSummary,
|
||||
NodeTLHasChanged,
|
||||
NodeTLHasChangedSummary,
|
||||
)
|
||||
from .types import ConnectionInfo
|
||||
|
||||
|
||||
def check_is_master(address: str = "127.0.0.1", port: int = 8008) -> bool:
    """Return True if the Patroni API at address:port reports this node
    as the leader (HTTP 200 on the /leader endpoint).
    """
    # requests needs an explicit scheme; with the bare default IP the call
    # would raise requests.exceptions.MissingSchema before reaching the API.
    if "://" not in address:
        address = f"http://{address}"
    r = requests.get(f"{address}:{int(port)}/leader")
    return r.status_code == 200
|
||||
def print_version(ctx: click.Context, param: str, value: str) -> None:
    """Eager click callback: print the program version and stop processing."""
    # Do nothing while click is completing/resilient-parsing or when the
    # flag was not given.
    if value and not ctx.resilient_parsing:
        click.echo(f"Version {__version__}")
        ctx.exit()
|
||||
|
||||
|
||||
def check_is_replica(address: str = "127.0.0.1", port: int = 8008) -> bool:
    """Return True if the Patroni API at address:port reports this node
    as a replica (HTTP 200 on the /replica endpoint).
    """
    # requests needs an explicit scheme; with the bare default IP the call
    # would raise requests.exceptions.MissingSchema before reaching the API.
    if "://" not in address:
        address = f"http://{address}"
    r = requests.get(f"{address}:{int(port)}/replica")
    return r.status_code == 200
|
||||
DEFAULT_CFG = "config.ini"
|
||||
|
||||
|
||||
def main() -> None:
    # NOTE(review): leftover smoke-test entry point. It is shadowed by the
    # click-group `main` defined later in this module, so it is never
    # reachable — candidate for removal.
    print(check_is_master())
    print(check_is_replica())
    print("allgood")
|
||||
def configure(ctx: click.Context, param: str, filename: str) -> None:
    """Use a config file for the parameters

    stolen from https://jwodder.github.io/kbits/posts/click-config/

    Reads `filename` with ConfigParser and fills `ctx.default_map` so that
    INI values become click option defaults. Section "options" feeds the
    group's options; "options.<subcommand>" feeds that subcommand.
    """
    # FIXME should use click-configfile / click-config-file ?
    cfg = ConfigParser()
    cfg.read(filename)
    ctx.default_map = {}
    for sect in cfg.sections():
        command_path = sect.split(".")
        # only sections rooted at "options" are relevant
        if command_path[0] != "options":
            continue
        # walk/create the nested default dict matching the section path
        defaults = ctx.default_map
        for cmdname in command_path[1:]:
            defaults = defaults.setdefault(cmdname, {})
        defaults.update(cfg[sect])
        try:
            # endpoints is an array of addresses separated by ,
            if isinstance(defaults["endpoints"], str):
                defaults["endpoints"] = re.split(r"\s*,\s*", defaults["endpoints"])
        except KeyError:
            # section has no "endpoints" key: nothing to split
            pass
|
||||
|
||||
|
||||
@click.group()
@click.option(
    "--config",
    type=click.Path(dir_okay=False),
    default=DEFAULT_CFG,
    # eager + expose_value=False: the callback fills ctx.default_map and the
    # option itself is not passed to main()
    callback=configure,
    is_eager=True,
    expose_value=False,
    help="Read option defaults from the specified INI file",
    show_default=True,
)
@click.option(
    "-e",
    "--endpoints",
    "endpoints",
    type=str,
    multiple=True,
    default=["http://127.0.0.1:8008"],
    help="API endpoint. Can be specified multiple times.",
)
@click.option(
    "--cert_file",
    "cert_file",
    type=str,
    help="File with the client certificate.",
)
@click.option(
    "--key_file",
    "key_file",
    type=str,
    help="File with the client key.",
)
@click.option(
    "--ca_file",
    "ca_file",
    type=str,
    help="The CA certificate.",
)
@click.option(
    "-v",
    "--verbose",
    "verbose",
    count=True,
    help="Increase verbosity -v (info)/-vv (warning)/-vvv (debug)",
)
@click.option(
    "--version", is_flag=True, callback=print_version, expose_value=False, is_eager=True
)
@click.option(
    "--timeout",
    "timeout",
    default=2,
    type=int,
    help="Timeout in seconds for the API queries (0 to disable)",
)
@click.pass_context
@nagiosplugin.guarded
def main(
    ctx: click.Context,
    endpoints: List[str],
    cert_file: str,
    key_file: str,
    ca_file: str,
    verbose: bool,
    timeout: int,
) -> None:
    """Nagios plugin for patroni."""
    # Stash the connection settings on the context; every subcommand reads
    # them back through ctx.obj.
    ctx.obj = ConnectionInfo(endpoints, cert_file, key_file, ca_file)
|
||||
|
||||
# TODO Not all "is/has" services have the same return code for ok. Check if it's ok
|
||||
# Typing
|
||||
|
||||
|
||||
@main.command(name="cluster_node_count")  # required otherwise _ are converted to -
@click.option(
    "-w",
    "--warning",
    "warning",
    type=str,
    help="Warning threshold for the number of nodes.",
)
@click.option(
    "-c",
    "--critical",
    "critical",
    type=str,
    # fix: "nimber" -> "number"
    help="Critical threshold for the number of nodes.",
)
@click.option(
    "--running-warning",
    "running_warning",
    type=str,
    help="Warning threshold for the number of running nodes.",
)
@click.option(
    "--running-critical",
    "running_critical",
    type=str,
    # fix: "nimber" -> "number"
    help="Critical threshold for the number of running nodes.",
)
@click.pass_context
@nagiosplugin.guarded
def cluster_node_count(
    ctx: click.Context,
    warning: str,
    critical: str,
    running_warning: str,
    running_critical: str,
) -> None:
    """Count the number of nodes in the cluster.

    \b
    Check:
    * Compares the number of nodes against the normal and running node warning and critical thresholds.
    * `OK`! If they are not provided.

    \b
    Perfdata:
    * `members`: the member count.
    * all the roles of the nodes in the cluster with their number.
    """
    check = nagiosplugin.Check()
    check.add(
        ClusterNodeCount(ctx.obj),
        # total member count checked against -w / -c
        nagiosplugin.ScalarContext(
            "members",
            warning,
            critical,
        ),
        # members in "running" state checked against --running-*
        nagiosplugin.ScalarContext(
            "state_running",
            running_warning,
            running_critical,
        ),
        # perfdata-only contexts (no thresholds)
        nagiosplugin.ScalarContext("members_roles"),
        nagiosplugin.ScalarContext("members_statuses"),
    )
    # verbosity and timeout live on the parent `main` group
    check.main(
        verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"]
    )
|
||||
|
||||
|
||||
@main.command(name="cluster_has_leader")
@click.pass_context
@nagiosplugin.guarded
def cluster_has_leader(ctx: click.Context) -> None:
    """Check if the cluster has a leader.

    \b
    Check:
    * `OK`: if there is a leader node.
    * `CRITICAL`: otherwise

    Perfdata : `has_leader` is 1 if there is a leader node, 0 otherwise
    """
    # TODO: Manage primary or standby leader in the same place ?
    check = nagiosplugin.Check()
    check.add(
        ClusterHasLeader(ctx.obj),
        # "@0:0": raise critical when has_leader is 0
        nagiosplugin.ScalarContext("has_leader", None, "@0:0"),
        ClusterHasLeaderSummary(),
    )
    check.main(
        verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"]
    )
|
||||
|
||||
|
||||
@main.command(name="cluster_has_replica")
@click.option(
    "-w",
    "--warning",
    "warning",
    type=str,
    help="Warning threshold for the number of nodes.",
)
@click.option(
    "-c",
    "--critical",
    "critical",
    type=str,
    help="Critical threshold for the number of replica nodes.",
)
@click.option(
    "--lag-warning", "lag_warning", type=str, help="Warning threshold for the lag."
)
# FIWME how do we manage maximum_lag_on_failover without doing many api calls
@click.option(
    "--lag-critical", "lag_critical", type=str, help="Critical threshold for the lag."
)
@click.pass_context
@nagiosplugin.guarded
def cluster_has_replica(
    ctx: click.Context, warning: str, critical: str, lag_warning: str, lag_critical: str
) -> None:
    """Check if the cluster has replicas and their lag.

    \b
    Check:
    * `OK`: if the replica count and their lag are compatible with the replica count and lag thresholds.
    * `WARNING` / `CRITICAL`: otherwise

    \b
    Perfdata :
    * replica count
    * the lag of each replica labelled with "member name"_lag
    """
    # FIXME the idea here would be to make sur we have a replica.
    # lag should be check to prune invalid replicas
    check = nagiosplugin.Check()
    check.add(
        ClusterHasReplica(ctx.obj),
        # number of running replicas checked against -w / -c
        nagiosplugin.ScalarContext(
            "replica_count",
            warning,
            critical,
        ),
        # per-replica lag checked against --lag-*
        nagiosplugin.ScalarContext(
            "replica_lag",
            lag_warning,
            lag_critical,
        ),
    )
    check.main(
        verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"]
    )
|
||||
|
||||
|
||||
@main.command(name="cluster_config_has_changed")
@click.option("--hash", "config_hash", type=str, help="A hash to compare with.")
@click.option(
    "-s",
    "--state-file",
    "state_file",
    type=str,
    help="A state file to store the tl number into.",
)
@click.pass_context
@nagiosplugin.guarded
def cluster_config_has_changed(
    ctx: click.Context, config_hash: str, state_file: str
) -> None:
    """Check if the hash of the configuration has changed.

    Note: either a hash or a state file must be provided for this service to work.

    \b
    Check:
    * `OK`: The hash didn't change
    * `CRITICAL`: The hash of the configuration has changed compared to the input (`--hash`) or last time (`--state_file`)

    \b
    Perfdata :
    * `is_configuration_changed` is 1 if the configuration has changed
    """
    # FIXME hash in perfdata ?
    # exactly one of --hash / --state-file must be supplied
    if (config_hash is None and state_file is None) or (
        config_hash is not None and state_file is not None
    ):
        raise click.UsageError(
            "Either --hash or --state-file should be provided for this service", ctx
        )

    check = nagiosplugin.Check()
    check.add(
        ClusterConfigHasChanged(ctx.obj, config_hash, state_file),
        # "@1:1": raise critical when the change flag is 1
        nagiosplugin.ScalarContext("is_configuration_changed", None, "@1:1"),
        ClusterConfigHasChangedSummary(),
    )
    check.main(
        verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"]
    )
|
||||
|
||||
|
||||
@main.command(name="node_is_primary")
@click.pass_context
@nagiosplugin.guarded
def node_is_primary(ctx: click.Context) -> None:
    """Check if the node is the primary with the leader lock.

    \b
    Check:
    * `OK`: if the node is a primary with the leader lock.
    * `CRITICAL:` otherwise

    Perfdata: `is_primary` is 1 if the node is a primary with the leader lock, 0 otherwise.
    """
    check = nagiosplugin.Check()
    check.add(
        NodeIsPrimary(ctx.obj),
        # "@0:0": raise critical when is_primary is 0
        nagiosplugin.ScalarContext("is_primary", None, "@0:0"),
        NodeIsPrimarySummary(),
    )
    check.main(
        verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"]
    )
|
||||
|
||||
|
||||
@main.command(name="node_is_replica")
@click.option("--lag", "lag", type=str, help="maximum allowed lag")
@click.pass_context
@nagiosplugin.guarded
def node_is_replica(ctx: click.Context, lag: str) -> None:
    """Check if the node is a running replica with no noloadbalance tag.

    \b
    Check:
    * `OK`: if the node is a running replica with no noloadbalance tag and the lag is under the maximum threshold.
    * `CRITICAL`: otherwise

    Perfdata : `is_replica` is 1 if the node is a running replica with no noloadbalance tag and the lag is under the maximum threshold, 0 otherwise.
    """
    # add a lag check ??
    check = nagiosplugin.Check()
    check.add(
        NodeIsReplica(ctx.obj, lag),
        # "@0:0": raise critical when is_replica is 0
        nagiosplugin.ScalarContext("is_replica", None, "@0:0"),
        NodeIsReplicaSummary(lag),
    )
    check.main(
        verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"]
    )
|
||||
|
||||
|
||||
@main.command(name="node_is_pending_restart")
@click.pass_context
@nagiosplugin.guarded
def node_is_pending_restart(ctx: click.Context) -> None:
    """Check if the node is in pending restart state.

    This situation can arise if the configuration has been modified but
    requires a restart of PostgreSQL.

    \b
    Check:
    * `OK`: if the node has no pending restart tag.
    * `CRITICAL`: otherwise

    Perfdata: `is_pending_restart` is 1 if the node has the pending restart tag, 0 otherwise.
    """
    check = nagiosplugin.Check()
    check.add(
        NodeIsPendingRestart(ctx.obj),
        # "@1:1": raise critical when is_pending_restart is 1
        nagiosplugin.ScalarContext("is_pending_restart", None, "@1:1"),
        NodeIsPendingRestartSummary(),
    )
    check.main(
        verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"]
    )
|
||||
|
||||
|
||||
@main.command(name="node_tl_has_changed")
@click.option(
    "--timeline", "timeline", type=str, help="A timeline number to compare with."
)
@click.option(
    "-s",
    "--state-file",
    "state_file",
    type=str,
    help="A state file to store the last tl number into.",
)
@click.pass_context
@nagiosplugin.guarded
def node_tl_has_changed(ctx: click.Context, timeline: str, state_file: str) -> None:
    """Check if the timeline has changed.

    Note: either a timeline or a state file must be provided for this service to work.

    \b
    Check:
    * `OK`: The timeline is the same as last time (`--state_file`) or the inputed timeline (`--timeline`)
    * `CRITICAL`: The tl is not the same.

    \b
    Perfdata :
    * `is_timeline_changed` is 1 if the timeline has changed, 0 otherwise
    * `timeline` is the current timeline number
    """
    # exactly one of --timeline / --state-file must be supplied
    if (timeline is None and state_file is None) or (
        timeline is not None and state_file is not None
    ):
        raise click.UsageError(
            "Either --timeline or --state-file should be provided for this service", ctx
        )

    check = nagiosplugin.Check()
    check.add(
        NodeTLHasChanged(ctx.obj, timeline, state_file),
        # "@1:1": raise critical when the change flag is 1
        nagiosplugin.ScalarContext("is_timeline_changed", None, "@1:1"),
        # perfdata-only context carrying the current timeline number
        nagiosplugin.ScalarContext("timeline"),
        NodeTLHasChangedSummary(timeline),
    )
    check.main(
        verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"]
    )
|
||||
|
||||
|
||||
@main.command(name="node_patroni_version")
@click.option(
    "--patroni-version",
    "patroni_version",
    type=str,
    help="Patroni version to compare to",
    required=True,
)
@click.pass_context
@nagiosplugin.guarded
def node_patroni_version(ctx: click.Context, patroni_version: str) -> None:
    """Check if the version is equal to the input

    \b
    Check:
    * `OK`: The version is the same as the input `--patroni-version`
    * `CRITICAL`: otherwise.

    \b
    Perfdata :
    * `is_version_ok` is 1 if version is ok, 0 otherwise
    """
    # TODO the version cannot be written in perfdata find something else ?
    check = nagiosplugin.Check()
    check.add(
        NodePatroniVersion(ctx.obj, patroni_version),
        # "@0:0": raise critical when the version does not match
        nagiosplugin.ScalarContext("is_version_ok", None, "@0:0"),
        nagiosplugin.ScalarContext("patroni_version"),
        NodePatroniVersionSummary(patroni_version),
    )
    check.main(
        verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"]
    )
|
||||
|
||||
|
||||
@main.command(name="node_is_alive")
@click.pass_context
@nagiosplugin.guarded
def node_is_alive(ctx: click.Context) -> None:
    """Check if the node is alive ie patroni is running.

    \b
    Check:
    * `OK`: If patroni is running.
    * `CRITICAL`: otherwise.

    \b
    Perfdata :
    * `is_running` is 1 if patroni is running, 0 otherwise
    """
    check = nagiosplugin.Check()
    check.add(
        NodeIsAlive(ctx.obj),
        # "@0:0": raise critical when is_alive is 0
        nagiosplugin.ScalarContext("is_alive", None, "@0:0"),
        NodeIsAliveSummary(),
    )
    check.main(
        verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"]
    )
|
||||
|
|
164
check_patroni/cluster.py
Normal file
164
check_patroni/cluster.py
Normal file
|
@ -0,0 +1,164 @@
|
|||
from collections import Counter
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import nagiosplugin
|
||||
|
||||
from .types import PatroniResource, ConnectionInfo, handle_unknown
|
||||
|
||||
_log = logging.getLogger("nagiosplugin")
|
||||
|
||||
|
||||
def replace_chars(text: str) -> str:
    """Sanitise a label for perfdata use: strip single quotes and turn
    spaces into underscores."""
    return text.translate(str.maketrans({"'": "", " ": "_"}))
|
||||
|
||||
|
||||
class ClusterNodeCount(PatroniResource):
    """Resource counting cluster members, their roles and their states."""

    def probe(self: "ClusterNodeCount") -> nagiosplugin.Metric:
        response = self.rest_api("cluster")
        # FIXME RC <> 200 ?
        _log.debug(f"api call status: {response.status}")
        _log.debug(f"api call data: {response.data}")

        members = json.loads(response.data)["members"]
        role_counters = Counter(replace_chars(m["role"]) for m in members)
        status_counters = Counter(replace_chars(m["state"]) for m in members)

        # The actual check: members, running state
        yield nagiosplugin.Metric("members", len(members))
        yield nagiosplugin.Metric("state_running", status_counters["running"])

        # The performance data : role
        for role, count in role_counters.items():
            yield nagiosplugin.Metric(
                f"role_{role}", count, context="members_roles"
            )

        # The performance data : statuses (except running)
        for state, count in status_counters.items():
            if state != "running":
                yield nagiosplugin.Metric(
                    f"state_{state}", count, context="members_statuses"
                )
|
||||
|
||||
|
||||
class ClusterHasLeader(PatroniResource):
    """Resource reporting whether a running leader member exists."""

    def probe(self: "ClusterHasLeader") -> nagiosplugin.Metric:
        response = self.rest_api("cluster")
        # FIXME RC <> 200 ?
        _log.debug(f"api call status: {response.status}")
        _log.debug(f"api call data: {response.data}")

        cluster = json.loads(response.data)
        # a leader counts only when it is also in the "running" state
        leader_found = any(
            member["role"] == "leader" and member["state"] == "running"
            for member in cluster["members"]
        )

        return [
            nagiosplugin.Metric(
                "has_leader",
                1 if leader_found else 0,
            )
        ]
|
||||
|
||||
|
||||
class ClusterHasLeaderSummary(nagiosplugin.Summary):
    """Human readable status line for the cluster_has_leader service."""

    def ok(self: "ClusterHasLeaderSummary", results: nagiosplugin.Result) -> str:
        return "The cluster has a running leader."

    @handle_unknown
    def problem(self: "ClusterHasLeaderSummary", results: nagiosplugin.Result) -> str:
        return "The cluster has no running leader."
|
||||
|
||||
|
||||
class ClusterHasReplica(PatroniResource):
    """Resource counting the running replicas and reporting their lag.

    Yields `replica_count` plus one `<member>_lag` metric per replica.
    """

    def probe(self: "ClusterHasReplica") -> nagiosplugin.Metric:
        r = self.rest_api("cluster")
        # FIXME RC <> 200 ?
        _log.debug(f"api call status: {r.status}")
        _log.debug(f"api call data: {r.data}")

        item_dict = json.loads(r.data)
        replicas = []
        for member in item_dict["members"]:
            # FIXME are there other acceptable states
            if member["role"] == "replica" and member["state"] == "running":
                # FIXME which lag ?
                # Bug fix: the original `break` here stopped after the first
                # running replica, capping replica_count at 1 and dropping
                # the lag perfdata of every other replica.
                replicas.append({"name": member["name"], "lag": member["lag"]})

        # The actual check
        yield nagiosplugin.Metric("replica_count", len(replicas))

        # The performance data : replicas lag
        for replica in replicas:
            yield nagiosplugin.Metric(
                f"{replica['name']}_lag", replica["lag"], context="replica_lag"
            )
|
||||
|
||||
|
||||
# FIXME is this needed ??
|
||||
# class ClusterHasReplicaSummary(nagiosplugin.Summary):
|
||||
# def ok(self, results):
|
||||
# def problem(self, results):
|
||||
|
||||
|
||||
class ClusterConfigHasChanged(PatroniResource):
    """Resource detecting a change of Patroni's dynamic configuration.

    Compares an md5 fingerprint of the `config` endpoint payload with either
    a caller-supplied hash or the hash stored in a state file.
    """

    def __init__(
        self: "ClusterConfigHasChanged",
        connection_info: ConnectionInfo,
        config_hash: str,
        state_file: str,
    ):
        super().__init__(connection_info)
        # exactly one of state_file / config_hash is expected to be set
        # (enforced by the CLI command)
        self.state_file = state_file
        self.config_hash = config_hash

    def probe(self: "ClusterConfigHasChanged") -> nagiosplugin.Metric:
        r = self.rest_api("config")
        # FIXME RC <> 200 ?
        _log.debug(f"api call status: {r.status}")
        _log.debug(f"api call data: {r.data}")

        # md5 is used purely as a cheap change-detection fingerprint here,
        # not for anything security related
        new_hash = hashlib.md5(r.data).hexdigest()

        if self.state_file is not None:
            _log.debug(f"Using state file / cookie {self.state_file}")
            cookie = nagiosplugin.Cookie(self.state_file)
            cookie.open()
            old_hash = cookie.get("hash")
            # persist the new hash for the next run
            cookie["hash"] = new_hash
            cookie.commit()
        else:
            _log.debug(f"Using input value {self.config_hash}")
            old_hash = self.config_hash

        _log.debug(f"hash info: old hash {old_hash}, new hash {new_hash}")

        return [
            nagiosplugin.Metric(
                "is_configuration_changed",
                1 if new_hash != old_hash else 0,
            )
        ]
|
||||
|
||||
|
||||
class ClusterConfigHasChangedSummary(nagiosplugin.Summary):
    """Human readable status line for the cluster_config_has_changed service."""

    def ok(self: "ClusterConfigHasChangedSummary", results: nagiosplugin.Result) -> str:
        return "The hash of patroni's dynamic configuration has not changed."

    @handle_unknown
    def problem(
        self: "ClusterConfigHasChangedSummary", results: nagiosplugin.Result
    ) -> str:
        return "The hash of patroni's dynamic configuration has changed."
|
205
check_patroni/node.py
Normal file
205
check_patroni/node.py
Normal file
|
@ -0,0 +1,205 @@
|
|||
import json
|
||||
import logging
|
||||
import nagiosplugin
|
||||
|
||||
from .types import ConnectionInfo, handle_unknown, PatroniResource
|
||||
|
||||
_log = logging.getLogger("nagiosplugin")
|
||||
|
||||
|
||||
class NodeIsPrimary(PatroniResource):
    """Resource probing the `primary` endpoint of the local node."""

    def probe(self: "NodeIsPrimary") -> nagiosplugin.Metric:
        response = self.rest_api("primary")
        _log.debug(f"api call status: {response.status}")
        _log.debug(f"api call data: {response.data}")

        # HTTP 200 means this node holds the leader lock
        is_primary = int(response.status == 200)
        return [nagiosplugin.Metric("is_primary", is_primary)]
|
||||
|
||||
|
||||
class NodeIsPrimarySummary(nagiosplugin.Summary):
    """Human readable status line for the node_is_primary service."""

    def ok(self: "NodeIsPrimarySummary", results: nagiosplugin.Result) -> str:
        return "This node is the primary with the leader lock."

    @handle_unknown
    def problem(self: "NodeIsPrimarySummary", results: nagiosplugin.Result) -> str:
        return "This node is not the primary with the leader lock."
|
||||
|
||||
|
||||
class NodeIsReplica(PatroniResource):
    """Resource probing the `replica` endpoint, optionally with a max lag."""

    def __init__(
        self: "NodeIsReplica", connection_info: ConnectionInfo, lag: str
    ) -> None:
        super().__init__(connection_info)
        # maximum lag forwarded to the API, or None to skip the lag filter
        self.lag = lag

    def probe(self: "NodeIsReplica") -> nagiosplugin.Metric:
        service = "replica" if self.lag is None else f"replica?lag={self.lag}"
        response = self.rest_api(service)
        _log.debug(f"api call status: {response.status}")
        _log.debug(f"api call data: {response.data}")

        # HTTP 200 means the API accepted this node as a valid replica
        return [nagiosplugin.Metric("is_replica", int(response.status == 200))]
|
||||
|
||||
|
||||
class NodeIsReplicaSummary(nagiosplugin.Summary):
    """Human readable status line for the node_is_replica service."""

    def __init__(self: "NodeIsReplicaSummary", lag: str) -> None:
        # maximum acceptable lag given on the command line, or None
        self.lag = lag

    def ok(self: "NodeIsReplicaSummary", results: nagiosplugin.Result) -> str:
        if self.lag is None:
            return "This node is a running replica with no noloadbalance tag."
        return f"This node is a running replica with no noloadbalance tag and the lag is under {self.lag}."

    @handle_unknown
    def problem(self: "NodeIsReplicaSummary", results: nagiosplugin.Result) -> str:
        if self.lag is None:
            return "This node is not a running replica with no noloadbalance tag."
        return f"This node is not a running replica with no noloadbalance tag and a lag under {self.lag}."
|
||||
|
||||
|
||||
class NodeIsPendingRestart(PatroniResource):
    """Resource reporting the node's pending_restart flag."""

    def probe(self: "NodeIsPendingRestart") -> nagiosplugin.Metric:
        response = self.rest_api("patroni")
        # FIXME RC <> 200 ?
        _log.debug(f"api call status: {response.status}")
        _log.debug(f"api call data: {response.data}")

        # absent key means no restart pending
        pending = json.loads(response.data).get("pending_restart", False)
        return [
            nagiosplugin.Metric(
                "is_pending_restart",
                int(bool(pending)),
            )
        ]
|
||||
|
||||
|
||||
class NodeIsPendingRestartSummary(nagiosplugin.Summary):
    """Human readable status line for the node_is_pending_restart service."""

    def ok(self: "NodeIsPendingRestartSummary", results: nagiosplugin.Result) -> str:
        return "This node doesn't have the pending restart flag."

    @handle_unknown
    def problem(
        self: "NodeIsPendingRestartSummary", results: nagiosplugin.Result
    ) -> str:
        return "This node has the pending restart flag."
|
||||
|
||||
|
||||
class NodeTLHasChanged(PatroniResource):
    """Resource detecting a timeline change on the node.

    Compares the timeline reported by the `patroni` endpoint with either a
    caller-supplied timeline or the value stored in a state file.
    """

    def __init__(
        self: "NodeTLHasChanged",
        connection_info: ConnectionInfo,
        timeline: str,
        state_file: str,
    ) -> None:
        super().__init__(connection_info)
        # exactly one of state_file / timeline is expected to be set
        # (enforced by the CLI command)
        self.state_file = state_file
        self.timeline = timeline

    def probe(self: "NodeTLHasChanged") -> nagiosplugin.Metric:
        r = self.rest_api("patroni")
        # FIXME RC <> 200 ?
        _log.debug(f"api call status: {r.status}")
        _log.debug(f"api call data: {r.data}")

        item_dict = json.loads(r.data)
        new_tl = item_dict["timeline"]

        if self.state_file is not None:
            _log.debug(f"Using state file / cookie {self.state_file}")
            cookie = nagiosplugin.Cookie(self.state_file)
            cookie.open()
            old_tl = cookie.get("timeline")
            # persist the current timeline for the next run
            cookie["timeline"] = new_tl
            cookie.commit()
        else:
            _log.debug(f"Using input value {self.timeline}")
            old_tl = self.timeline

        _log.debug(f"Tl data: old tl {old_tl}, new tl {new_tl}")

        # The actual check
        # compare as strings: the CLI option is a str while the API returns
        # an int
        yield nagiosplugin.Metric(
            "is_timeline_changed",
            1 if str(new_tl) != str(old_tl) else 0,
        )

        # The performance data : the timeline number
        yield nagiosplugin.Metric("timeline", new_tl)
|
||||
|
||||
|
||||
class NodeTLHasChangedSummary(nagiosplugin.Summary):
    """Human readable status line for the node_tl_has_changed service."""

    def __init__(self: "NodeTLHasChangedSummary", timeline: str) -> None:
        # the reference timeline given on the command line (may be None
        # when a state file is used)
        self.timeline = timeline

    def ok(self: "NodeTLHasChangedSummary", results: nagiosplugin.Result) -> str:
        return f"The timeline is still {self.timeline}."

    @handle_unknown
    def problem(self: "NodeTLHasChangedSummary", results: nagiosplugin.Result) -> str:
        return f"The expected timeline was {self.timeline} got {results['timeline'].metric}."
|
||||
|
||||
|
||||
class NodePatroniVersion(PatroniResource):
    """Resource comparing the node's patroni version with an expected one."""

    def __init__(
        self: "NodePatroniVersion",
        connection_info: ConnectionInfo,
        patroni_version: str,
    ) -> None:
        super().__init__(connection_info)
        # version string to compare against (exact string equality)
        self.patroni_version = patroni_version

    def probe(self: "NodePatroniVersion") -> nagiosplugin.Metric:
        r = self.rest_api("patroni")
        # FIXME RC <> 200 ?

        _log.debug(f"api call status: {r.status}")
        _log.debug(f"api call data: {r.data}")

        item_dict = json.loads(r.data)
        version = item_dict["patroni"]["version"]
        _log.debug(
            f"Version data: patroni version {version} input version {self.patroni_version}"
        )

        # The actual check
        return [
            nagiosplugin.Metric(
                "is_version_ok",
                1 if version == self.patroni_version else 0,
            )
        ]
|
||||
|
||||
|
||||
class NodePatroniVersionSummary(nagiosplugin.Summary):
    """Human readable status line for the node_patroni_version service."""

    def __init__(self: "NodePatroniVersionSummary", patroni_version: str) -> None:
        # expected version given on the command line
        self.patroni_version = patroni_version

    def ok(self: "NodePatroniVersionSummary", results: nagiosplugin.Result) -> str:
        return f"Patroni's version is {self.patroni_version}."

    @handle_unknown
    def problem(self: "NodePatroniVersionSummary", results: nagiosplugin.Result) -> str:
        # FIXME find a way to make the following work, check is perf data can be strings
        # return f"The expected patroni version was {self.patroni_version} got {results['patroni_version'].metric}."
        return f"Patroni's version is not {self.patroni_version}."
|
||||
|
||||
|
||||
class NodeIsAlive(PatroniResource):
    """Resource probing the `liveness` endpoint of the local node."""

    def probe(self: "NodeIsAlive") -> nagiosplugin.Metric:
        response = self.rest_api("liveness")
        _log.debug(f"api call status: {response.status}")
        _log.debug(f"api call data: {response.data}")

        # HTTP 200 means patroni answered the liveness probe
        return [nagiosplugin.Metric("is_alive", int(response.status == 200))]
|
||||
|
||||
|
||||
class NodeIsAliveSummary(nagiosplugin.Summary):
    """Human readable status line for the node_is_alive service."""

    def ok(self: "NodeIsAliveSummary", results: nagiosplugin.Result) -> str:
        return "This node is alive (patroni is running)."

    @handle_unknown
    def problem(self: "NodeIsAliveSummary", results: nagiosplugin.Result) -> str:
        return "This node is not alive (patroni is not running)."
|
63
check_patroni/types.py
Normal file
63
check_patroni/types.py
Normal file
|
@ -0,0 +1,63 @@
|
|||
import attr
|
||||
import logging
|
||||
import nagiosplugin
|
||||
import urllib3
|
||||
from typing import Any, Callable, List
|
||||
|
||||
_log = logging.getLogger("nagiosplugin")
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True, frozen=True, slots=True)
class ConnectionInfo:
    """Immutable connection settings for the Patroni REST API.

    endpoints: base URLs tried in order when querying the API
    cert_file / key_file: client TLS credentials used for https endpoints
    ca_cert: CA bundle used to verify the server certificate
    """

    # Bug fix: a plain list literal as the default would be a single mutable
    # object shared by every instance; attr.Factory builds a fresh list per
    # instance instead.
    endpoints: List[str] = attr.Factory(lambda: ["http://127.0.0.1:8008"])
    cert_file: str = "./ssl/benoit-dalibo-cert.pem"
    key_file: str = "./ssl/benoit-dalibo-key.pem"
    ca_cert: str = "./ssl/CA-cert.pem"
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True, slots=True)
class PatroniResource(nagiosplugin.Resource):
    """Base class for all check resources: stores the connection info and
    provides REST access with failover across the configured endpoints."""

    conn_info: ConnectionInfo

    def rest_api(
        self: "PatroniResource", service: str
    ) -> urllib3.response.HTTPResponse:
        """Try to connect to all the provided endpoints for the requested service"""
        for endpoint in self.conn_info.endpoints:
            try:
                # https endpoints use client TLS with the configured files
                if endpoint[:5] == "https":
                    pool = urllib3.PoolManager(
                        cert_reqs="CERT_REQUIRED",
                        cert_file=self.conn_info.cert_file,
                        key_file=self.conn_info.key_file,
                        ca_certs=self.conn_info.ca_cert,
                    )
                else:
                    pool = urllib3.PoolManager()

                _log.debug(f"Trying to connect to {endpoint}/{service}")
                # NOTE(review): the --timeout CLI option is not forwarded to
                # urllib3 here — confirm whether nagiosplugin's timeout guard
                # is the only limit intended.
                return pool.request(
                    "GET",
                    f"{endpoint}/{service}",
                )
            except nagiosplugin.Timeout as e:
                # let nagiosplugin's timeout abort the whole check
                raise e
            except Exception as e:
                # any other failure: log it and try the next endpoint
                _log.debug(e)
                continue
        raise nagiosplugin.CheckError("Connection failed for all provided endpoints")
|
||||
|
||||
|
||||
HandleUnknown = Callable[[nagiosplugin.Summary, nagiosplugin.Result], Any]


def handle_unknown(action: HandleUnknown) -> HandleUnknown:
    """decorator to handle the unknown state in Summary.problem"""

    def wrapper(summary: nagiosplugin.Summary, results: nagiosplugin.Result) -> Any:
        top_result = results.most_significant[0]
        # state code 3 is "unknown": surface the stored hint instead of
        # delegating to the wrapped summary method
        if top_result.state.code == 3:
            return top_result.hint
        return action(summary, results)

    return wrapper
|
9
config.ini
Normal file
9
config.ini
Normal file
|
@ -0,0 +1,9 @@
|
|||
[options]
|
||||
endpoints = https://10.20.199.3:8008, https://10.20.199.4:8008,https://10.20.199.5:8008
|
||||
cert_file = ./ssl/benoit-dalibo-cert.pem
|
||||
key_file = ./ssl/benoit-dalibo-key.pem
|
||||
ca_file = ./ssl/CA-cert.pem
|
||||
timeout = 0
|
||||
|
||||
[options.node_is_replica]
|
||||
lag=100
|
5
mypy.ini
Normal file
5
mypy.ini
Normal file
|
@ -0,0 +1,5 @@
|
|||
[mypy]
|
||||
# nagiosplugin => Skipping analyzing "nagiosplugin": found module but no type hints or library stubs [import]
|
||||
ignore_missing_imports = true
|
||||
show_error_codes = true
|
||||
strict = true
|
31
setup.py
31
setup.py
|
@ -19,24 +19,29 @@ def get_version() -> str:
|
|||
setup(
|
||||
name="check_patroni",
|
||||
version=get_version(),
|
||||
# author="Dalibo",
|
||||
# author_email="contact@dalibo.com",
|
||||
# author="Dalibo",
|
||||
# author_email="contact@dalibo.com",
|
||||
packages=find_packages("."),
|
||||
include_package_data=True,
|
||||
# url="https://github.com/dalibo/pg_activity",
|
||||
# url="https://github.com/dalibo/pg_activity",
|
||||
license="PostgreSQL",
|
||||
description="Nagios plugin to check on patroni",
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
# classifiers=[
|
||||
# "Development Status :: 5 - Production/Stable",
|
||||
# "Environment :: Console",
|
||||
# "License :: OSI Approved :: PostgreSQL License",
|
||||
# "Programming Language :: Python :: 3",
|
||||
# "Topic :: Database",
|
||||
# ],
|
||||
keywords="patroni nagios cehck",
|
||||
# classifiers=[
|
||||
# "Development Status :: 5 - Production/Stable",
|
||||
# "Environment :: Console",
|
||||
# "License :: OSI Approved :: PostgreSQL License",
|
||||
# "Programming Language :: Python :: 3",
|
||||
# "Topic :: Database",
|
||||
# ],
|
||||
keywords="patroni nagios check",
|
||||
python_requires=">=3.6",
|
||||
install_requires=[
|
||||
"urllib3 >= 1.26.6",
|
||||
"nagiosplugin >= 1.3.2",
|
||||
"click >= 8.0.1",
|
||||
],
|
||||
extras_require={
|
||||
"dev": [
|
||||
"black",
|
||||
|
@ -44,6 +49,10 @@ setup(
|
|||
"flake8",
|
||||
"mypy",
|
||||
],
|
||||
"test": [
|
||||
"pytest",
|
||||
"pytest-mock",
|
||||
],
|
||||
},
|
||||
entry_points={
|
||||
"console_scripts": [
|
||||
|
|
BIN
test/__pycache__/test.cpython-39-pytest-6.2.4.pyc
Normal file
BIN
test/__pycache__/test.cpython-39-pytest-6.2.4.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
test/__pycache__/test_node_is_alive.cpython-39-pytest-6.2.4.pyc
Normal file
BIN
test/__pycache__/test_node_is_alive.cpython-39-pytest-6.2.4.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
test/__pycache__/tools.cpython-39.pyc
Normal file
BIN
test/__pycache__/tools.cpython-39.pyc
Normal file
Binary file not shown.
16
test/json/cluster_config_has_changed.json
Normal file
16
test/json/cluster_config_has_changed.json
Normal file
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"loop_wait": 10,
|
||||
"master_start_timeout": 300,
|
||||
"postgresql": {
|
||||
"parameters": {
|
||||
"archive_command": "pgbackrest --stanza=main archive-push %p",
|
||||
"archive_mode": "on",
|
||||
"max_connections": 300,
|
||||
"restore_command": "pgbackrest --stanza=main archive-get %f \"%p\""
|
||||
},
|
||||
"use_pg_rewind": false,
|
||||
"use_slot": true
|
||||
},
|
||||
"retry_timeout": 10,
|
||||
"ttl": 30
|
||||
}
|
33
test/json/cluster_has_leader_ko.json
Normal file
33
test/json/cluster_has_leader_ko.json
Normal file
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
},
|
||||
{
|
||||
"name": "srv2",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.4:8008/patroni",
|
||||
"host": "10.20.199.4",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
},
|
||||
{
|
||||
"name": "srv3",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.5:8008/patroni",
|
||||
"host": "10.20.199.5",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
}
|
||||
]
|
||||
}
|
33
test/json/cluster_has_leader_ok.json
Normal file
33
test/json/cluster_has_leader_ok.json
Normal file
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
},
|
||||
{
|
||||
"name": "srv2",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.4:8008/patroni",
|
||||
"host": "10.20.199.4",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
},
|
||||
{
|
||||
"name": "srv3",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.5:8008/patroni",
|
||||
"host": "10.20.199.5",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
}
|
||||
]
|
||||
}
|
33
test/json/cluster_has_replica_ok.json
Normal file
33
test/json/cluster_has_replica_ok.json
Normal file
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
},
|
||||
{
|
||||
"name": "srv2",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.4:8008/patroni",
|
||||
"host": "10.20.199.4",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
},
|
||||
{
|
||||
"name": "srv3",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.5:8008/patroni",
|
||||
"host": "10.20.199.5",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
}
|
||||
]
|
||||
}
|
32
test/json/cluster_node_count.json
Normal file
32
test/json/cluster_node_count.json
Normal file
|
@ -0,0 +1,32 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
},
|
||||
{
|
||||
"name": "srv2",
|
||||
"role": "replica",
|
||||
"state": "start failed",
|
||||
"api_url": "https://10.20.199.4:8008/patroni",
|
||||
"host": "10.20.199.4",
|
||||
"port": 5432,
|
||||
"lag": "unknown"
|
||||
},
|
||||
{
|
||||
"name": "srv3",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.5:8008/patroni",
|
||||
"host": "10.20.199.5",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
}
|
||||
]
|
||||
}
|
13
test/json/cluster_node_count_critical.json
Normal file
13
test/json/cluster_node_count_critical.json
Normal file
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
}
|
||||
]
|
||||
}
|
33
test/json/cluster_node_count_ok.json
Normal file
33
test/json/cluster_node_count_ok.json
Normal file
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
},
|
||||
{
|
||||
"name": "srv2",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.4:8008/patroni",
|
||||
"host": "10.20.199.4",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
},
|
||||
{
|
||||
"name": "srv3",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.5:8008/patroni",
|
||||
"host": "10.20.199.5",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
}
|
||||
]
|
||||
}
|
31
test/json/cluster_node_count_running_critical.json
Normal file
31
test/json/cluster_node_count_running_critical.json
Normal file
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
},
|
||||
{
|
||||
"name": "srv2",
|
||||
"role": "replica",
|
||||
"state": "start failed",
|
||||
"api_url": "https://10.20.199.4:8008/patroni",
|
||||
"host": "10.20.199.4",
|
||||
"port": 5432,
|
||||
"lag": "unknown"
|
||||
},
|
||||
{
|
||||
"name": "srv3",
|
||||
"role": "replica",
|
||||
"state": "start failed",
|
||||
"api_url": "https://10.20.199.5:8008/patroni",
|
||||
"host": "10.20.199.5",
|
||||
"port": 5432,
|
||||
"lag": "unknown"
|
||||
}
|
||||
]
|
||||
}
|
23
test/json/cluster_node_count_running_warning.json
Normal file
23
test/json/cluster_node_count_running_warning.json
Normal file
|
@ -0,0 +1,23 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
},
|
||||
{
|
||||
"name": "srv3",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.5:8008/patroni",
|
||||
"host": "10.20.199.5",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
}
|
||||
]
|
||||
}
|
23
test/json/cluster_node_count_warning.json
Normal file
23
test/json/cluster_node_count_warning.json
Normal file
|
@ -0,0 +1,23 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
},
|
||||
{
|
||||
"name": "srv2",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.4:8008/patroni",
|
||||
"host": "10.20.199.4",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
}
|
||||
]
|
||||
}
|
19
test/json/node_is_alive.json
Normal file
19
test/json/node_is_alive.json
Normal file
|
@ -0,0 +1,19 @@
|
|||
{
|
||||
"state": "running",
|
||||
"postmaster_start_time": "2021-08-11 07:57:51.693 UTC",
|
||||
"role": "replica",
|
||||
"server_version": 110012,
|
||||
"cluster_unlocked": false,
|
||||
"xlog": {
|
||||
"received_location": 1174407088,
|
||||
"replayed_location": 1174407088,
|
||||
"replayed_timestamp": null,
|
||||
"paused": false
|
||||
},
|
||||
"timeline": 58,
|
||||
"database_system_identifier": "6965971025273547206",
|
||||
"patroni": {
|
||||
"version": "2.0.2",
|
||||
"scope": "patroni-demo"
|
||||
}
|
||||
}
|
27
test/json/node_is_pending_restart_ko.json
Normal file
27
test/json/node_is_pending_restart_ko.json
Normal file
|
@ -0,0 +1,27 @@
|
|||
{
|
||||
"state": "running",
|
||||
"postmaster_start_time": "2021-08-11 07:02:20.732 UTC",
|
||||
"role": "master",
|
||||
"server_version": 110012,
|
||||
"cluster_unlocked": false,
|
||||
"xlog": {
|
||||
"location": 1174407088
|
||||
},
|
||||
"timeline": 58,
|
||||
"replication": [
|
||||
{
|
||||
"usename": "replicator",
|
||||
"application_name": "srv1",
|
||||
"client_addr": "10.20.199.3",
|
||||
"state": "streaming",
|
||||
"sync_state": "async",
|
||||
"sync_priority": 0
|
||||
}
|
||||
],
|
||||
"pending_restart": true,
|
||||
"database_system_identifier": "6965971025273547206",
|
||||
"patroni": {
|
||||
"version": "2.0.2",
|
||||
"scope": "patroni-demo"
|
||||
}
|
||||
}
|
26
test/json/node_is_pending_restart_ok.json
Normal file
26
test/json/node_is_pending_restart_ok.json
Normal file
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"state": "running",
|
||||
"postmaster_start_time": "2021-08-11 07:02:20.732 UTC",
|
||||
"role": "master",
|
||||
"server_version": 110012,
|
||||
"cluster_unlocked": false,
|
||||
"xlog": {
|
||||
"location": 1174407088
|
||||
},
|
||||
"timeline": 58,
|
||||
"replication": [
|
||||
{
|
||||
"usename": "replicator",
|
||||
"application_name": "srv1",
|
||||
"client_addr": "10.20.199.3",
|
||||
"state": "streaming",
|
||||
"sync_state": "async",
|
||||
"sync_priority": 0
|
||||
}
|
||||
],
|
||||
"database_system_identifier": "6965971025273547206",
|
||||
"patroni": {
|
||||
"version": "2.0.2",
|
||||
"scope": "patroni-demo"
|
||||
}
|
||||
}
|
19
test/json/node_is_primary_ko.json
Normal file
19
test/json/node_is_primary_ko.json
Normal file
|
@ -0,0 +1,19 @@
|
|||
{
|
||||
"state": "running",
|
||||
"postmaster_start_time": "2021-08-11 07:57:51.693 UTC",
|
||||
"role": "replica",
|
||||
"server_version": 110012,
|
||||
"cluster_unlocked": false,
|
||||
"xlog": {
|
||||
"received_location": 1174407088,
|
||||
"replayed_location": 1174407088,
|
||||
"replayed_timestamp": null,
|
||||
"paused": false
|
||||
},
|
||||
"timeline": 58,
|
||||
"database_system_identifier": "6965971025273547206",
|
||||
"patroni": {
|
||||
"version": "2.0.2",
|
||||
"scope": "patroni-demo"
|
||||
}
|
||||
}
|
26
test/json/node_is_primary_ok.json
Normal file
26
test/json/node_is_primary_ok.json
Normal file
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"state": "running",
|
||||
"postmaster_start_time": "2021-08-11 07:02:20.732 UTC",
|
||||
"role": "master",
|
||||
"server_version": 110012,
|
||||
"cluster_unlocked": false,
|
||||
"xlog": {
|
||||
"location": 1174407088
|
||||
},
|
||||
"timeline": 58,
|
||||
"replication": [
|
||||
{
|
||||
"usename": "replicator",
|
||||
"application_name": "srv1",
|
||||
"client_addr": "10.20.199.3",
|
||||
"state": "streaming",
|
||||
"sync_state": "async",
|
||||
"sync_priority": 0
|
||||
}
|
||||
],
|
||||
"database_system_identifier": "6965971025273547206",
|
||||
"patroni": {
|
||||
"version": "2.0.2",
|
||||
"scope": "patroni-demo"
|
||||
}
|
||||
}
|
26
test/json/node_is_replica_ko.json
Normal file
26
test/json/node_is_replica_ko.json
Normal file
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"state": "running",
|
||||
"postmaster_start_time": "2021-08-11 07:02:20.732 UTC",
|
||||
"role": "master",
|
||||
"server_version": 110012,
|
||||
"cluster_unlocked": false,
|
||||
"xlog": {
|
||||
"location": 1174407088
|
||||
},
|
||||
"timeline": 58,
|
||||
"replication": [
|
||||
{
|
||||
"usename": "replicator",
|
||||
"application_name": "srv1",
|
||||
"client_addr": "10.20.199.3",
|
||||
"state": "streaming",
|
||||
"sync_state": "async",
|
||||
"sync_priority": 0
|
||||
}
|
||||
],
|
||||
"database_system_identifier": "6965971025273547206",
|
||||
"patroni": {
|
||||
"version": "2.0.2",
|
||||
"scope": "patroni-demo"
|
||||
}
|
||||
}
|
19
test/json/node_is_replica_ok.json
Normal file
19
test/json/node_is_replica_ok.json
Normal file
|
@ -0,0 +1,19 @@
|
|||
{
|
||||
"state": "running",
|
||||
"postmaster_start_time": "2021-08-11 07:57:51.693 UTC",
|
||||
"role": "replica",
|
||||
"server_version": 110012,
|
||||
"cluster_unlocked": false,
|
||||
"xlog": {
|
||||
"received_location": 1174407088,
|
||||
"replayed_location": 1174407088,
|
||||
"replayed_timestamp": null,
|
||||
"paused": false
|
||||
},
|
||||
"timeline": 58,
|
||||
"database_system_identifier": "6965971025273547206",
|
||||
"patroni": {
|
||||
"version": "2.0.2",
|
||||
"scope": "patroni-demo"
|
||||
}
|
||||
}
|
26
test/json/node_patroni_version.json
Normal file
26
test/json/node_patroni_version.json
Normal file
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"state": "running",
|
||||
"postmaster_start_time": "2021-08-11 07:02:20.732 UTC",
|
||||
"role": "master",
|
||||
"server_version": 110012,
|
||||
"cluster_unlocked": false,
|
||||
"xlog": {
|
||||
"location": 1174407088
|
||||
},
|
||||
"timeline": 58,
|
||||
"replication": [
|
||||
{
|
||||
"usename": "replicator",
|
||||
"application_name": "srv1",
|
||||
"client_addr": "10.20.199.3",
|
||||
"state": "streaming",
|
||||
"sync_state": "async",
|
||||
"sync_priority": 0
|
||||
}
|
||||
],
|
||||
"database_system_identifier": "6965971025273547206",
|
||||
"patroni": {
|
||||
"version": "2.0.2",
|
||||
"scope": "patroni-demo"
|
||||
}
|
||||
}
|
26
test/json/node_tl_has_changed.json
Normal file
26
test/json/node_tl_has_changed.json
Normal file
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"state": "running",
|
||||
"postmaster_start_time": "2021-08-11 07:02:20.732 UTC",
|
||||
"role": "master",
|
||||
"server_version": 110012,
|
||||
"cluster_unlocked": false,
|
||||
"xlog": {
|
||||
"location": 1174407088
|
||||
},
|
||||
"timeline": 58,
|
||||
"replication": [
|
||||
{
|
||||
"usename": "replicator",
|
||||
"application_name": "srv1",
|
||||
"client_addr": "10.20.199.3",
|
||||
"state": "streaming",
|
||||
"sync_state": "async",
|
||||
"sync_priority": 0
|
||||
}
|
||||
],
|
||||
"database_system_identifier": "6965971025273547206",
|
||||
"patroni": {
|
||||
"version": "2.0.2",
|
||||
"scope": "patroni-demo"
|
||||
}
|
||||
}
|
103
test/test_cluster_config_has_changed.py
Normal file
103
test/test_cluster_config_has_changed.py
Normal file
|
@ -0,0 +1,103 @@
|
|||
from click.testing import CliRunner
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from check_patroni.cli import main
|
||||
from tools import my_mock, here
|
||||
|
||||
|
||||
def test_cluster_config_has_changed_params(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_config_has_changed", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"cluster_config_has_changed",
|
||||
"--hash",
|
||||
"640df9f0211c791723f18fc3ed9dbb95",
|
||||
"--state-file",
|
||||
str(here / "fake_file_name.state_file"),
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 3
|
||||
|
||||
result = runner.invoke(
|
||||
main, ["-e", "https://10.20.199.3:8008", "cluster_config_has_changed"]
|
||||
)
|
||||
assert result.exit_code == 3
|
||||
|
||||
|
||||
def test_cluster_config_has_changed_ok_with_hash(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_config_has_changed", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"cluster_config_has_changed",
|
||||
"--hash",
|
||||
"640df9f0211c791723f18fc3ed9dbb95",
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_cluster_config_has_changed_ok_with_state_file(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
with open(here / "cluster_config_has_changed.state_file", "w") as f:
|
||||
f.write('{"hash": "640df9f0211c791723f18fc3ed9dbb95"}')
|
||||
|
||||
my_mock(mocker, "cluster_config_has_changed", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"cluster_config_has_changed",
|
||||
"--state-file",
|
||||
str(here / "cluster_config_has_changed.state_file"),
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_cluster_config_has_changed_ko_with_hash(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_config_has_changed", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"cluster_config_has_changed",
|
||||
"--hash",
|
||||
"640df9f0211c791723f18fc3edffffff",
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 2
|
||||
|
||||
|
||||
def test_cluster_config_has_changed_ko_with_state_file(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
with open(here / "cluster_config_has_changed.state_file", "w") as f:
|
||||
f.write('{"hash": "640df9f0211c791723f18fc3edffffff"}')
|
||||
|
||||
my_mock(mocker, "cluster_config_has_changed", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"cluster_config_has_changed",
|
||||
"--state-file",
|
||||
str(here / "cluster_config_has_changed.state_file"),
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 2
|
29
test/test_cluster_has_leader.py
Normal file
29
test/test_cluster_has_leader.py
Normal file
|
@ -0,0 +1,29 @@
|
|||
from click.testing import CliRunner
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from check_patroni.cli import main
|
||||
|
||||
from tools import my_mock
|
||||
|
||||
|
||||
def test_cluster_has_leader_ok(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_has_leader_ok", 200)
|
||||
result = runner.invoke(
|
||||
main, ["-e", "https://10.20.199.3:8008", "cluster_has_leader"]
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
# FIXME Not captured ???
|
||||
# assert "CLUSTERHASLEADER OK - has_leader is 1 | has_leader=1;;@0" in result.output
|
||||
|
||||
|
||||
def test_cluster_has_leader_ko(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_has_leader_ko", 200)
|
||||
result = runner.invoke(
|
||||
main, ["-e", "https://10.20.199.3:8008", "cluster_has_leader"]
|
||||
)
|
||||
assert result.exit_code == 2
|
||||
# assert "CLUSTERHASLEADER CRITICAL - has_leader is 0 (outside range @0:0) | has_leader=0;;@0" in result.output
|
36
test/test_cluster_has_replica.py
Normal file
36
test/test_cluster_has_replica.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
from click.testing import CliRunner
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from check_patroni.cli import main
|
||||
|
||||
from tools import my_mock
|
||||
|
||||
|
||||
# TODO Lag threshold tests
|
||||
def test_cluster_has_relica_ok(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_has_replica_ok", 200)
|
||||
result = runner.invoke(
|
||||
main, ["-e", "https://10.20.199.3:8008", "cluster_has_replica"]
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_cluster_has_replica_ok_with_count_thresholds(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_has_replica_ko", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"cluster_has_replica",
|
||||
"--warninng",
|
||||
"@2",
|
||||
"--critical",
|
||||
"@0:1",
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 2
|
115
test/test_cluster_node_count.py
Normal file
115
test/test_cluster_node_count.py
Normal file
|
@ -0,0 +1,115 @@
|
|||
from click.testing import CliRunner
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from check_patroni.cli import main
|
||||
|
||||
from tools import my_mock
|
||||
|
||||
|
||||
def test_cluster_node_count_ok(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_node_count_ok", 200)
|
||||
result = runner.invoke(
|
||||
main, ["-e", "https://10.20.199.3:8008", "cluster_node_count"]
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_cluster_node_count_ok_with_thresholds(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_node_count_ok", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"cluster_node_count",
|
||||
"--warning",
|
||||
"@0:1",
|
||||
"--critical",
|
||||
"@2",
|
||||
"--running-warning",
|
||||
"@2",
|
||||
"--running-critical",
|
||||
"@0:1",
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_cluster_node_count_running_warning(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_node_count_running_warning", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"cluster_node_count",
|
||||
"--running-warning",
|
||||
"@2",
|
||||
"--running-critical",
|
||||
"@0:1",
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 1
|
||||
|
||||
|
||||
def test_cluster_node_count_running_critical(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_node_count_running_critical", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"cluster_node_count",
|
||||
"--running-warning",
|
||||
"@2",
|
||||
"--running-critical",
|
||||
"@0:1",
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 2
|
||||
|
||||
|
||||
def test_cluster_node_count_warning(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_node_count_warning", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"cluster_node_count",
|
||||
"--warning",
|
||||
"@2",
|
||||
"--critical",
|
||||
"@0:1",
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 1
|
||||
|
||||
|
||||
def test_cluster_node_count_critical(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_node_count_critical", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"cluster_node_count",
|
||||
"--warning",
|
||||
"@2",
|
||||
"--critical",
|
||||
"@0:1",
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 2
|
22
test/test_node_is_alive.py
Normal file
22
test/test_node_is_alive.py
Normal file
|
@ -0,0 +1,22 @@
|
|||
from click.testing import CliRunner
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from check_patroni.cli import main
|
||||
|
||||
from tools import my_mock
|
||||
|
||||
|
||||
def test_node_is_alive_ok(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "node_is_alive", 200)
|
||||
result = runner.invoke(main, ["-e", "https://10.20.199.3:8008", "node_is_alive"])
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_node_is_alive_ko(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "node_is_alive", 404)
|
||||
result = runner.invoke(main, ["-e", "https://10.20.199.3:8008", "node_is_alive"])
|
||||
assert result.exit_code == 2
|
26
test/test_node_is_pending_restart.py
Normal file
26
test/test_node_is_pending_restart.py
Normal file
|
@ -0,0 +1,26 @@
|
|||
from click.testing import CliRunner
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from check_patroni.cli import main
|
||||
|
||||
from tools import my_mock
|
||||
|
||||
|
||||
def test_node_is_pending_restart_ok(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "node_is_pending_restart_ok", 200)
|
||||
result = runner.invoke(
|
||||
main, ["-e", "https://10.20.199.3:8008", "node_is_pending_restart"]
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_node_is_pending_restart_ko(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "node_is_pending_restart_ko", 404)
|
||||
result = runner.invoke(
|
||||
main, ["-e", "https://10.20.199.3:8008", "node_is_pending_restart"]
|
||||
)
|
||||
assert result.exit_code == 2
|
22
test/test_node_is_primary.py
Normal file
22
test/test_node_is_primary.py
Normal file
|
@ -0,0 +1,22 @@
|
|||
from click.testing import CliRunner
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from check_patroni.cli import main
|
||||
|
||||
from tools import my_mock
|
||||
|
||||
|
||||
def test_node_is_primary_ok(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "node_is_primary_ok", 200)
|
||||
result = runner.invoke(main, ["-e", "https://10.20.199.3:8008", "node_is_primary"])
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_node_is_primary_ko(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "node_is_primary_ko", 404)
|
||||
result = runner.invoke(main, ["-e", "https://10.20.199.3:8008", "node_is_primary"])
|
||||
assert result.exit_code == 2
|
33
test/test_node_is_replica.py
Normal file
33
test/test_node_is_replica.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
from click.testing import CliRunner
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from check_patroni.cli import main
|
||||
|
||||
from tools import my_mock
|
||||
|
||||
|
||||
def test_node_is_replica_ok(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "node_is_replica_ok", 200)
|
||||
result = runner.invoke(main, ["-e", "https://10.20.199.3:8008", "node_is_replica"])
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_node_is_replica_ko(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "node_is_replica_ko", 404)
|
||||
result = runner.invoke(main, ["-e", "https://10.20.199.3:8008", "node_is_replica"])
|
||||
assert result.exit_code == 2
|
||||
|
||||
|
||||
def test_node_is_replica_ko_lag(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
# We don't do the check ourselves, patroni does it and changes the return code
|
||||
my_mock(mocker, "node_is_replica_ok", 404)
|
||||
result = runner.invoke(
|
||||
main, ["-e", "https://10.20.199.3:8008", "node_is_replica", "--lag", "100"]
|
||||
)
|
||||
assert result.exit_code == 2
|
40
test/test_node_patroni_version.py
Normal file
40
test/test_node_patroni_version.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
from click.testing import CliRunner
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from check_patroni.cli import main
|
||||
|
||||
from tools import my_mock
|
||||
|
||||
|
||||
def test_node_patroni_version_ok(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "node_patroni_version", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"node_patroni_version",
|
||||
"--patroni-version",
|
||||
"2.0.2",
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_node_patroni_version_ko(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "node_patroni_version", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"node_patroni_version",
|
||||
"--patroni-version",
|
||||
"1.0.0",
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 2
|
104
test/test_node_tl_has_changed.py
Normal file
104
test/test_node_tl_has_changed.py
Normal file
|
@ -0,0 +1,104 @@
|
|||
from click.testing import CliRunner
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from check_patroni.cli import main
|
||||
|
||||
from tools import my_mock, here
|
||||
|
||||
|
||||
def test_node_tl_has_changed_params(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "node_tl_has_changed", 200)
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"-e",
|
||||
"https://10.20.199.3:8008",
|
||||
"node_tl_has_changed",
|
||||
"--timeline",
|
||||
"58",
|
||||
"--state-file",
|
||||
str(here / "fake_file_name.state_file"),
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 3
|
||||
|
||||
result = runner.invoke(
|
||||
main, ["-e", "https://10.20.199.3:8008", "node_tl_has_changed"]
|
||||
)
|
||||
assert result.exit_code == 3
|
||||
|
||||
|
||||
def test_node_tl_has_changed_ok_with_timeline(mocker: MockerFixture) -> None:
    """Exit OK (0) when the timeline given on the command line matches the API."""
    my_mock(mocker, "node_tl_has_changed", 200)

    cli_args = [
        "-e",
        "https://10.20.199.3:8008",
        "node_tl_has_changed",
        "--timeline",
        "58",
    ]
    result = CliRunner().invoke(main, cli_args)
    assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_node_tl_has_changed_ok_with_state_file(mocker: MockerFixture) -> None:
    """Exit OK (0) when the timeline stored in the state file matches the API."""
    state_file = here / "node_tl_has_changed.state_file"
    state_file.write_text('{"timeline": 58}')

    my_mock(mocker, "node_tl_has_changed", 200)
    result = CliRunner().invoke(
        main,
        [
            "-e",
            "https://10.20.199.3:8008",
            "node_tl_has_changed",
            "--state-file",
            str(state_file),
        ],
    )
    assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_node_tl_has_changed_ko_with_timeline(mocker: MockerFixture) -> None:
    """Exit CRITICAL (2) when the command-line timeline differs from the API."""
    my_mock(mocker, "node_tl_has_changed", 200)

    cli_args = [
        "-e",
        "https://10.20.199.3:8008",
        "node_tl_has_changed",
        "--timeline",
        "700",
    ]
    result = CliRunner().invoke(main, cli_args)
    assert result.exit_code == 2
|
||||
|
||||
|
||||
def test_node_tl_has_changed_ko_with_state_file(mocker: MockerFixture) -> None:
    """Exit CRITICAL (2) when the state-file timeline differs from the API."""
    state_file = here / "node_tl_has_changed.state_file"
    state_file.write_text('{"timeline": 700}')

    my_mock(mocker, "node_tl_has_changed", 200)
    result = CliRunner().invoke(
        main,
        [
            "-e",
            "https://10.20.199.3:8008",
            "node_tl_has_changed",
            "--state-file",
            str(state_file),
        ],
    )
    assert result.exit_code == 2
|
26
test/tools.py
Normal file
26
test/tools.py
Normal file
|
@ -0,0 +1,26 @@
|
|||
import attr
|
||||
import pathlib
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from check_patroni.types import PatroniResource
|
||||
|
||||
# Directory holding this test package; used to locate JSON fixtures and
# per-test state files.
here = pathlib.Path(__file__).parent
|
||||
|
||||
|
||||
def getjson(name: str) -> bytes:
    """Return the UTF-8 encoded content of the fixture ``json/<name>.json``."""
    fixture = here / "json" / f"{name}.json"
    return fixture.read_text().encode("utf-8")
|
||||
|
||||
|
||||
@attr.s(frozen=True, slots=True, auto_attribs=True)
class MockApiReturnCode:
    """Immutable stand-in for the object returned by ``PatroniResource.rest_api``."""

    # Raw (JSON) payload of the fake HTTP answer.
    data: bytes
    # HTTP status code of the fake answer.
    status: int
|
||||
|
||||
|
||||
def my_mock(mocker: MockerFixture, json_file: str, status: int) -> None:
    """Patch ``PatroniResource.rest_api`` to return canned data.

    Every call to the patched method — whatever service is requested —
    answers with the content of ``json/<json_file>.json`` and the given
    HTTP status code.
    """

    def fake_rest_api(self: PatroniResource, service: str) -> MockApiReturnCode:
        # The requested service is deliberately ignored: one canned answer
        # per test is enough.
        return MockApiReturnCode(getjson(json_file), status)

    mocker.patch("check_patroni.types.PatroniResource.rest_api", fake_rest_api)
|
Loading…
Reference in a new issue