New service cluster_is_in_maintenance
This commit is contained in:
parent
4169766a2f
commit
dd8130a459
|
@ -12,6 +12,7 @@ from .cluster import (
|
|||
ClusterHasLeaderSummary,
|
||||
ClusterHasReplica,
|
||||
ClusterNodeCount,
|
||||
ClusterIsInMaintenance,
|
||||
)
|
||||
from .node import (
|
||||
NodeIsAlive,
|
||||
|
@ -183,6 +184,7 @@ def cluster_node_count(
|
|||
Perfdata:
|
||||
* `members`: the member count.
|
||||
* all the roles of the nodes in the cluster with their number.
|
||||
* all the statuses of the nodes in the cluster with their number.
|
||||
"""
|
||||
check = nagiosplugin.Check()
|
||||
check.add(
|
||||
|
@ -336,6 +338,31 @@ def cluster_config_has_changed(
|
|||
)
|
||||
|
||||
|
||||
@main.command(name="cluster_is_in_maintenance")
|
||||
@click.pass_context
|
||||
@nagiosplugin.guarded
|
||||
def cluster_is_in_maintenance(ctx: click.Context) -> None:
|
||||
"""Check if the cluster is in maintenance mode ie paused.
|
||||
|
||||
\b
|
||||
Check:
|
||||
* `OK`: If the cluster is not in maintenance mode.
|
||||
* `CRITICAL`: otherwise.
|
||||
|
||||
\b
|
||||
Perfdata:
|
||||
* `is_in_maintenance` is 1 if the cluster is in maintenance mode, 0 otherwise.
|
||||
"""
|
||||
check = nagiosplugin.Check()
|
||||
check.add(
|
||||
ClusterIsInMaintenance(ctx.obj),
|
||||
nagiosplugin.ScalarContext("is_in_maintenance", None, "0:0"),
|
||||
)
|
||||
check.main(
|
||||
verbose=ctx.parent.params["verbose"], timeout=ctx.parent.params["timeout"]
|
||||
)
|
||||
|
||||
|
||||
@main.command(name="node_is_primary")
|
||||
@click.pass_context
|
||||
@nagiosplugin.guarded
|
||||
|
@ -393,7 +420,7 @@ def node_is_pending_restart(ctx: click.Context) -> None:
|
|||
"""Check if the node is in pending restart state.
|
||||
|
||||
This situation can arise if the configuration has been modified but
|
||||
requiers arestart of PostgreSQL.
|
||||
requires a restart of PostgreSQL to take effect.
|
||||
|
||||
\b
|
||||
Check:
|
||||
|
@ -427,7 +454,7 @@ def node_is_pending_restart(ctx: click.Context) -> None:
|
|||
@click.pass_context
|
||||
@nagiosplugin.guarded
|
||||
def node_tl_has_changed(ctx: click.Context, timeline: str, state_file: str) -> None:
|
||||
"""Check if the timeline hash changed.
|
||||
"""Check if the timeline has changed.
|
||||
|
||||
Note: either a timeline or a state file must be provided for this service to work.
|
||||
|
||||
|
|
|
@ -21,9 +21,9 @@ class ClusterNodeCount(PatroniResource):
|
|||
_log.debug(f"api call data: {r.data}")
|
||||
|
||||
item_dict = json.loads(r.data)
|
||||
role_counters = Counter()
|
||||
role_counters: Counter[str] = Counter()
|
||||
roles = []
|
||||
status_counters = Counter()
|
||||
status_counters: Counter[str] = Counter()
|
||||
statuses = []
|
||||
|
||||
for member in item_dict["members"]:
|
||||
|
@ -159,3 +159,15 @@ class ClusterConfigHasChangedSummary(nagiosplugin.Summary):
|
|||
self: "ClusterConfigHasChangedSummary", results: nagiosplugin.Result
|
||||
) -> str:
|
||||
return "The hash of patroni's dynamic configuration has changed."
|
||||
|
||||
|
||||
class ClusterIsInMaintenance(PatroniResource):
|
||||
def probe(self: "ClusterIsInMaintenance") -> Iterable[nagiosplugin.Metric]:
|
||||
r = self.rest_api("cluster")
|
||||
_log.debug(f"api call status: {r.status}")
|
||||
_log.debug(f"api call data: {r.data}")
|
||||
|
||||
item_dict = json.loads(r.data)
|
||||
|
||||
# The actual check
|
||||
return [nagiosplugin.Metric("is_in_maintenance", 1 if "pause" in item_dict and item_dict["pause"] else 0)]
|
||||
|
|
34
test/json/cluster_is_in_maintenance_ko.json
Normal file
34
test/json/cluster_is_in_maintenance_ko.json
Normal file
|
@ -0,0 +1,34 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
},
|
||||
{
|
||||
"name": "srv2",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.4:8008/patroni",
|
||||
"host": "10.20.199.4",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
},
|
||||
{
|
||||
"name": "srv3",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.5:8008/patroni",
|
||||
"host": "10.20.199.5",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
}
|
||||
],
|
||||
"pause": true
|
||||
}
|
34
test/json/cluster_is_in_maintenance_ko_pause_false.json
Normal file
34
test/json/cluster_is_in_maintenance_ko_pause_false.json
Normal file
|
@ -0,0 +1,34 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
},
|
||||
{
|
||||
"name": "srv2",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.4:8008/patroni",
|
||||
"host": "10.20.199.4",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
},
|
||||
{
|
||||
"name": "srv3",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.5:8008/patroni",
|
||||
"host": "10.20.199.5",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
}
|
||||
],
|
||||
"pause": false
|
||||
}
|
33
test/json/cluster_is_in_maintenance_ok.json
Normal file
33
test/json/cluster_is_in_maintenance_ok.json
Normal file
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
},
|
||||
{
|
||||
"name": "srv2",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.4:8008/patroni",
|
||||
"host": "10.20.199.4",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
},
|
||||
{
|
||||
"name": "srv3",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.5:8008/patroni",
|
||||
"host": "10.20.199.5",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
}
|
||||
]
|
||||
}
|
34
test/json/cluster_is_in_maintenance_ok_pause_false.json
Normal file
34
test/json/cluster_is_in_maintenance_ok_pause_false.json
Normal file
|
@ -0,0 +1,34 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
},
|
||||
{
|
||||
"name": "srv2",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.4:8008/patroni",
|
||||
"host": "10.20.199.4",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
},
|
||||
{
|
||||
"name": "srv3",
|
||||
"role": "replica",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.5:8008/patroni",
|
||||
"host": "10.20.199.5",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
}
|
||||
],
|
||||
"pause": false
|
||||
}
|
35
test/test_cluster_is_in_maintenance.py
Normal file
35
test/test_cluster_is_in_maintenance.py
Normal file
|
@ -0,0 +1,35 @@
|
|||
from click.testing import CliRunner
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from check_patroni.cli import main
|
||||
|
||||
from tools import my_mock
|
||||
|
||||
|
||||
def test_cluster_is_in_maintenance_ok(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_is_in_maintenance_ok", 200)
|
||||
result = runner.invoke(
|
||||
main, ["-e", "https://10.20.199.3:8008", "cluster_is_in_maintenance"]
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_cluster_is_in_maintenance_ko(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_is_in_maintenance_ko", 200)
|
||||
result = runner.invoke(
|
||||
main, ["-e", "https://10.20.199.3:8008", "cluster_is_in_maintenance"]
|
||||
)
|
||||
assert result.exit_code == 2
|
||||
|
||||
def test_cluster_is_in_maintenance_ok_pause_false(mocker: MockerFixture) -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
my_mock(mocker, "cluster_is_in_maintenance_ok_pause_false", 200)
|
||||
result = runner.invoke(
|
||||
main, ["-e", "https://10.20.199.3:8008", "cluster_is_in_maintenance"]
|
||||
)
|
||||
assert result.exit_code == 0
|
Loading…
Reference in a new issue