Add new service cluster_has_scheduled_action
This commit is contained in:
parent
7f6a03a3cc
commit
99bf1c5bb5
|
@ -5,6 +5,7 @@
|
|||
### Added
|
||||
|
||||
* Add `sync_standby` as a valid replica type for `cluster_has_replica`. (contributed by @mattpoel)
|
||||
* Add a new service `cluster_has_scheduled_action` to warn of any scheduled switchover or restart.
|
||||
|
||||
### Fixed
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ from .cluster import (
|
|||
ClusterHasLeader,
|
||||
ClusterHasLeaderSummary,
|
||||
ClusterHasReplica,
|
||||
ClusterHasScheduledAction,
|
||||
ClusterIsInMaintenance,
|
||||
ClusterNodeCount,
|
||||
)
|
||||
|
@ -436,6 +437,33 @@ def cluster_is_in_maintenance(ctx: click.Context) -> None:
|
|||
check.main(verbose=ctx.obj.verbose, timeout=ctx.obj.timeout)
|
||||
|
||||
|
||||
@main.command(name="cluster_has_scheduled_action")
@click.pass_context
@nagiosplugin.guarded
def cluster_has_scheduled_action(ctx: click.Context) -> None:
    """Check if the cluster has a scheduled action (switchover or restart)

    \b
    Check:
    * `OK`: If the cluster has no scheduled action
    * `CRITICAL`: otherwise.

    \b
    Perfdata:
    * `has_scheduled_actions` is 1 if the cluster has scheduled actions.
    * `scheduled_switchover` is 1 if the cluster has a scheduled switchover.
    * `scheduled_restart` counts the number of scheduled restarts in the cluster.
    """
    check = nagiosplugin.Check()
    check.add(
        ClusterHasScheduledAction(ctx.obj.connection_info),
        # No warning range; any value outside 0:0 is reported as critical.
        nagiosplugin.ScalarContext("has_scheduled_actions", None, "0:0"),
        # Contexts without thresholds: these metrics are reported as perfdata
        # only and never change the check status.
        nagiosplugin.ScalarContext("scheduled_switchover"),
        nagiosplugin.ScalarContext("scheduled_restart"),
    )
    check.main(verbose=ctx.obj.verbose, timeout=ctx.obj.timeout)
|
||||
|
||||
|
||||
@main.command(name="node_is_primary")
|
||||
@click.pass_context
|
||||
@nagiosplugin.guarded
|
||||
|
|
|
@ -191,3 +191,27 @@ class ClusterIsInMaintenance(PatroniResource):
|
|||
1 if "pause" in item_dict and item_dict["pause"] else 0,
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
class ClusterHasScheduledAction(PatroniResource):
    """Resource reporting scheduled switchovers and restarts of a cluster."""

    def probe(self: "ClusterHasScheduledAction") -> Iterable[nagiosplugin.Metric]:
        """Query the ``cluster`` endpoint and yield the scheduled-action metrics.

        Yields three metrics, in this order:

        * ``has_scheduled_actions``: 1 if a switchover or at least one restart
          is scheduled, 0 otherwise.
        * ``scheduled_switchover``: 1 if the response has a
          ``scheduled_switchover`` key, 0 otherwise.
        * ``scheduled_restart``: number of entries in ``members`` that have a
          ``scheduled_restart`` key.
        """
        item_dict = self.rest_api("cluster")

        # A switchover is scheduled cluster-wide, so it is a simple flag.
        scheduled_switchover = 1 if "scheduled_switchover" in item_dict else 0

        # Restarts are scheduled per member, so count the members concerned.
        scheduled_restart = sum(
            1 for member in item_dict["members"] if "scheduled_restart" in member
        )

        # The actual check
        yield nagiosplugin.Metric(
            "has_scheduled_actions",
            1 if (scheduled_switchover + scheduled_restart) > 0 else 0,
        )

        # The performance data: scheduled switchover flag, scheduled restart count
        yield nagiosplugin.Metric("scheduled_switchover", scheduled_switchover)
        yield nagiosplugin.Metric("scheduled_restart", scheduled_restart)
|
||||
|
|
27
tests/json/cluster_has_scheduled_action_ko_restart.json
Normal file
27
tests/json/cluster_has_scheduled_action_ko_restart.json
Normal file
|
@ -0,0 +1,27 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "p1",
|
||||
"role": "sync_standby",
|
||||
"state": "streaming",
|
||||
"api_url": "http://10.20.30.51:8008/patroni",
|
||||
"host": "10.20.30.51",
|
||||
"port": 5432,
|
||||
"timeline": 3,
|
||||
"scheduled_restart": {
|
||||
"schedule": "2023-10-08T11:30:00+00:00",
|
||||
"postmaster_start_time": "2023-08-21 08:08:33.415237+00:00"
|
||||
},
|
||||
"lag": 0
|
||||
},
|
||||
{
|
||||
"name": "p2",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "http://10.20.30.52:8008/patroni",
|
||||
"host": "10.20.30.52",
|
||||
"port": 5432,
|
||||
"timeline": 3
|
||||
}
|
||||
]
|
||||
}
|
28
tests/json/cluster_has_scheduled_action_ko_switchover.json
Normal file
28
tests/json/cluster_has_scheduled_action_ko_switchover.json
Normal file
|
@ -0,0 +1,28 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "p1",
|
||||
"role": "sync_standby",
|
||||
"state": "streaming",
|
||||
"api_url": "http://10.20.30.51:8008/patroni",
|
||||
"host": "10.20.30.51",
|
||||
"port": 5432,
|
||||
"timeline": 3,
|
||||
"lag": 0
|
||||
},
|
||||
{
|
||||
"name": "p2",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "http://10.20.30.52:8008/patroni",
|
||||
"host": "10.20.30.52",
|
||||
"port": 5432,
|
||||
"timeline": 3
|
||||
}
|
||||
],
|
||||
"scheduled_switchover": {
|
||||
"at": "2023-10-08T11:30:00+00:00",
|
||||
"from": "p1",
|
||||
"to": "p2"
|
||||
}
|
||||
}
|
33
tests/json/cluster_has_scheduled_action_ok.json
Normal file
33
tests/json/cluster_has_scheduled_action_ok.json
Normal file
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"members": [
|
||||
{
|
||||
"name": "srv1",
|
||||
"role": "leader",
|
||||
"state": "running",
|
||||
"api_url": "https://10.20.199.3:8008/patroni",
|
||||
"host": "10.20.199.3",
|
||||
"port": 5432,
|
||||
"timeline": 51
|
||||
},
|
||||
{
|
||||
"name": "srv2",
|
||||
"role": "replica",
|
||||
"state": "streaming",
|
||||
"api_url": "https://10.20.199.4:8008/patroni",
|
||||
"host": "10.20.199.4",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
},
|
||||
{
|
||||
"name": "srv3",
|
||||
"role": "sync_standby",
|
||||
"state": "streaming",
|
||||
"api_url": "https://10.20.199.5:8008/patroni",
|
||||
"host": "10.20.199.5",
|
||||
"port": 5432,
|
||||
"timeline": 51,
|
||||
"lag": 0
|
||||
}
|
||||
]
|
||||
}
|
54
tests/test_cluster_has_scheduled_action.py
Normal file
54
tests/test_cluster_has_scheduled_action.py
Normal file
|
@ -0,0 +1,54 @@
|
|||
from click.testing import CliRunner
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from check_patroni.cli import main
|
||||
|
||||
from .tools import my_mock
|
||||
|
||||
|
||||
def test_cluster_has_scheduled_action_ok(
    mocker: MockerFixture, use_old_replica_state: bool
) -> None:
    """The check exits 0 with the OK status line for the `_ok` fixture."""
    # Install the mock for the "cluster_has_scheduled_action_ok" fixture
    # (second argument 200 is presumably the HTTP status; see tests/tools).
    my_mock(mocker, "cluster_has_scheduled_action_ok", 200)

    cli_args = ["-e", "https://10.20.199.3:8008", "cluster_has_scheduled_action"]
    expected_stdout = "CLUSTERHASSCHEDULEDACTION OK - has_scheduled_actions is 0 | has_scheduled_actions=0;;0 scheduled_restart=0 scheduled_switchover=0\n"

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert outcome.stdout == expected_stdout
|
||||
|
||||
|
||||
def test_cluster_has_scheduled_action_ko_switchover(
    mocker: MockerFixture, use_old_replica_state: bool
) -> None:
    """The check exits 2 (CRITICAL) for the `_ko_switchover` fixture."""
    # Install the mock for the fixture that carries a "scheduled_switchover"
    # entry (second argument 200 is presumably the HTTP status).
    my_mock(mocker, "cluster_has_scheduled_action_ko_switchover", 200)

    cli_args = ["-e", "https://10.20.199.3:8008", "cluster_has_scheduled_action"]
    expected_stdout = "CLUSTERHASSCHEDULEDACTION CRITICAL - has_scheduled_actions is 1 (outside range 0:0) | has_scheduled_actions=1;;0 scheduled_restart=0 scheduled_switchover=1\n"

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 2
    assert outcome.stdout == expected_stdout
|
||||
|
||||
|
||||
def test_cluster_has_scheduled_action_ko_restart(
    mocker: MockerFixture, use_old_replica_state: bool
) -> None:
    """The check exits 2 (CRITICAL) for the `_ko_restart` fixture."""
    # Install the mock for the fixture in which one member carries a
    # "scheduled_restart" entry (second argument 200 is presumably the HTTP
    # status).
    my_mock(mocker, "cluster_has_scheduled_action_ko_restart", 200)

    cli_args = ["-e", "https://10.20.199.3:8008", "cluster_has_scheduled_action"]
    expected_stdout = "CLUSTERHASSCHEDULEDACTION CRITICAL - has_scheduled_actions is 1 (outside range 0:0) | has_scheduled_actions=1;;0 scheduled_restart=1 scheduled_switchover=0\n"

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 2
    assert outcome.stdout == expected_stdout
|
|
@ -12,6 +12,7 @@ check_patroni -e "$1" cluster_config_has_changed --state-file cluster.sate_file
|
|||
check_patroni -e "$1" cluster_has_leader
|
||||
check_patroni -e "$1" cluster_has_replica
|
||||
check_patroni -e "$1" cluster_is_in_maintenance
|
||||
check_patroni -e "$1" cluster_has_scheduled_action
|
||||
check_patroni -e "$1" cluster_node_count
|
||||
echo "-- Node checks"
|
||||
check_patroni -e "$1" node_is_alive
|
||||
|
|
Loading…
Reference in a new issue