Add tests for the output of the script and support pre/post 3.0.4

* Change all replica status from `running` to `streaming`
* Add an option to pytest to change the state back to `running`
* Also tests the output of the script
* Add a quick test script for live clusters
This commit is contained in:
benoit 2023-08-22 11:08:38 +02:00 committed by Benoit
parent 4a6ae03458
commit d99faeba15
34 changed files with 505 additions and 153 deletions

View file

@ -3,6 +3,8 @@ doctests = True
ignore =
# line too long
E501,
# line break before binary operator (added by black)
W503,
exclude =
.git,
.mypy_cache,

1
.gitignore vendored
View file

@ -3,6 +3,7 @@ check_patroni.egg-info
tests/*.state_file
tests/config.ini
vagrant/.vagrant
vagrant/*.state_file
.*.swp
.venv/
.tox/

View file

@ -101,6 +101,7 @@ For example, the following command will raise:
```
check_patroni -e https://10.20.199.3:8008 cluster_has_replica --warning 2: --critical 1:
```
## SSL
Several options are available:
@ -111,6 +112,43 @@ Several options are available:
* `--cert_file`: your certificate or the concatenation of your certificate and private key
* `--key_file`: your private key (optional)
## Tests
The tests are located in `./tests`. To keep them simple, the Patroni API is
mocked: the json responses are stored in `./tests/json`. The obvious drawback
is that if a json file is wrong, or if Patroni's output has changed and the
change was not reported here, the tests will still pass.
To run the tests:
1) download check_patroni, create a virtual env and install the script:
```bash
git clone https://github.com/dalibo/check_patroni.git
cd check_patroni
python -m venv .venv
. .venv/bin/activate
pip install -e .[test]
```
2) run the tests
```bash
pytest ./tests # nominal replica state is streaming (since v3.0.4)
pytest --use-old-replica-state ./tests # nominal replica state is running
```
Note: for any service that checks the state of a node in the `cluster` endpoint,
the json test file must be added in `./tests/tools.py`.
A bash script (`./vagrant/check_patroni.sh`) is provided to run all the
services against a live patroni endpoint. It takes one parameter: the endpoint
passed to the `-e/--endpoints` option of check_patroni. Nothing fancy, it's
just a list of all service calls in a bash script.
```bash
./vagrant/check_patroni.sh http://10.20.30.51:8008
```
## Cluster services
### cluster_config_has_changed

View file

@ -95,7 +95,7 @@ class ClusterHasReplica(PatroniResource):
# patroni 3.0.4 changed the standby state from running to streaming
if (
member["state"] in ["running", "streaming"]
and member["lag"] != "unknown" # noqa: W503
and member["lag"] != "unknown"
):
replicas.append({"name": member["name"], "lag": member["lag"]})
if self.max_lag is None or self.max_lag >= int(member["lag"]):

View file

@ -85,6 +85,7 @@ For example, the following command will raise:
```
check_patroni -e https://10.20.199.3:8008 cluster_has_replica --warning 2: --critical 1:
```
## SSL
Several options are available:
@ -94,6 +95,43 @@ Several options are available:
* you have a client certificate for authenticating with Patroni's REST API:
* `--cert_file`: your certificate or the concatenation of your certificate and private key
* `--key_file`: your private key (optional)
## Tests
The tests are located in `./tests`. To keep them simple, the Patroni API is
mocked: the json responses are stored in `./tests/json`. The obvious drawback
is that if a json file is wrong, or if Patroni's output has changed and the
change was not reported here, the tests will still pass.
To run the tests:
1) download check_patroni, create a virtual env and install the script:
```bash
git clone https://github.com/dalibo/check_patroni.git
cd check_patroni
python -m venv .venv
. .venv/bin/activate
pip install -e .[test]
```
2) run the tests
```bash
pytest ./tests # nominal replica state is streaming (since v3.0.4)
pytest --use-old-replica-state ./tests # nominal replica state is running
```
Note: for any service that checks the state of a node in the `cluster` endpoint,
the json test file must be added in `./tests/tools.py`.
A bash script (`./vagrant/check_patroni.sh`) is provided to run all the
services against a live patroni endpoint. It takes one parameter: the endpoint
passed to the `-e/--endpoints` option of check_patroni. Nothing fancy, it's
just a list of all service calls in a bash script.
```bash
./vagrant/check_patroni.sh http://10.20.30.51:8008
```
_EOF_
readme
readme "## Cluster services"

12
tests/conftest.py Normal file
View file

@ -0,0 +1,12 @@
def pytest_addoption(parser):
    """
    Add CLI options to `pytest` to pass those options to the test cases.

    These options are used in `pytest_generate_tests`.

    `--use-old-replica-state` is a flag (off by default): when given, the
    option value read back by the tests is True.
    """
    parser.addoption(
        "--use-old-replica-state",
        action="store_true",
        default=False,
        # Shown by `pytest --help`; without it the option is listed with no
        # explanation.
        help=(
            "run the tests with `running` as the nominal replica state "
            "instead of `streaming`"
        ),
    )
def pytest_generate_tests(metafunc):
    """
    Pass the value of `--use-old-replica-state` to the test cases.

    Each test requesting the `use_old_replica_state` argument is parametrized
    with a single value: the one read from the pytest configuration.
    """
    # Only parametrize the tests that declare the argument: pytest refuses to
    # parametrize a function with an argument name it does not use.
    if "use_old_replica_state" in metafunc.fixturenames:
        metafunc.parametrize(
            "use_old_replica_state",
            [metafunc.config.getoption("use_old_replica_state")],
        )

View file

@ -12,7 +12,7 @@
{
"name": "srv2",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.4:8008/patroni",
"host": "10.20.199.4",
"port": 5432,
@ -22,7 +22,7 @@
{
"name": "srv3",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.5:8008/patroni",
"host": "10.20.199.5",
"port": 5432,

View file

@ -22,7 +22,7 @@
{
"name": "srv3",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.5:8008/patroni",
"host": "10.20.199.5",
"port": 5432,

View file

@ -12,7 +12,7 @@
{
"name": "srv2",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.4:8008/patroni",
"host": "10.20.199.4",
"port": 5432,
@ -22,7 +22,7 @@
{
"name": "srv3",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.5:8008/patroni",
"host": "10.20.199.5",
"port": 5432,

View file

@ -12,7 +12,7 @@
{
"name": "srv2",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.4:8008/patroni",
"host": "10.20.199.4",
"port": 5432,
@ -22,7 +22,7 @@
{
"name": "srv3",
"role": "sync_standby",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.5:8008/patroni",
"host": "10.20.199.5",
"port": 5432,

View file

@ -12,7 +12,7 @@
{
"name": "srv2",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.4:8008/patroni",
"host": "10.20.199.4",
"port": 5432,
@ -22,7 +22,7 @@
{
"name": "srv3",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.5:8008/patroni",
"host": "10.20.199.5",
"port": 5432,

View file

@ -12,7 +12,7 @@
{
"name": "srv2",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.4:8008/patroni",
"host": "10.20.199.4",
"port": 5432,
@ -22,7 +22,7 @@
{
"name": "srv3",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.5:8008/patroni",
"host": "10.20.199.5",
"port": 5432,

View file

@ -12,7 +12,7 @@
{
"name": "srv2",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.4:8008/patroni",
"host": "10.20.199.4",
"port": 5432,
@ -22,7 +22,7 @@
{
"name": "srv3",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.5:8008/patroni",
"host": "10.20.199.5",
"port": 5432,

View file

@ -12,7 +12,7 @@
{
"name": "srv2",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.4:8008/patroni",
"host": "10.20.199.4",
"port": 5432,
@ -22,7 +22,7 @@
{
"name": "srv3",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.5:8008/patroni",
"host": "10.20.199.5",
"port": 5432,

View file

@ -12,7 +12,7 @@
{
"name": "srv2",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.4:8008/patroni",
"host": "10.20.199.4",
"port": 5432,
@ -22,7 +22,7 @@
{
"name": "srv3",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.5:8008/patroni",
"host": "10.20.199.5",
"port": 5432,

View file

@ -1,32 +0,0 @@
{
"members": [
{
"name": "srv1",
"role": "leader",
"state": "running",
"api_url": "https://10.20.199.3:8008/patroni",
"host": "10.20.199.3",
"port": 5432,
"timeline": 51
},
{
"name": "srv2",
"role": "replica",
"state": "start failed",
"api_url": "https://10.20.199.4:8008/patroni",
"host": "10.20.199.4",
"port": 5432,
"lag": "unknown"
},
{
"name": "srv3",
"role": "replica",
"state": "running",
"api_url": "https://10.20.199.5:8008/patroni",
"host": "10.20.199.5",
"port": 5432,
"timeline": 51,
"lag": 0
}
]
}

View file

@ -12,7 +12,7 @@
{
"name": "srv3",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.5:8008/patroni",
"host": "10.20.199.5",
"port": 5432,

View file

@ -12,7 +12,7 @@
{
"name": "srv2",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.4:8008/patroni",
"host": "10.20.199.4",
"port": 5432,
@ -22,7 +22,7 @@
{
"name": "srv3",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.5:8008/patroni",
"host": "10.20.199.5",
"port": 5432,

View file

@ -12,7 +12,7 @@
{
"name": "srv2",
"role": "replica",
"state": "running",
"state": "streaming",
"api_url": "https://10.20.199.4:8008/patroni",
"host": "10.20.199.4",
"port": 5432,

View file

@ -6,7 +6,9 @@ from check_patroni.cli import main
from .tools import my_mock
def test_api_status_code_200(mocker: MockerFixture) -> None:
def test_api_status_code_200(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "node_is_pending_restart_ok", 200)
@ -16,7 +18,9 @@ def test_api_status_code_200(mocker: MockerFixture) -> None:
assert result.exit_code == 0
def test_api_status_code_404(mocker: MockerFixture) -> None:
def test_api_status_code_404(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "Fake test", 404)

View file

@ -7,31 +7,9 @@ from check_patroni.cli import main
from .tools import here, my_mock
def test_cluster_config_has_changed_params(mocker: MockerFixture) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_config_has_changed", 200)
result = runner.invoke(
main,
[
"-e",
"https://10.20.199.3:8008",
"cluster_config_has_changed",
"--hash",
"640df9f0211c791723f18fc3ed9dbb95",
"--state-file",
str(here / "fake_file_name.state_file"),
],
)
assert result.exit_code == 3
result = runner.invoke(
main, ["-e", "https://10.20.199.3:8008", "cluster_config_has_changed"]
)
assert result.exit_code == 3
def test_cluster_config_has_changed_ok_with_hash(mocker: MockerFixture) -> None:
def test_cluster_config_has_changed_ok_with_hash(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_config_has_changed", 200)
@ -45,11 +23,16 @@ def test_cluster_config_has_changed_ok_with_hash(mocker: MockerFixture) -> None:
"96b12d82571473d13e890b893734e731",
],
)
print(result.output)
assert result.exit_code == 0
assert (
result.stdout
== "CLUSTERCONFIGHASCHANGED OK - The hash of patroni's dynamic configuration has not changed (96b12d82571473d13e890b893734e731). | is_configuration_changed=0;;@1:1\n"
)
def test_cluster_config_has_changed_ok_with_state_file(mocker: MockerFixture) -> None:
def test_cluster_config_has_changed_ok_with_state_file(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
with open(here / "cluster_config_has_changed.state_file", "w") as f:
@ -67,9 +50,15 @@ def test_cluster_config_has_changed_ok_with_state_file(mocker: MockerFixture) ->
],
)
assert result.exit_code == 0
assert (
result.stdout
== "CLUSTERCONFIGHASCHANGED OK - The hash of patroni's dynamic configuration has not changed (96b12d82571473d13e890b893734e731). | is_configuration_changed=0;;@1:1\n"
)
def test_cluster_config_has_changed_ko_with_hash(mocker: MockerFixture) -> None:
def test_cluster_config_has_changed_ko_with_hash(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_config_has_changed", 200)
@ -84,10 +73,15 @@ def test_cluster_config_has_changed_ko_with_hash(mocker: MockerFixture) -> None:
],
)
assert result.exit_code == 2
assert (
result.stdout
== "CLUSTERCONFIGHASCHANGED CRITICAL - The hash of patroni's dynamic configuration has changed. The old hash was 96b12d82571473d13e890b8937ffffff. | is_configuration_changed=1;;@1:1\n"
)
def test_cluster_config_has_changed_ko_with_state_file_and_save(
mocker: MockerFixture,
use_old_replica_state: bool,
) -> None:
runner = CliRunner()
@ -107,6 +101,10 @@ def test_cluster_config_has_changed_ko_with_state_file_and_save(
],
)
assert result.exit_code == 2
assert (
result.stdout
== "CLUSTERCONFIGHASCHANGED CRITICAL - The hash of patroni's dynamic configuration has changed. The old hash was 96b12d82571473d13e890b8937ffffff. | is_configuration_changed=1;;@1:1\n"
)
cookie = nagiosplugin.Cookie(here / "cluster_config_has_changed.state_file")
cookie.open()
@ -128,6 +126,10 @@ def test_cluster_config_has_changed_ko_with_state_file_and_save(
],
)
assert result.exit_code == 2
assert (
result.stdout
== "CLUSTERCONFIGHASCHANGED CRITICAL - The hash of patroni's dynamic configuration has changed. The old hash was 96b12d82571473d13e890b8937ffffff. | is_configuration_changed=1;;@1:1\n"
)
cookie = nagiosplugin.Cookie(here / "cluster_config_has_changed.state_file")
cookie.open()
@ -135,3 +137,38 @@ def test_cluster_config_has_changed_ko_with_state_file_and_save(
cookie.close()
assert new_config_hash == "96b12d82571473d13e890b893734e731"
def test_cluster_config_has_changed_params(
    mocker: MockerFixture, use_old_replica_state: bool
) -> None:
    """Check the handling of the `--hash` / `--state-file` options.

    The service is invoked once with both options and once with none of them;
    both invocations must exit with code 3 and print the usage error.
    """
    # This one is placed last because it seems like the exceptions are not
    # flushed from stderr for the next tests.
    cli_runner = CliRunner()

    my_mock(mocker, "cluster_config_has_changed", 200)

    base_args = ["-e", "https://10.20.199.3:8008", "cluster_config_has_changed"]
    both_options = [
        "--hash",
        "640df9f0211c791723f18fc3ed9dbb95",
        "--state-file",
        str(here / "fake_file_name.state_file"),
    ]
    expected_output = "CLUSTERCONFIGHASCHANGED UNKNOWN: click.exceptions.UsageError: Either --hash or --state-file should be provided for this service\n"

    # First with both options, then with none of them.
    for extra_args in (both_options, []):
        outcome = cli_runner.invoke(main, base_args + extra_args)
        assert outcome.exit_code == 3
        assert outcome.stdout == expected_output

View file

@ -6,7 +6,9 @@ from check_patroni.cli import main
from .tools import my_mock
def test_cluster_has_leader_ok(mocker: MockerFixture) -> None:
def test_cluster_has_leader_ok(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_has_leader_ok", 200)
@ -14,11 +16,15 @@ def test_cluster_has_leader_ok(mocker: MockerFixture) -> None:
main, ["-e", "https://10.20.199.3:8008", "cluster_has_leader"]
)
assert result.exit_code == 0
# FIXME the data seems to not be written to stdout yet ...
# assert "CLUSTERHASLEADER OK - has_leader is 1 | has_leader=1;;@0" in result.output
assert (
result.stdout
== "CLUSTERHASLEADER OK - The cluster has a running leader. | has_leader=1;;@0\n"
)
def test_cluster_has_leader_ko(mocker: MockerFixture) -> None:
def test_cluster_has_leader_ko(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_has_leader_ko", 200)
@ -26,4 +32,7 @@ def test_cluster_has_leader_ko(mocker: MockerFixture) -> None:
main, ["-e", "https://10.20.199.3:8008", "cluster_has_leader"]
)
assert result.exit_code == 2
# assert "CLUSTERHASLEADER CRITICAL - has_leader is 0 (outside range @0:0) | has_leader=0;;@0" in result.output
assert (
result.stdout
== "CLUSTERHASLEADER CRITICAL - The cluster has no running leader. | has_leader=0;;@0\n"
)

View file

@ -7,20 +7,28 @@ from .tools import my_mock
# TODO Lag threshold tests
def test_cluster_has_relica_ok(mocker: MockerFixture) -> None:
def test_cluster_has_relica_ok(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_has_replica_ok", 200)
my_mock(mocker, "cluster_has_replica_ok", 200, use_old_replica_state)
result = runner.invoke(
main, ["-e", "https://10.20.199.3:8008", "cluster_has_replica"]
)
assert result.exit_code == 0
assert (
result.stdout
== "CLUSTERHASREPLICA OK - healthy_replica is 2 | healthy_replica=2 srv2_lag=0 srv3_lag=0 unhealthy_replica=0\n"
)
def test_cluster_has_replica_ok_with_count_thresholds(mocker: MockerFixture) -> None:
def test_cluster_has_replica_ok_with_count_thresholds(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_has_replica_ok", 200)
my_mock(mocker, "cluster_has_replica_ok", 200, use_old_replica_state)
result = runner.invoke(
main,
[
@ -34,14 +42,19 @@ def test_cluster_has_replica_ok_with_count_thresholds(mocker: MockerFixture) ->
],
)
assert result.exit_code == 0
assert (
result.stdout
== "CLUSTERHASREPLICA OK - healthy_replica is 2 | healthy_replica=2;@1;@0 srv2_lag=0 srv3_lag=0 unhealthy_replica=0\n"
)
def test_cluster_has_replica_ok_with_count_thresholds_lag(
mocker: MockerFixture,
use_old_replica_state: bool,
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_has_replica_ok_lag", 200)
my_mock(mocker, "cluster_has_replica_ok_lag", 200, use_old_replica_state)
result = runner.invoke(
main,
[
@ -57,12 +70,18 @@ def test_cluster_has_replica_ok_with_count_thresholds_lag(
],
)
assert result.exit_code == 0
assert (
result.stdout
== "CLUSTERHASREPLICA OK - healthy_replica is 2 | healthy_replica=2;@1;@0 srv2_lag=1024 srv3_lag=0 unhealthy_replica=0\n"
)
def test_cluster_has_replica_ko_with_count_thresholds(mocker: MockerFixture) -> None:
def test_cluster_has_replica_ko_with_count_thresholds(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_has_replica_ko", 200)
my_mock(mocker, "cluster_has_replica_ko", 200, use_old_replica_state)
result = runner.invoke(
main,
[
@ -76,14 +95,19 @@ def test_cluster_has_replica_ko_with_count_thresholds(mocker: MockerFixture) ->
],
)
assert result.exit_code == 1
assert (
result.stdout
== "CLUSTERHASREPLICA WARNING - healthy_replica is 1 (outside range @0:1) | healthy_replica=1;@1;@0 srv3_lag=0 unhealthy_replica=1\n"
)
def test_cluster_has_replica_ko_with_count_thresholds_and_lag(
mocker: MockerFixture,
use_old_replica_state: bool,
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_has_replica_ko_lag", 200)
my_mock(mocker, "cluster_has_replica_ko_lag", 200, use_old_replica_state)
result = runner.invoke(
main,
[
@ -99,3 +123,7 @@ def test_cluster_has_replica_ko_with_count_thresholds_and_lag(
],
)
assert result.exit_code == 2
assert (
result.stdout
== "CLUSTERHASREPLICA CRITICAL - healthy_replica is 0 (outside range @0:0) | healthy_replica=0;@1;@0 srv2_lag=10241024 srv3_lag=20000000 unhealthy_replica=2\n"
)

View file

@ -6,7 +6,9 @@ from check_patroni.cli import main
from .tools import my_mock
def test_cluster_is_in_maintenance_ok(mocker: MockerFixture) -> None:
def test_cluster_is_in_maintenance_ok(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_is_in_maintenance_ok", 200)
@ -14,9 +16,15 @@ def test_cluster_is_in_maintenance_ok(mocker: MockerFixture) -> None:
main, ["-e", "https://10.20.199.3:8008", "cluster_is_in_maintenance"]
)
assert result.exit_code == 0
assert (
result.stdout
== "CLUSTERISINMAINTENANCE OK - is_in_maintenance is 0 | is_in_maintenance=0;;0\n"
)
def test_cluster_is_in_maintenance_ko(mocker: MockerFixture) -> None:
def test_cluster_is_in_maintenance_ko(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_is_in_maintenance_ko", 200)
@ -24,9 +32,15 @@ def test_cluster_is_in_maintenance_ko(mocker: MockerFixture) -> None:
main, ["-e", "https://10.20.199.3:8008", "cluster_is_in_maintenance"]
)
assert result.exit_code == 2
assert (
result.stdout
== "CLUSTERISINMAINTENANCE CRITICAL - is_in_maintenance is 1 (outside range 0:0) | is_in_maintenance=1;;0\n"
)
def test_cluster_is_in_maintenance_ok_pause_false(mocker: MockerFixture) -> None:
def test_cluster_is_in_maintenance_ok_pause_false(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_is_in_maintenance_ok_pause_false", 200)
@ -34,3 +48,7 @@ def test_cluster_is_in_maintenance_ok_pause_false(mocker: MockerFixture) -> None
main, ["-e", "https://10.20.199.3:8008", "cluster_is_in_maintenance"]
)
assert result.exit_code == 0
assert (
result.stdout
== "CLUSTERISINMAINTENANCE OK - is_in_maintenance is 0 | is_in_maintenance=0;;0\n"
)

View file

@ -6,20 +6,34 @@ from check_patroni.cli import main
from .tools import my_mock
def test_cluster_node_count_ok(mocker: MockerFixture) -> None:
def test_cluster_node_count_ok(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_node_count_ok", 200)
my_mock(mocker, "cluster_node_count_ok", 200, use_old_replica_state)
result = runner.invoke(
main, ["-e", "https://10.20.199.3:8008", "cluster_node_count"]
)
assert result.exit_code == 0
if use_old_replica_state:
assert (
result.output
== "CLUSTERNODECOUNT OK - members is 3 | healthy_members=3 members=3 role_leader=1 role_replica=2 state_running=3\n"
)
else:
assert (
result.output
== "CLUSTERNODECOUNT OK - members is 3 | healthy_members=3 members=3 role_leader=1 role_replica=2 state_running=1 state_streaming=2\n"
)
def test_cluster_node_count_ok_with_thresholds(mocker: MockerFixture) -> None:
def test_cluster_node_count_ok_with_thresholds(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_node_count_ok", 200)
my_mock(mocker, "cluster_node_count_ok", 200, use_old_replica_state)
result = runner.invoke(
main,
[
@ -37,12 +51,24 @@ def test_cluster_node_count_ok_with_thresholds(mocker: MockerFixture) -> None:
],
)
assert result.exit_code == 0
if use_old_replica_state:
assert (
result.output
== "CLUSTERNODECOUNT OK - members is 3 | healthy_members=3;@2;@1 members=3;@1;@2 role_leader=1 role_replica=2 state_running=3\n"
)
else:
assert (
result.output
== "CLUSTERNODECOUNT OK - members is 3 | healthy_members=3;@2;@1 members=3;@1;@2 role_leader=1 role_replica=2 state_running=1 state_streaming=2\n"
)
def test_cluster_node_count_running_warning(mocker: MockerFixture) -> None:
def test_cluster_node_count_healthy_warning(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_node_count_running_warning", 200)
my_mock(mocker, "cluster_node_count_healthy_warning", 200, use_old_replica_state)
result = runner.invoke(
main,
[
@ -56,12 +82,24 @@ def test_cluster_node_count_running_warning(mocker: MockerFixture) -> None:
],
)
assert result.exit_code == 1
if use_old_replica_state:
assert (
result.output
== "CLUSTERNODECOUNT WARNING - healthy_members is 2 (outside range @0:2) | healthy_members=2;@2;@1 members=2 role_leader=1 role_replica=1 state_running=2\n"
)
else:
assert (
result.output
== "CLUSTERNODECOUNT WARNING - healthy_members is 2 (outside range @0:2) | healthy_members=2;@2;@1 members=2 role_leader=1 role_replica=1 state_running=1 state_streaming=1\n"
)
def test_cluster_node_count_running_critical(mocker: MockerFixture) -> None:
def test_cluster_node_count_healthy_critical(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_node_count_running_critical", 200)
my_mock(mocker, "cluster_node_count_healthy_critical", 200, use_old_replica_state)
result = runner.invoke(
main,
[
@ -75,12 +113,18 @@ def test_cluster_node_count_running_critical(mocker: MockerFixture) -> None:
],
)
assert result.exit_code == 2
assert (
result.output
== "CLUSTERNODECOUNT CRITICAL - healthy_members is 1 (outside range @0:1) | healthy_members=1;@2;@1 members=3 role_leader=1 role_replica=2 state_running=1 state_start_failed=2\n"
)
def test_cluster_node_count_warning(mocker: MockerFixture) -> None:
def test_cluster_node_count_warning(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_node_count_warning", 200)
my_mock(mocker, "cluster_node_count_warning", 200, use_old_replica_state)
result = runner.invoke(
main,
[
@ -94,12 +138,24 @@ def test_cluster_node_count_warning(mocker: MockerFixture) -> None:
],
)
assert result.exit_code == 1
if use_old_replica_state:
assert (
result.stdout
== "CLUSTERNODECOUNT WARNING - members is 2 (outside range @0:2) | healthy_members=2 members=2;@2;@1 role_leader=1 role_replica=1 state_running=2\n"
)
else:
assert (
result.stdout
== "CLUSTERNODECOUNT WARNING - members is 2 (outside range @0:2) | healthy_members=2 members=2;@2;@1 role_leader=1 role_replica=1 state_running=1 state_streaming=1\n"
)
def test_cluster_node_count_critical(mocker: MockerFixture) -> None:
def test_cluster_node_count_critical(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "cluster_node_count_critical", 200)
my_mock(mocker, "cluster_node_count_critical", 200, use_old_replica_state)
result = runner.invoke(
main,
[
@ -113,3 +169,7 @@ def test_cluster_node_count_critical(mocker: MockerFixture) -> None:
],
)
assert result.exit_code == 2
assert (
result.stdout
== "CLUSTERNODECOUNT CRITICAL - members is 1 (outside range @0:1) | healthy_members=1 members=1;@2;@1 role_leader=1 state_running=1\n"
)

View file

@ -6,17 +6,25 @@ from check_patroni.cli import main
from .tools import my_mock
def test_node_is_alive_ok(mocker: MockerFixture) -> None:
def test_node_is_alive_ok(mocker: MockerFixture, use_old_replica_state: bool) -> None:
runner = CliRunner()
my_mock(mocker, None, 200)
result = runner.invoke(main, ["-e", "https://10.20.199.3:8008", "node_is_alive"])
assert result.exit_code == 0
assert (
result.stdout
== "NODEISALIVE OK - This node is alive (patroni is running). | is_alive=1;;@0\n"
)
def test_node_is_alive_ko(mocker: MockerFixture) -> None:
def test_node_is_alive_ko(mocker: MockerFixture, use_old_replica_state: bool) -> None:
runner = CliRunner()
my_mock(mocker, None, 404)
result = runner.invoke(main, ["-e", "https://10.20.199.3:8008", "node_is_alive"])
assert result.exit_code == 2
assert (
result.stdout
== "NODEISALIVE CRITICAL - This node is not alive (patroni is not running). | is_alive=0;;@0\n"
)

View file

@ -6,7 +6,9 @@ from check_patroni.cli import main
from .tools import my_mock
def test_node_is_pending_restart_ok(mocker: MockerFixture) -> None:
def test_node_is_pending_restart_ok(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "node_is_pending_restart_ok", 200)
@ -14,9 +16,15 @@ def test_node_is_pending_restart_ok(mocker: MockerFixture) -> None:
main, ["-e", "https://10.20.199.3:8008", "node_is_pending_restart"]
)
assert result.exit_code == 0
assert (
result.stdout
== "NODEISPENDINGRESTART OK - This node doesn't have the pending restart flag. | is_pending_restart=0;;0\n"
)
def test_node_is_pending_restart_ko(mocker: MockerFixture) -> None:
def test_node_is_pending_restart_ko(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "node_is_pending_restart_ko", 200)
@ -24,3 +32,7 @@ def test_node_is_pending_restart_ko(mocker: MockerFixture) -> None:
main, ["-e", "https://10.20.199.3:8008", "node_is_pending_restart"]
)
assert result.exit_code == 2
assert (
result.stdout
== "NODEISPENDINGRESTART CRITICAL - This node has the pending restart flag. | is_pending_restart=1;;0\n"
)

View file

@ -6,17 +6,25 @@ from check_patroni.cli import main
from .tools import my_mock
def test_node_is_primary_ok(mocker: MockerFixture) -> None:
def test_node_is_primary_ok(mocker: MockerFixture, use_old_replica_state: bool) -> None:
runner = CliRunner()
my_mock(mocker, "node_is_primary_ok", 200)
result = runner.invoke(main, ["-e", "https://10.20.199.3:8008", "node_is_primary"])
assert result.exit_code == 0
assert (
result.stdout
== "NODEISPRIMARY OK - This node is the primary with the leader lock. | is_primary=1;;@0\n"
)
def test_node_is_primary_ko(mocker: MockerFixture) -> None:
def test_node_is_primary_ko(mocker: MockerFixture, use_old_replica_state: bool) -> None:
runner = CliRunner()
my_mock(mocker, "node_is_primary_ko", 404)
result = runner.invoke(main, ["-e", "https://10.20.199.3:8008", "node_is_primary"])
assert result.exit_code == 2
assert (
result.stdout
== "NODEISPRIMARY CRITICAL - This node is not the primary with the leader lock. | is_primary=0;;@0\n"
)

View file

@ -6,23 +6,33 @@ from check_patroni.cli import main
from .tools import my_mock
def test_node_is_replica_ok(mocker: MockerFixture) -> None:
def test_node_is_replica_ok(mocker: MockerFixture, use_old_replica_state: bool) -> None:
runner = CliRunner()
my_mock(mocker, "node_is_replica_ok", 200)
result = runner.invoke(main, ["-e", "https://10.20.199.3:8008", "node_is_replica"])
assert result.exit_code == 0
assert (
result.stdout
== "NODEISREPLICA OK - This node is a running replica with no noloadbalance tag. | is_replica=1;;@0\n"
)
def test_node_is_replica_ko(mocker: MockerFixture) -> None:
def test_node_is_replica_ko(mocker: MockerFixture, use_old_replica_state: bool) -> None:
runner = CliRunner()
my_mock(mocker, "node_is_replica_ko", 404)
result = runner.invoke(main, ["-e", "https://10.20.199.3:8008", "node_is_replica"])
assert result.exit_code == 2
assert (
result.stdout
== "NODEISREPLICA CRITICAL - This node is not a running replica with no noloadbalance tag. | is_replica=0;;@0\n"
)
def test_node_is_replica_ko_lag(mocker: MockerFixture) -> None:
def test_node_is_replica_ko_lag(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
# We don't do the check ourselves, patroni does it and changes the return code
@ -31,3 +41,7 @@ def test_node_is_replica_ko_lag(mocker: MockerFixture) -> None:
main, ["-e", "https://10.20.199.3:8008", "node_is_replica", "--max-lag", "100"]
)
assert result.exit_code == 2
assert (
result.stdout
== "NODEISREPLICA CRITICAL - This node is not a running replica with no noloadbalance tag and a lag under 100. | is_replica=0;;@0\n"
)

View file

@ -6,7 +6,9 @@ from check_patroni.cli import main
from .tools import my_mock
def test_node_patroni_version_ok(mocker: MockerFixture) -> None:
def test_node_patroni_version_ok(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "node_patroni_version", 200)
@ -21,9 +23,15 @@ def test_node_patroni_version_ok(mocker: MockerFixture) -> None:
],
)
assert result.exit_code == 0
assert (
result.stdout
== "NODEPATRONIVERSION OK - Patroni's version is 2.0.2. | is_version_ok=1;;@0\n"
)
def test_node_patroni_version_ko(mocker: MockerFixture) -> None:
def test_node_patroni_version_ko(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "node_patroni_version", 200)
@ -38,3 +46,7 @@ def test_node_patroni_version_ko(mocker: MockerFixture) -> None:
],
)
assert result.exit_code == 2
assert (
result.stdout
== "NODEPATRONIVERSION CRITICAL - Patroni's version is not 1.0.0. | is_version_ok=0;;@0\n"
)

View file

@ -7,31 +7,9 @@ from check_patroni.cli import main
from .tools import here, my_mock
def test_node_tl_has_changed_params(mocker: MockerFixture) -> None:
runner = CliRunner()
my_mock(mocker, "node_tl_has_changed", 200)
result = runner.invoke(
main,
[
"-e",
"https://10.20.199.3:8008",
"node_tl_has_changed",
"--timeline",
"58",
"--state-file",
str(here / "fake_file_name.state_file"),
],
)
assert result.exit_code == 3
result = runner.invoke(
main, ["-e", "https://10.20.199.3:8008", "node_tl_has_changed"]
)
assert result.exit_code == 3
def test_node_tl_has_changed_ok_with_timeline(mocker: MockerFixture) -> None:
def test_node_tl_has_changed_ok_with_timeline(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "node_tl_has_changed", 200)
@ -46,9 +24,15 @@ def test_node_tl_has_changed_ok_with_timeline(mocker: MockerFixture) -> None:
],
)
assert result.exit_code == 0
assert (
result.stdout
== "NODETLHASCHANGED OK - The timeline is still 58. | is_timeline_changed=0;;@1:1 timeline=58\n"
)
def test_node_tl_has_changed_ok_with_state_file(mocker: MockerFixture) -> None:
def test_node_tl_has_changed_ok_with_state_file(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
with open(here / "node_tl_has_changed.state_file", "w") as f:
@ -66,9 +50,15 @@ def test_node_tl_has_changed_ok_with_state_file(mocker: MockerFixture) -> None:
],
)
assert result.exit_code == 0
assert (
result.stdout
== "NODETLHASCHANGED OK - The timeline is still 58. | is_timeline_changed=0;;@1:1 timeline=58\n"
)
def test_node_tl_has_changed_ko_with_timeline(mocker: MockerFixture) -> None:
def test_node_tl_has_changed_ko_with_timeline(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
my_mock(mocker, "node_tl_has_changed", 200)
@ -83,9 +73,15 @@ def test_node_tl_has_changed_ko_with_timeline(mocker: MockerFixture) -> None:
],
)
assert result.exit_code == 2
assert (
result.stdout
== "NODETLHASCHANGED CRITICAL - The expected timeline was 700 got 58. | is_timeline_changed=1;;@1:1 timeline=58\n"
)
def test_node_tl_has_changed_ko_with_state_file_and_save(mocker: MockerFixture) -> None:
def test_node_tl_has_changed_ko_with_state_file_and_save(
mocker: MockerFixture, use_old_replica_state: bool
) -> None:
runner = CliRunner()
with open(here / "node_tl_has_changed.state_file", "w") as f:
@ -104,6 +100,10 @@ def test_node_tl_has_changed_ko_with_state_file_and_save(mocker: MockerFixture)
],
)
assert result.exit_code == 2
assert (
result.stdout
== "NODETLHASCHANGED CRITICAL - The expected timeline was 700 got 58. | is_timeline_changed=1;;@1:1 timeline=58\n"
)
cookie = nagiosplugin.Cookie(here / "node_tl_has_changed.state_file")
cookie.open()
@ -125,6 +125,10 @@ def test_node_tl_has_changed_ko_with_state_file_and_save(mocker: MockerFixture)
],
)
assert result.exit_code == 2
assert (
result.stdout
== "NODETLHASCHANGED CRITICAL - The expected timeline was 700 got 58. | is_timeline_changed=1;;@1:1 timeline=58\n"
)
cookie = nagiosplugin.Cookie(here / "node_tl_has_changed.state_file")
cookie.open()
@ -132,3 +136,38 @@ def test_node_tl_has_changed_ko_with_state_file_and_save(mocker: MockerFixture)
cookie.close()
assert new_tl == 58
def test_node_tl_has_changed_params(
    mocker: MockerFixture, use_old_replica_state: bool
) -> None:
    """Check the usage error reported for invalid option combinations.

    The service is invoked once with both ``--timeline`` and ``--state-file``
    and once with neither; both calls must exit with code 3 and print the same
    UNKNOWN usage message.
    """
    # This one is placed last because it seems like the exceptions are not
    # flushed from stderr for the next tests.
    base_args = ["-e", "https://10.20.199.3:8008", "node_tl_has_changed"]
    expected_output = "NODETLHASCHANGED UNKNOWN: click.exceptions.UsageError: Either --timeline or --state-file should be provided for this service\n"

    cli = CliRunner()
    my_mock(mocker, "node_tl_has_changed", 200)

    # Both options supplied at once.
    both_options = base_args + [
        "--timeline",
        "58",
        "--state-file",
        str(here / "fake_file_name.state_file"),
    ]
    outcome = cli.invoke(main, both_options)
    assert outcome.exit_code == 3
    assert outcome.stdout == expected_output

    # Neither option supplied.
    outcome = cli.invoke(main, list(base_args))
    assert outcome.exit_code == 3
    assert outcome.stdout == expected_output

View file

@ -18,11 +18,32 @@ def getjson(name: str) -> Any:
return json.load(f)
def my_mock(
    mocker: MockerFixture,
    json_file: str,
    status: int,
    use_old_replica_state: bool = False,
) -> None:
    """Replace ``PatroniResource.rest_api`` with a stub serving a JSON fixture.

    Args:
        mocker: pytest-mock fixture used to install the patch.
        json_file: name of the fixture passed to ``getjson``; when empty the
            stub returns ``None``.
        status: simulated HTTP status; any value other than 200 makes the stub
            raise ``APIError``.
        use_old_replica_state: when true, fixtures whose name starts with
            ``cluster_has_replica`` or ``cluster_node_count`` are passed
            through ``cluster_api_set_replica_running`` before being returned.
    """

    def mock_rest_api(self: PatroniResource, service: str) -> Any:
        if status != 200:
            # Message kept verbatim from the original test helper.
            raise APIError("Test en erreur pour status code 200")
        if not json_file:
            return None
        payload = getjson(json_file)
        # Only these fixture families are rewritten to the old replica state.
        if use_old_replica_state and json_file.startswith(
            ("cluster_has_replica", "cluster_node_count")
        ):
            return cluster_api_set_replica_running(payload)
        return payload

    # Reset the existing mocks before installing the new patch.
    mocker.resetall()
    mocker.patch("check_patroni.types.PatroniResource.rest_api", mock_rest_api)
def cluster_api_set_replica_running(js: Any) -> Any:
    """Switch streaming replicas back to the ``running`` state.

    Starting from 3.0.4 the state of replicas is ``streaming`` instead of
    ``running``; this rewrites a ``cluster`` payload to the older form.
    Members whose role is ``replica`` or ``sync_standby`` and whose state is
    ``streaming`` are changed; every other member is left untouched.

    The payload is modified in place and the same object is returned.
    """
    standby_roles = ("replica", "sync_standby")
    for member in js["members"]:
        # "state" is only read for standby roles, as in the nested form.
        if member["role"] in standby_roles and member["state"] == "streaming":
            member["state"] = "running"
    return js

23
vagrant/check_patroni.sh Executable file
View file

@ -0,0 +1,23 @@
#!/bin/bash
# Quick manual test: run every check_patroni service against a live Patroni
# endpoint and print each service's output.
#
# usage: check_patroni.sh PATRONI_END_POINT

if [[ -z "$1" ]]; then
    echo "usage: $0 PATRONI_END_POINT"
    exit 1
fi

# State file used by the services that take --state-file. The ".state_file"
# suffix matches the repository's ignore pattern for generated state files.
STATE_FILE="cluster.state_file"

echo "-- Running patroni checks using endpoint $1"

echo "-- Cluster checks"
# First call runs with --save and its output discarded; the second call is the
# one whose result is displayed.
check_patroni -e "$1" cluster_config_has_changed --state-file "$STATE_FILE" --save &>/dev/null
check_patroni -e "$1" cluster_config_has_changed --state-file "$STATE_FILE"
check_patroni -e "$1" cluster_has_leader
check_patroni -e "$1" cluster_has_replica
check_patroni -e "$1" cluster_is_in_maintenance
check_patroni -e "$1" cluster_node_count

echo "-- Node checks"
check_patroni -e "$1" node_is_alive
check_patroni -e "$1" node_is_pending_restart
check_patroni -e "$1" node_is_primary
check_patroni -e "$1" node_is_replica
check_patroni -e "$1" node_patroni_version --patroni-version 3.1.0
# Same save-then-check sequence as for cluster_config_has_changed.
check_patroni -e "$1" node_tl_has_changed --state-file "$STATE_FILE" --save &>/dev/null
check_patroni -e "$1" node_tl_has_changed --state-file "$STATE_FILE"