Add a --save option when state files are used

The checks `cluster_config_has_changed` and `node_tl_has_changed` use a
state file to store the previous value of the config hash and the
timeline.

Previously the check would fail if something changed, but the new value
would be saved directly. This behavious has changed. The new value
is saved only if `--save` is passed to the check.

The mimics the way [check_pgactivity] manages this kind of checks.

[check_pgactivity]: https://github.com/OPMDG/check_pgactivity
This commit is contained in:
benoit 2023-03-01 16:46:55 +01:00 committed by Benoit
parent e7e7ac2e3a
commit 908669f073
6 changed files with 86 additions and 14 deletions

View file

@ -61,7 +61,6 @@ issue](https://github.com/dalibo/check_patroni/issues/new). Dalibo has no
commitment on response time for public free support. Thanks for you
contribution !
## Config file
All global and service specific parameters can be specified via a config file has follows:
@ -119,6 +118,8 @@ Usage: check_patroni cluster_config_has_changed [OPTIONS]
Options:
--hash TEXT A hash to compare with.
-s, --state-file TEXT A state file to store the hash of the configuration.
--save Set the current configuration hash as the reference
for future calls.
--help Show this message and exit.
```
@ -320,7 +321,7 @@ Usage: check_patroni node_tl_has_changed [OPTIONS]
work.
Check:
* `OK`: The timeline is the same as last time (`--state_file`) or the inputed timeline (`--timeline`)
* `OK`: The timeline is the same as last time (`--state_file`) or the inputted timeline (`--timeline`)
* `CRITICAL`: The tl is not the same.
Perfdata:
@ -330,6 +331,8 @@ Usage: check_patroni node_tl_has_changed [OPTIONS]
Options:
--timeline TEXT A timeline number to compare with.
-s, --state-file TEXT A state file to store the last tl number into.
--save Set the current timeline number as the reference for
future calls.
--help Show this message and exit.
```

View file

@ -308,10 +308,17 @@ def cluster_has_replica(
type=str,
help="A state file to store the hash of the configuration.",
)
@click.option(
"--save",
"save_config",
is_flag=True,
default=False,
help="Set the current configuration hash as the reference for future calls.",
)
@click.pass_context
@nagiosplugin.guarded
def cluster_config_has_changed(
ctx: click.Context, config_hash: str, state_file: str
ctx: click.Context, config_hash: str, state_file: str, save_config: bool
) -> None:
"""Check if the hash of the configuration has changed.
@ -343,7 +350,9 @@ def cluster_config_has_changed(
check = nagiosplugin.Check()
check.add(
ClusterConfigHasChanged(ctx.obj.connection_info, old_config_hash, state_file),
ClusterConfigHasChanged(
ctx.obj.connection_info, old_config_hash, state_file, save_config
),
nagiosplugin.ScalarContext("is_configuration_changed", None, "@1:1"),
ClusterConfigHasChangedSummary(old_config_hash),
)
@ -455,9 +464,18 @@ def node_is_pending_restart(ctx: click.Context) -> None:
type=str,
help="A state file to store the last tl number into.",
)
@click.option(
"--save",
"save_tl",
is_flag=True,
default=False,
help="Set the current timeline number as the reference for future calls.",
)
@click.pass_context
@nagiosplugin.guarded
def node_tl_has_changed(ctx: click.Context, timeline: str, state_file: str) -> None:
def node_tl_has_changed(
ctx: click.Context, timeline: str, state_file: str, save_tl: bool
) -> None:
"""Check if the timeline has changed.
Note: either a timeline or a state file must be provided for this service to work.
@ -488,7 +506,7 @@ def node_tl_has_changed(ctx: click.Context, timeline: str, state_file: str) -> N
check = nagiosplugin.Check()
check.add(
NodeTLHasChanged(ctx.obj.connection_info, old_timeline, state_file),
NodeTLHasChanged(ctx.obj.connection_info, old_timeline, state_file, save_tl),
nagiosplugin.ScalarContext("is_timeline_changed", None, "@1:1"),
nagiosplugin.ScalarContext("timeline"),
NodeTLHasChangedSummary(old_timeline),

View file

@ -132,10 +132,12 @@ class ClusterConfigHasChanged(PatroniResource):
connection_info: ConnectionInfo,
config_hash: str, # Always contains the old hash
state_file: str, # Only used to update the hash in the state_file (when needed)
save: bool = False, # Save the configuration
):
super().__init__(connection_info)
self.state_file = state_file
self.config_hash = config_hash
self.save = save
def probe(self: "ClusterConfigHasChanged") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("config")
@ -144,9 +146,10 @@ class ClusterConfigHasChanged(PatroniResource):
new_hash = hashlib.md5(r.data).hexdigest()
_log.debug(f"save result: {self.save}")
old_hash = self.config_hash
if self.state_file is not None:
_log.debug(f"Saving new hash to state file / cookie {self.state_file}")
if self.state_file is not None and self.save:
_log.debug(f"saving new hash to state file / cookie {self.state_file}")
cookie = nagiosplugin.Cookie(self.state_file)
cookie.open()
cookie["hash"] = new_hash

View file

@ -95,10 +95,12 @@ class NodeTLHasChanged(PatroniResource):
connection_info: ConnectionInfo,
timeline: str, # Always contains the old timeline
state_file: str, # Only used to update the timeline in the state_file (when needed)
save: bool, # save timeline in state file
) -> None:
super().__init__(connection_info)
self.state_file = state_file
self.timeline = timeline
self.save = save
def probe(self: "NodeTLHasChanged") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("patroni")
@ -108,9 +110,10 @@ class NodeTLHasChanged(PatroniResource):
item_dict = json.loads(r.data)
new_tl = item_dict["timeline"]
_log.debug(f"save result: {self.save}")
old_tl = self.timeline
if self.state_file is not None:
_log.debug(f"Saving new timeline to state file / cookie {self.state_file}")
if self.state_file is not None and self.save:
_log.debug(f"saving new timeline to state file / cookie {self.state_file}")
cookie = nagiosplugin.Cookie(self.state_file)
cookie.open()
cookie["timeline"] = new_tl

View file

@ -45,6 +45,7 @@ def test_cluster_config_has_changed_ok_with_hash(mocker: MockerFixture) -> None:
"640df9f0211c791723f18fc3ed9dbb95",
],
)
print(result.output)
assert result.exit_code == 0
@ -85,13 +86,16 @@ def test_cluster_config_has_changed_ko_with_hash(mocker: MockerFixture) -> None:
assert result.exit_code == 2
def test_cluster_config_has_changed_ko_with_state_file(mocker: MockerFixture) -> None:
def test_cluster_config_has_changed_ko_with_state_file_and_save(
mocker: MockerFixture,
) -> None:
runner = CliRunner()
with open(here / "cluster_config_has_changed.state_file", "w") as f:
f.write('{"hash": "640df9f0211c791723f18fc3edffffff"}')
my_mock(mocker, "cluster_config_has_changed", 200)
# test without saving the new hash
result = runner.invoke(
main,
[
@ -104,7 +108,27 @@ def test_cluster_config_has_changed_ko_with_state_file(mocker: MockerFixture) ->
)
assert result.exit_code == 2
# the new hash was saved
cookie = nagiosplugin.Cookie(here / "cluster_config_has_changed.state_file")
cookie.open()
new_config_hash = cookie.get("hash")
cookie.close()
assert new_config_hash == "640df9f0211c791723f18fc3edffffff"
# test when we save the hash
result = runner.invoke(
main,
[
"-e",
"https://10.20.199.3:8008",
"cluster_config_has_changed",
"--state-file",
str(here / "cluster_config_has_changed.state_file"),
"--save",
],
)
assert result.exit_code == 2
cookie = nagiosplugin.Cookie(here / "cluster_config_has_changed.state_file")
cookie.open()
new_config_hash = cookie.get("hash")

View file

@ -86,13 +86,14 @@ def test_node_tl_has_changed_ko_with_timeline(mocker: MockerFixture) -> None:
assert result.exit_code == 2
def test_node_tl_has_changed_ko_with_state_file(mocker: MockerFixture) -> None:
def test_node_tl_has_changed_ko_with_state_file_and_save(mocker: MockerFixture) -> None:
runner = CliRunner()
with open(here / "node_tl_has_changed.state_file", "w") as f:
f.write('{"timeline": 700}')
my_mock(mocker, "node_tl_has_changed", 200)
# test without saving the new tl
result = runner.invoke(
main,
[
@ -105,7 +106,27 @@ def test_node_tl_has_changed_ko_with_state_file(mocker: MockerFixture) -> None:
)
assert result.exit_code == 2
# the new timeline was saved
cookie = nagiosplugin.Cookie(here / "node_tl_has_changed.state_file")
cookie.open()
new_tl = cookie.get("timeline")
cookie.close()
assert new_tl == 700
# test when we save the hash
result = runner.invoke(
main,
[
"-e",
"https://10.20.199.3:8008",
"node_tl_has_changed",
"--state-file",
str(here / "node_tl_has_changed.state_file"),
"--save",
],
)
assert result.exit_code == 2
cookie = nagiosplugin.Cookie(here / "node_tl_has_changed.state_file")
cookie.open()
new_tl = cookie.get("timeline")