From 908669f0730af83938459520c15d08f5b3f9fa5c Mon Sep 17 00:00:00 2001 From: benoit Date: Wed, 1 Mar 2023 16:46:55 +0100 Subject: [PATCH] Add a `--save` option when state files are used The checks `cluster_config_has_changed` and `node_tl_has_changed` use a state file to store the previous value of the config hash and the timeline. Previously the check would fail if something changed, but the new value would be saved directly. This behavious has changed. The new value is saved only if `--save` is passed to the check. The mimics the way [check_pgactivity] manages this kind of checks. [check_pgactivity]: https://github.com/OPMDG/check_pgactivity --- README.md | 7 ++++-- check_patroni/cli.py | 26 ++++++++++++++++++---- check_patroni/cluster.py | 7 ++++-- check_patroni/node.py | 7 ++++-- tests/test_cluster_config_has_changed.py | 28 ++++++++++++++++++++++-- tests/test_node_tl_has_changed.py | 25 +++++++++++++++++++-- 6 files changed, 86 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index f3e91af..4fdc47a 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,6 @@ issue](https://github.com/dalibo/check_patroni/issues/new). Dalibo has no commitment on response time for public free support. Thanks for you contribution ! - ## Config file All global and service specific parameters can be specified via a config file has follows: @@ -119,6 +118,8 @@ Usage: check_patroni cluster_config_has_changed [OPTIONS] Options: --hash TEXT A hash to compare with. -s, --state-file TEXT A state file to store the hash of the configuration. + --save Set the current configuration hash as the reference + for future calls. --help Show this message and exit. ``` @@ -320,7 +321,7 @@ Usage: check_patroni node_tl_has_changed [OPTIONS] work. Check: - * `OK`: The timeline is the same as last time (`--state_file`) or the inputed timeline (`--timeline`) + * `OK`: The timeline is the same as last time (`--state_file`) or the inputted timeline (`--timeline`) * `CRITICAL`: The tl is not the same. Perfdata: @@ -330,6 +331,8 @@ Usage: check_patroni node_tl_has_changed [OPTIONS] Options: --timeline TEXT A timeline number to compare with. -s, --state-file TEXT A state file to store the last tl number into. + --save Set the current timeline number as the reference for + future calls. --help Show this message and exit. ``` diff --git a/check_patroni/cli.py b/check_patroni/cli.py index c166153..55f403c 100644 --- a/check_patroni/cli.py +++ b/check_patroni/cli.py @@ -308,10 +308,17 @@ def cluster_has_replica( type=str, help="A state file to store the hash of the configuration.", ) +@click.option( + "--save", + "save_config", + is_flag=True, + default=False, + help="Set the current configuration hash as the reference for future calls.", +) @click.pass_context @nagiosplugin.guarded def cluster_config_has_changed( - ctx: click.Context, config_hash: str, state_file: str + ctx: click.Context, config_hash: str, state_file: str, save_config: bool ) -> None: """Check if the hash of the configuration has changed. @@ -343,7 +350,9 @@ def cluster_config_has_changed( check = nagiosplugin.Check() check.add( - ClusterConfigHasChanged(ctx.obj.connection_info, old_config_hash, state_file), + ClusterConfigHasChanged( + ctx.obj.connection_info, old_config_hash, state_file, save_config + ), nagiosplugin.ScalarContext("is_configuration_changed", None, "@1:1"), ClusterConfigHasChangedSummary(old_config_hash), ) @@ -455,9 +464,18 @@ def node_is_pending_restart(ctx: click.Context) -> None: type=str, help="A state file to store the last tl number into.", ) +@click.option( + "--save", + "save_tl", + is_flag=True, + default=False, + help="Set the current timeline number as the reference for future calls.", +) @click.pass_context @nagiosplugin.guarded -def node_tl_has_changed(ctx: click.Context, timeline: str, state_file: str) -> None: +def node_tl_has_changed( + ctx: click.Context, timeline: str, state_file: str, save_tl: bool +) -> None: """Check if the timeline has changed. Note: either a timeline or a state file must be provided for this service to work. @@ -488,7 +506,7 @@ def node_tl_has_changed(ctx: click.Context, timeline: str, state_file: str) -> N check = nagiosplugin.Check() check.add( - NodeTLHasChanged(ctx.obj.connection_info, old_timeline, state_file), + NodeTLHasChanged(ctx.obj.connection_info, old_timeline, state_file, save_tl), nagiosplugin.ScalarContext("is_timeline_changed", None, "@1:1"), nagiosplugin.ScalarContext("timeline"), NodeTLHasChangedSummary(old_timeline), diff --git a/check_patroni/cluster.py b/check_patroni/cluster.py index 11916a0..9ad9ec2 100644 --- a/check_patroni/cluster.py +++ b/check_patroni/cluster.py @@ -132,10 +132,12 @@ class ClusterConfigHasChanged(PatroniResource): connection_info: ConnectionInfo, config_hash: str, # Always contains the old hash state_file: str, # Only used to update the hash in the state_file (when needed) + save: bool = False, # Save the configuration ): super().__init__(connection_info) self.state_file = state_file self.config_hash = config_hash + self.save = save def probe(self: "ClusterConfigHasChanged") -> Iterable[nagiosplugin.Metric]: r = self.rest_api("config") @@ -144,9 +146,10 @@ class ClusterConfigHasChanged(PatroniResource): new_hash = hashlib.md5(r.data).hexdigest() + _log.debug(f"save result: {self.save}") old_hash = self.config_hash - if self.state_file is not None: - _log.debug(f"Saving new hash to state file / cookie {self.state_file}") + if self.state_file is not None and self.save: + _log.debug(f"saving new hash to state file / cookie {self.state_file}") cookie = nagiosplugin.Cookie(self.state_file) cookie.open() cookie["hash"] = new_hash diff --git a/check_patroni/node.py b/check_patroni/node.py index 8c5d935..ae12569 100644 --- a/check_patroni/node.py +++ b/check_patroni/node.py @@ -95,10 +95,12 @@ class NodeTLHasChanged(PatroniResource): connection_info: ConnectionInfo, timeline: str, # Always contains the old timeline state_file: str, # Only used to update the timeline in the state_file (when needed) + save: bool, # save timeline in state file ) -> None: super().__init__(connection_info) self.state_file = state_file self.timeline = timeline + self.save = save def probe(self: "NodeTLHasChanged") -> Iterable[nagiosplugin.Metric]: r = self.rest_api("patroni") @@ -108,9 +110,10 @@ class NodeTLHasChanged(PatroniResource): item_dict = json.loads(r.data) new_tl = item_dict["timeline"] + _log.debug(f"save result: {self.save}") old_tl = self.timeline - if self.state_file is not None: - _log.debug(f"Saving new timeline to state file / cookie {self.state_file}") + if self.state_file is not None and self.save: + _log.debug(f"saving new timeline to state file / cookie {self.state_file}") cookie = nagiosplugin.Cookie(self.state_file) cookie.open() cookie["timeline"] = new_tl diff --git a/tests/test_cluster_config_has_changed.py b/tests/test_cluster_config_has_changed.py index 1a4ca59..516c052 100644 --- a/tests/test_cluster_config_has_changed.py +++ b/tests/test_cluster_config_has_changed.py @@ -45,6 +45,7 @@ def test_cluster_config_has_changed_ok_with_hash(mocker: MockerFixture) -> None: "640df9f0211c791723f18fc3ed9dbb95", ], ) + print(result.output) assert result.exit_code == 0 @@ -85,13 +86,16 @@ def test_cluster_config_has_changed_ko_with_hash(mocker: MockerFixture) -> None: assert result.exit_code == 2 -def test_cluster_config_has_changed_ko_with_state_file(mocker: MockerFixture) -> None: +def test_cluster_config_has_changed_ko_with_state_file_and_save( + mocker: MockerFixture, +) -> None: runner = CliRunner() with open(here / "cluster_config_has_changed.state_file", "w") as f: f.write('{"hash": "640df9f0211c791723f18fc3edffffff"}') my_mock(mocker, "cluster_config_has_changed", 200) + # test without saving the new hash result = runner.invoke( main, [ @@ -104,7 +108,27 @@ def test_cluster_config_has_changed_ko_with_state_file(mocker: MockerFixture) -> ) assert result.exit_code == 2 - # the new hash was saved + cookie = nagiosplugin.Cookie(here / "cluster_config_has_changed.state_file") + cookie.open() + new_config_hash = cookie.get("hash") + cookie.close() + + assert new_config_hash == "640df9f0211c791723f18fc3edffffff" + + # test when we save the hash + result = runner.invoke( + main, + [ + "-e", + "https://10.20.199.3:8008", + "cluster_config_has_changed", + "--state-file", + str(here / "cluster_config_has_changed.state_file"), + "--save", + ], + ) + assert result.exit_code == 2 + cookie = nagiosplugin.Cookie(here / "cluster_config_has_changed.state_file") cookie.open() new_config_hash = cookie.get("hash") diff --git a/tests/test_node_tl_has_changed.py b/tests/test_node_tl_has_changed.py index b609593..270f119 100644 --- a/tests/test_node_tl_has_changed.py +++ b/tests/test_node_tl_has_changed.py @@ -86,13 +86,14 @@ def test_node_tl_has_changed_ko_with_timeline(mocker: MockerFixture) -> None: assert result.exit_code == 2 -def test_node_tl_has_changed_ko_with_state_file(mocker: MockerFixture) -> None: +def test_node_tl_has_changed_ko_with_state_file_and_save(mocker: MockerFixture) -> None: runner = CliRunner() with open(here / "node_tl_has_changed.state_file", "w") as f: f.write('{"timeline": 700}') my_mock(mocker, "node_tl_has_changed", 200) + # test without saving the new tl result = runner.invoke( main, [ @@ -105,7 +106,27 @@ def test_node_tl_has_changed_ko_with_state_file(mocker: MockerFixture) -> None: ) assert result.exit_code == 2 - # the new timeline was saved + cookie = nagiosplugin.Cookie(here / "node_tl_has_changed.state_file") + cookie.open() + new_tl = cookie.get("timeline") + cookie.close() + + assert new_tl == 700 + + # test when we save the hash + result = runner.invoke( + main, + [ + "-e", + "https://10.20.199.3:8008", + "node_tl_has_changed", + "--state-file", + str(here / "node_tl_has_changed.state_file"), + "--save", + ], + ) + assert result.exit_code == 2 + cookie = nagiosplugin.Cookie(here / "node_tl_has_changed.state_file") cookie.open() new_tl = cookie.get("timeline")