Add a --save
option when state files are used
The checks `cluster_config_has_changed` and `node_tl_has_changed` use a state file to store the previous value of the config hash and the timeline. Previously the check would fail if something changed, but the new value would be saved immediately. This behaviour has changed: the new value is now saved only if `--save` is passed to the check. This mimics the way [check_pgactivity] manages this kind of check. [check_pgactivity]: https://github.com/OPMDG/check_pgactivity
This commit is contained in:
parent
e7e7ac2e3a
commit
908669f073
|
@ -61,7 +61,6 @@ issue](https://github.com/dalibo/check_patroni/issues/new). Dalibo has no
|
||||||
commitment on response time for public free support. Thanks for your
|
commitment on response time for public free support. Thanks for your
|
||||||
contribution !
|
contribution !
|
||||||
|
|
||||||
|
|
||||||
## Config file
|
## Config file
|
||||||
|
|
||||||
All global and service specific parameters can be specified via a config file as follows:
|
All global and service specific parameters can be specified via a config file as follows:
|
||||||
|
@ -119,6 +118,8 @@ Usage: check_patroni cluster_config_has_changed [OPTIONS]
|
||||||
Options:
|
Options:
|
||||||
--hash TEXT A hash to compare with.
|
--hash TEXT A hash to compare with.
|
||||||
-s, --state-file TEXT A state file to store the hash of the configuration.
|
-s, --state-file TEXT A state file to store the hash of the configuration.
|
||||||
|
--save Set the current configuration hash as the reference
|
||||||
|
for future calls.
|
||||||
--help Show this message and exit.
|
--help Show this message and exit.
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -320,7 +321,7 @@ Usage: check_patroni node_tl_has_changed [OPTIONS]
|
||||||
work.
|
work.
|
||||||
|
|
||||||
Check:
|
Check:
|
||||||
* `OK`: The timeline is the same as last time (`--state_file`) or the inputed timeline (`--timeline`)
|
* `OK`: The timeline is the same as last time (`--state_file`) or the inputted timeline (`--timeline`)
|
||||||
* `CRITICAL`: The tl is not the same.
|
* `CRITICAL`: The tl is not the same.
|
||||||
|
|
||||||
Perfdata:
|
Perfdata:
|
||||||
|
@ -330,6 +331,8 @@ Usage: check_patroni node_tl_has_changed [OPTIONS]
|
||||||
Options:
|
Options:
|
||||||
--timeline TEXT A timeline number to compare with.
|
--timeline TEXT A timeline number to compare with.
|
||||||
-s, --state-file TEXT A state file to store the last tl number into.
|
-s, --state-file TEXT A state file to store the last tl number into.
|
||||||
|
--save Set the current timeline number as the reference for
|
||||||
|
future calls.
|
||||||
--help Show this message and exit.
|
--help Show this message and exit.
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -308,10 +308,17 @@ def cluster_has_replica(
|
||||||
type=str,
|
type=str,
|
||||||
help="A state file to store the hash of the configuration.",
|
help="A state file to store the hash of the configuration.",
|
||||||
)
|
)
|
||||||
|
@click.option(
|
||||||
|
"--save",
|
||||||
|
"save_config",
|
||||||
|
is_flag=True,
|
||||||
|
default=False,
|
||||||
|
help="Set the current configuration hash as the reference for future calls.",
|
||||||
|
)
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
@nagiosplugin.guarded
|
@nagiosplugin.guarded
|
||||||
def cluster_config_has_changed(
|
def cluster_config_has_changed(
|
||||||
ctx: click.Context, config_hash: str, state_file: str
|
ctx: click.Context, config_hash: str, state_file: str, save_config: bool
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Check if the hash of the configuration has changed.
|
"""Check if the hash of the configuration has changed.
|
||||||
|
|
||||||
|
@ -343,7 +350,9 @@ def cluster_config_has_changed(
|
||||||
|
|
||||||
check = nagiosplugin.Check()
|
check = nagiosplugin.Check()
|
||||||
check.add(
|
check.add(
|
||||||
ClusterConfigHasChanged(ctx.obj.connection_info, old_config_hash, state_file),
|
ClusterConfigHasChanged(
|
||||||
|
ctx.obj.connection_info, old_config_hash, state_file, save_config
|
||||||
|
),
|
||||||
nagiosplugin.ScalarContext("is_configuration_changed", None, "@1:1"),
|
nagiosplugin.ScalarContext("is_configuration_changed", None, "@1:1"),
|
||||||
ClusterConfigHasChangedSummary(old_config_hash),
|
ClusterConfigHasChangedSummary(old_config_hash),
|
||||||
)
|
)
|
||||||
|
@ -455,9 +464,18 @@ def node_is_pending_restart(ctx: click.Context) -> None:
|
||||||
type=str,
|
type=str,
|
||||||
help="A state file to store the last tl number into.",
|
help="A state file to store the last tl number into.",
|
||||||
)
|
)
|
||||||
|
@click.option(
|
||||||
|
"--save",
|
||||||
|
"save_tl",
|
||||||
|
is_flag=True,
|
||||||
|
default=False,
|
||||||
|
help="Set the current timeline number as the reference for future calls.",
|
||||||
|
)
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
@nagiosplugin.guarded
|
@nagiosplugin.guarded
|
||||||
def node_tl_has_changed(ctx: click.Context, timeline: str, state_file: str) -> None:
|
def node_tl_has_changed(
|
||||||
|
ctx: click.Context, timeline: str, state_file: str, save_tl: bool
|
||||||
|
) -> None:
|
||||||
"""Check if the timeline has changed.
|
"""Check if the timeline has changed.
|
||||||
|
|
||||||
Note: either a timeline or a state file must be provided for this service to work.
|
Note: either a timeline or a state file must be provided for this service to work.
|
||||||
|
@ -488,7 +506,7 @@ def node_tl_has_changed(ctx: click.Context, timeline: str, state_file: str) -> N
|
||||||
|
|
||||||
check = nagiosplugin.Check()
|
check = nagiosplugin.Check()
|
||||||
check.add(
|
check.add(
|
||||||
NodeTLHasChanged(ctx.obj.connection_info, old_timeline, state_file),
|
NodeTLHasChanged(ctx.obj.connection_info, old_timeline, state_file, save_tl),
|
||||||
nagiosplugin.ScalarContext("is_timeline_changed", None, "@1:1"),
|
nagiosplugin.ScalarContext("is_timeline_changed", None, "@1:1"),
|
||||||
nagiosplugin.ScalarContext("timeline"),
|
nagiosplugin.ScalarContext("timeline"),
|
||||||
NodeTLHasChangedSummary(old_timeline),
|
NodeTLHasChangedSummary(old_timeline),
|
||||||
|
|
|
@ -132,10 +132,12 @@ class ClusterConfigHasChanged(PatroniResource):
|
||||||
connection_info: ConnectionInfo,
|
connection_info: ConnectionInfo,
|
||||||
config_hash: str, # Always contains the old hash
|
config_hash: str, # Always contains the old hash
|
||||||
state_file: str, # Only used to update the hash in the state_file (when needed)
|
state_file: str, # Only used to update the hash in the state_file (when needed)
|
||||||
|
save: bool = False, # Save the configuration
|
||||||
):
|
):
|
||||||
super().__init__(connection_info)
|
super().__init__(connection_info)
|
||||||
self.state_file = state_file
|
self.state_file = state_file
|
||||||
self.config_hash = config_hash
|
self.config_hash = config_hash
|
||||||
|
self.save = save
|
||||||
|
|
||||||
def probe(self: "ClusterConfigHasChanged") -> Iterable[nagiosplugin.Metric]:
|
def probe(self: "ClusterConfigHasChanged") -> Iterable[nagiosplugin.Metric]:
|
||||||
r = self.rest_api("config")
|
r = self.rest_api("config")
|
||||||
|
@ -144,9 +146,10 @@ class ClusterConfigHasChanged(PatroniResource):
|
||||||
|
|
||||||
new_hash = hashlib.md5(r.data).hexdigest()
|
new_hash = hashlib.md5(r.data).hexdigest()
|
||||||
|
|
||||||
|
_log.debug(f"save result: {self.save}")
|
||||||
old_hash = self.config_hash
|
old_hash = self.config_hash
|
||||||
if self.state_file is not None:
|
if self.state_file is not None and self.save:
|
||||||
_log.debug(f"Saving new hash to state file / cookie {self.state_file}")
|
_log.debug(f"saving new hash to state file / cookie {self.state_file}")
|
||||||
cookie = nagiosplugin.Cookie(self.state_file)
|
cookie = nagiosplugin.Cookie(self.state_file)
|
||||||
cookie.open()
|
cookie.open()
|
||||||
cookie["hash"] = new_hash
|
cookie["hash"] = new_hash
|
||||||
|
|
|
@ -95,10 +95,12 @@ class NodeTLHasChanged(PatroniResource):
|
||||||
connection_info: ConnectionInfo,
|
connection_info: ConnectionInfo,
|
||||||
timeline: str, # Always contains the old timeline
|
timeline: str, # Always contains the old timeline
|
||||||
state_file: str, # Only used to update the timeline in the state_file (when needed)
|
state_file: str, # Only used to update the timeline in the state_file (when needed)
|
||||||
|
save: bool, # save timeline in state file
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(connection_info)
|
super().__init__(connection_info)
|
||||||
self.state_file = state_file
|
self.state_file = state_file
|
||||||
self.timeline = timeline
|
self.timeline = timeline
|
||||||
|
self.save = save
|
||||||
|
|
||||||
def probe(self: "NodeTLHasChanged") -> Iterable[nagiosplugin.Metric]:
|
def probe(self: "NodeTLHasChanged") -> Iterable[nagiosplugin.Metric]:
|
||||||
r = self.rest_api("patroni")
|
r = self.rest_api("patroni")
|
||||||
|
@ -108,9 +110,10 @@ class NodeTLHasChanged(PatroniResource):
|
||||||
item_dict = json.loads(r.data)
|
item_dict = json.loads(r.data)
|
||||||
new_tl = item_dict["timeline"]
|
new_tl = item_dict["timeline"]
|
||||||
|
|
||||||
|
_log.debug(f"save result: {self.save}")
|
||||||
old_tl = self.timeline
|
old_tl = self.timeline
|
||||||
if self.state_file is not None:
|
if self.state_file is not None and self.save:
|
||||||
_log.debug(f"Saving new timeline to state file / cookie {self.state_file}")
|
_log.debug(f"saving new timeline to state file / cookie {self.state_file}")
|
||||||
cookie = nagiosplugin.Cookie(self.state_file)
|
cookie = nagiosplugin.Cookie(self.state_file)
|
||||||
cookie.open()
|
cookie.open()
|
||||||
cookie["timeline"] = new_tl
|
cookie["timeline"] = new_tl
|
||||||
|
|
|
@ -45,6 +45,7 @@ def test_cluster_config_has_changed_ok_with_hash(mocker: MockerFixture) -> None:
|
||||||
"640df9f0211c791723f18fc3ed9dbb95",
|
"640df9f0211c791723f18fc3ed9dbb95",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
print(result.output)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
|
|
||||||
|
|
||||||
|
@ -85,13 +86,16 @@ def test_cluster_config_has_changed_ko_with_hash(mocker: MockerFixture) -> None:
|
||||||
assert result.exit_code == 2
|
assert result.exit_code == 2
|
||||||
|
|
||||||
|
|
||||||
def test_cluster_config_has_changed_ko_with_state_file(mocker: MockerFixture) -> None:
|
def test_cluster_config_has_changed_ko_with_state_file_and_save(
|
||||||
|
mocker: MockerFixture,
|
||||||
|
) -> None:
|
||||||
runner = CliRunner()
|
runner = CliRunner()
|
||||||
|
|
||||||
with open(here / "cluster_config_has_changed.state_file", "w") as f:
|
with open(here / "cluster_config_has_changed.state_file", "w") as f:
|
||||||
f.write('{"hash": "640df9f0211c791723f18fc3edffffff"}')
|
f.write('{"hash": "640df9f0211c791723f18fc3edffffff"}')
|
||||||
|
|
||||||
my_mock(mocker, "cluster_config_has_changed", 200)
|
my_mock(mocker, "cluster_config_has_changed", 200)
|
||||||
|
# test without saving the new hash
|
||||||
result = runner.invoke(
|
result = runner.invoke(
|
||||||
main,
|
main,
|
||||||
[
|
[
|
||||||
|
@ -104,7 +108,27 @@ def test_cluster_config_has_changed_ko_with_state_file(mocker: MockerFixture) ->
|
||||||
)
|
)
|
||||||
assert result.exit_code == 2
|
assert result.exit_code == 2
|
||||||
|
|
||||||
# the new hash was saved
|
cookie = nagiosplugin.Cookie(here / "cluster_config_has_changed.state_file")
|
||||||
|
cookie.open()
|
||||||
|
new_config_hash = cookie.get("hash")
|
||||||
|
cookie.close()
|
||||||
|
|
||||||
|
assert new_config_hash == "640df9f0211c791723f18fc3edffffff"
|
||||||
|
|
||||||
|
# test when we save the hash
|
||||||
|
result = runner.invoke(
|
||||||
|
main,
|
||||||
|
[
|
||||||
|
"-e",
|
||||||
|
"https://10.20.199.3:8008",
|
||||||
|
"cluster_config_has_changed",
|
||||||
|
"--state-file",
|
||||||
|
str(here / "cluster_config_has_changed.state_file"),
|
||||||
|
"--save",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
assert result.exit_code == 2
|
||||||
|
|
||||||
cookie = nagiosplugin.Cookie(here / "cluster_config_has_changed.state_file")
|
cookie = nagiosplugin.Cookie(here / "cluster_config_has_changed.state_file")
|
||||||
cookie.open()
|
cookie.open()
|
||||||
new_config_hash = cookie.get("hash")
|
new_config_hash = cookie.get("hash")
|
||||||
|
|
|
@ -86,13 +86,14 @@ def test_node_tl_has_changed_ko_with_timeline(mocker: MockerFixture) -> None:
|
||||||
assert result.exit_code == 2
|
assert result.exit_code == 2
|
||||||
|
|
||||||
|
|
||||||
def test_node_tl_has_changed_ko_with_state_file(mocker: MockerFixture) -> None:
|
def test_node_tl_has_changed_ko_with_state_file_and_save(mocker: MockerFixture) -> None:
|
||||||
runner = CliRunner()
|
runner = CliRunner()
|
||||||
|
|
||||||
with open(here / "node_tl_has_changed.state_file", "w") as f:
|
with open(here / "node_tl_has_changed.state_file", "w") as f:
|
||||||
f.write('{"timeline": 700}')
|
f.write('{"timeline": 700}')
|
||||||
|
|
||||||
my_mock(mocker, "node_tl_has_changed", 200)
|
my_mock(mocker, "node_tl_has_changed", 200)
|
||||||
|
# test without saving the new tl
|
||||||
result = runner.invoke(
|
result = runner.invoke(
|
||||||
main,
|
main,
|
||||||
[
|
[
|
||||||
|
@ -105,7 +106,27 @@ def test_node_tl_has_changed_ko_with_state_file(mocker: MockerFixture) -> None:
|
||||||
)
|
)
|
||||||
assert result.exit_code == 2
|
assert result.exit_code == 2
|
||||||
|
|
||||||
# the new timeline was saved
|
cookie = nagiosplugin.Cookie(here / "node_tl_has_changed.state_file")
|
||||||
|
cookie.open()
|
||||||
|
new_tl = cookie.get("timeline")
|
||||||
|
cookie.close()
|
||||||
|
|
||||||
|
assert new_tl == 700
|
||||||
|
|
||||||
|
# test when we save the timeline
|
||||||
|
result = runner.invoke(
|
||||||
|
main,
|
||||||
|
[
|
||||||
|
"-e",
|
||||||
|
"https://10.20.199.3:8008",
|
||||||
|
"node_tl_has_changed",
|
||||||
|
"--state-file",
|
||||||
|
str(here / "node_tl_has_changed.state_file"),
|
||||||
|
"--save",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
assert result.exit_code == 2
|
||||||
|
|
||||||
cookie = nagiosplugin.Cookie(here / "node_tl_has_changed.state_file")
|
cookie = nagiosplugin.Cookie(here / "node_tl_has_changed.state_file")
|
||||||
cookie.open()
|
cookie.open()
|
||||||
new_tl = cookie.get("timeline")
|
new_tl = cookie.get("timeline")
|
||||||
|
|
Loading…
Reference in a new issue