diff --git a/README.md b/README.md index 73417ae..c8b4fcb 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Options: Commands: cluster_config_has_changed Check if the hash of the configuration has... cluster_has_leader Check if the cluster has a leader. - cluster_has_replica Check if the cluster has healthy replicates. + cluster_has_replica Check if the cluster has healthy replicas. cluster_is_in_maintenance Check if the cluster is in maintenance mode... cluster_node_count Count the number of nodes in the cluster. node_is_alive Check if the node is alive ie patroni is... @@ -133,6 +133,8 @@ Usage: check_patroni cluster_has_leader [OPTIONS] Check if the cluster has a leader. + Note: there is no difference between a normal and standby leader. + Check: * `OK`: if there is a leader node. * `CRITICAL`: otherwise @@ -148,9 +150,9 @@ Options: ``` Usage: check_patroni cluster_has_replica [OPTIONS] - Check if the cluster has healthy replicates. + Check if the cluster has healthy replicas. - A healthy replicate: + A healthy replica: * is in running state * has a replica role * has a lag lower or equal to max_lag @@ -164,8 +166,10 @@ Usage: check_patroni cluster_has_replica [OPTIONS] * the lag of each replica labelled with "member name"_lag Options: - -w, --warning TEXT Warning threshold for the number of nodes. - -c, --critical TEXT Critical threshold for the number of replica nodes. + -w, --warning TEXT Warning threshold for the number of healthy replica + nodes. + -c, --critical TEXT Critical threshold for the number of healthy replica + nodes. --max-lag TEXT maximum allowed lag --help Show this message and exit. ``` @@ -197,7 +201,7 @@ Usage: check_patroni cluster_node_count [OPTIONS] Check: * Compares the number of nodes against the normal and running node warning and critical thresholds. - * `OK`! If they are not provided. + * `OK`: If they are not provided. Perfdata: * `members`: the member count. 
@@ -206,9 +210,9 @@ Usage: check_patroni cluster_node_count [OPTIONS] Options: -w, --warning TEXT Warning threshold for the number of nodes. - -c, --critical TEXT Critical threshold for the nimber of nodes. + -c, --critical TEXT Critical threshold for the number of nodes. --running-warning TEXT Warning threshold for the number of running nodes. - --running-critical TEXT Critical threshold for the nimber of running nodes. + --running-critical TEXT Critical threshold for the number of running nodes. --help Show this message and exit. ``` diff --git a/check_patroni/cli.py b/check_patroni/cli.py index ca5b821..1509334 100644 --- a/check_patroni/cli.py +++ b/check_patroni/cli.py @@ -164,7 +164,7 @@ def main( "--critical", "critical", type=str, - help="Critical threshold for the nimber of nodes.", + help="Critical threshold for the number of nodes.", ) @click.option( "--running-warning", @@ -176,7 +176,7 @@ def main( "--running-critical", "running_critical", type=str, - help="Critical threshold for the nimber of running nodes.", + help="Critical threshold for the number of running nodes.", ) @click.pass_context @nagiosplugin.guarded @@ -192,7 +192,7 @@ def cluster_node_count( \b Check: * Compares the number of nodes against the normal and running node warning and critical thresholds. - * `OK`! If they are not provided. + * `OK`: If they are not provided. \b Perfdata: @@ -213,8 +213,8 @@ def cluster_node_count( running_warning, running_critical, ), - nagiosplugin.ScalarContext("members_roles"), - nagiosplugin.ScalarContext("members_statuses"), + nagiosplugin.ScalarContext("member_roles"), + nagiosplugin.ScalarContext("member_statuses"), ) check.main(verbose=ctx.obj.verbose, timeout=ctx.obj.timeout) @@ -225,6 +225,8 @@ def cluster_node_count( def cluster_has_leader(ctx: click.Context) -> None: """Check if the cluster has a leader. + Note: there is no difference between a normal and standby leader. + \b Check: * `OK`: if there is a leader node. 
@@ -232,7 +234,6 @@ def cluster_has_leader(ctx: click.Context) -> None: Perfdata: `has_leader` is 1 if there is a leader node, 0 otherwise """ - # FIXME: Manage primary or standby leader in the same place ? check = nagiosplugin.Check() check.add( ClusterHasLeader(ctx.obj.connection_info), @@ -248,14 +249,14 @@ def cluster_has_leader(ctx: click.Context) -> None: "--warning", "warning", type=str, - help="Warning threshold for the number of nodes.", + help="Warning threshold for the number of healthy replica nodes.", ) @click.option( "-c", "--critical", "critical", type=str, - help="Critical threshold for the number of replica nodes.", + help="Critical threshold for the number of healthy replica nodes.", ) @click.option("--max-lag", "max_lag", type=str, help="maximum allowed lag") @click.pass_context @@ -263,10 +264,10 @@ def cluster_has_leader(ctx: click.Context) -> None: def cluster_has_replica( ctx: click.Context, warning: str, critical: str, max_lag: str ) -> None: - """Check if the cluster has healthy replicates. + """Check if the cluster has healthy replicas. \b - A healthy replicate: + A healthy replica: * is in running state * has a replica role * has a lag lower or equal to max_lag @@ -324,7 +325,7 @@ def cluster_config_has_changed( Perfdata: * `is_configuration_changed` is 1 if the configuration has changed """ - # FIXME hash in perfdata ? 
+ # Note: the hash cannot be in the perfdata because it is not a number if (config_hash is None and state_file is None) or ( config_hash is not None and state_file is not None ): diff --git a/check_patroni/cluster.py b/check_patroni/cluster.py index a1ddf37..e7a88c5 100644 --- a/check_patroni/cluster.py +++ b/check_patroni/cluster.py @@ -39,14 +39,14 @@ class ClusterNodeCount(PatroniResource): # The performance data : role for role in role_counters: yield nagiosplugin.Metric( - f"role_{role}", role_counters[role], context="members_roles" + f"role_{role}", role_counters[role], context="member_roles" ) # The performance data : statuses (except running) for state in status_counters: if state != "running": yield nagiosplugin.Metric( - f"state_{state}", status_counters[state], context="members_statuses" + f"state_{state}", status_counters[state], context="member_statuses" ) @@ -111,7 +111,7 @@ class ClusterHasReplica(PatroniResource): # The actual check yield nagiosplugin.Metric("healthy_replica", healthy_replica) - # The performance data : unheakthy replica count, replicas lag + # The performance data : unhealthy replica count, replicas lag yield nagiosplugin.Metric("unhealthy_replica", unhealthy_replica) for replica in replicas: yield nagiosplugin.Metric( @@ -165,6 +165,7 @@ class ClusterConfigHasChanged(PatroniResource): class ClusterConfigHasChangedSummary(nagiosplugin.Summary): + # TODO: It would be helpful to display the old / new hash here, but it's not a metric. def ok(self: "ClusterConfigHasChangedSummary", results: nagiosplugin.Result) -> str: return "The hash of patroni's dynamic configuration has not changed."