Move from urllib3 to requests

This commit is contained in:
benoit 2023-03-12 19:43:06 +01:00 committed by Benoit
parent 0800fc72e9
commit 9cd80f5af8
10 changed files with 143 additions and 86 deletions

View file

@ -101,6 +101,33 @@ For example, the following command will raise:
```
check_patroni -e https://10.20.199.3:8008 cluster_has_replica --warning 2: --critical 1:
```
## SSL
Several options are available:
* you have a self-signed certificate:
* `--ca_cert`: your certification chain `cat CA-certificate server-certificate > cabundle`
* you have a valid root certificate:
* `--cert_file`: your certificate or the concatenation of your certificate and private key
* `--key_file`: your private key (optional)
* `--ca_cert`: if your CA certificate is not installed on the server you can provide it here (optional)
* unsafe access: don't provide any info; you will get a warning as described below.
If your configuration is unsafe, you might get a warning message such as:
```
$ check_patroni -e https://p1:8008 cluster_node_count
/home/vagrant/.local/lib/python3.9/site-packages/urllib3/connectionpool.py:1045: InsecureRequestWarning: Unverified HTTPS request is being made to host 'p1'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings
warnings.warn(
CLUSTERNODECOUNT OK - members is 2 | members=2 role_leader=1 role_replica=1 state_running=2
```
After checking on the message, you can choose to ignore it by redirecting the
standard error to /dev/null:
```
$ check_patroni -e https://p1:8008 cluster_node_count 2>/dev/null
CLUSTERNODECOUNT OK - members is 2 | members=2 role_leader=1 role_replica=1 state_running=2
```
## Cluster services
@ -269,7 +296,7 @@ Usage: check_patroni node_is_pending_restart [OPTIONS]
a restart of PostgreSQL to take effect.
Check:
* `OK`: if the node has pending restart tag.
* `OK`: if the node has no pending restart tag.
* `CRITICAL`: otherwise
Perfdata: `is_pending_restart` is 1 if the node has pending restart tag, 0

View file

@ -1,3 +1,4 @@
import logging
import re
from configparser import ConfigParser
@ -32,6 +33,9 @@ from .node import (
from .types import ConnectionInfo, Parameters
from .convert import size_to_byte
_log = logging.getLogger("nagiosplugin")
DEFAULT_CFG = "config.ini"
def print_version(ctx: click.Context, param: str, value: str) -> None:
if not value or ctx.resilient_parsing:
@ -40,9 +44,6 @@ def print_version(ctx: click.Context, param: str, value: str) -> None:
ctx.exit()
DEFAULT_CFG = "config.ini"
def configure(ctx: click.Context, param: str, filename: str) -> None:
"""Use a config file for the parameters
stolen from https://jwodder.github.io/kbits/posts/click-config/

View file

@ -17,11 +17,7 @@ def replace_chars(text: str) -> str:
class ClusterNodeCount(PatroniResource):
def probe(self: "ClusterNodeCount") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("cluster")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = json.loads(r.data)
item_dict = self.rest_api("cluster")
role_counters: Counter[str] = Counter()
roles = []
status_counters: Counter[str] = Counter()
@ -53,11 +49,8 @@ class ClusterNodeCount(PatroniResource):
class ClusterHasLeader(PatroniResource):
def probe(self: "ClusterHasLeader") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("cluster")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = self.rest_api("cluster")
item_dict = json.loads(r.data)
is_leader_found = False
for member in item_dict["members"]:
if member["role"] == "leader" and member["state"] == "running":
@ -91,11 +84,8 @@ class ClusterHasReplica(PatroniResource):
self.max_lag = max_lag
def probe(self: "ClusterHasReplica") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("cluster")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = self.rest_api("cluster")
item_dict = json.loads(r.data)
replicas = []
healthy_replica = 0
unhealthy_replica = 0
@ -140,11 +130,9 @@ class ClusterConfigHasChanged(PatroniResource):
self.save = save
def probe(self: "ClusterConfigHasChanged") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("config")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = self.rest_api("config")
new_hash = hashlib.md5(r.data).hexdigest()
new_hash = hashlib.md5(json.dumps(item_dict).encode()).hexdigest()
_log.debug(f"save result: {self.save}")
old_hash = self.config_hash
@ -184,11 +172,7 @@ class ClusterConfigHasChangedSummary(nagiosplugin.Summary):
class ClusterIsInMaintenance(PatroniResource):
def probe(self: "ClusterIsInMaintenance") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("cluster")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = json.loads(r.data)
item_dict = self.rest_api("cluster")
# The actual check
return [

View file

@ -1,22 +1,20 @@
import json
import logging
import nagiosplugin
from typing import Iterable
from .types import ConnectionInfo, handle_unknown, PatroniResource
from .types import APIError, ConnectionInfo, handle_unknown, PatroniResource
_log = logging.getLogger("nagiosplugin")
class NodeIsPrimary(PatroniResource):
def probe(self: "NodeIsPrimary") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("primary")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
return [nagiosplugin.Metric("is_primary", 1 if r.status == 200 else 0)]
try:
self.rest_api("primary")
except APIError:
return [nagiosplugin.Metric("is_primary", 0)]
return [nagiosplugin.Metric("is_primary", 1)]
class NodeIsPrimarySummary(nagiosplugin.Summary):
@ -36,14 +34,14 @@ class NodeIsReplica(PatroniResource):
self.max_lag = max_lag
def probe(self: "NodeIsReplica") -> Iterable[nagiosplugin.Metric]:
if self.max_lag is None:
r = self.rest_api("replica")
else:
r = self.rest_api(f"replica?lag={self.max_lag}")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
return [nagiosplugin.Metric("is_replica", 1 if r.status == 200 else 0)]
try:
if self.max_lag is None:
self.rest_api("replica")
else:
self.rest_api(f"replica?lag={self.max_lag}")
except APIError:
return [nagiosplugin.Metric("is_replica", 0)]
return [nagiosplugin.Metric("is_replica", 1)]
class NodeIsReplicaSummary(nagiosplugin.Summary):
@ -64,11 +62,8 @@ class NodeIsReplicaSummary(nagiosplugin.Summary):
class NodeIsPendingRestart(PatroniResource):
def probe(self: "NodeIsPendingRestart") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("patroni")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = self.rest_api("patroni")
item_dict = json.loads(r.data)
is_pending_restart = item_dict.get("pending_restart", False)
return [
nagiosplugin.Metric(
@ -103,11 +98,7 @@ class NodeTLHasChanged(PatroniResource):
self.save = save
def probe(self: "NodeTLHasChanged") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("patroni")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = json.loads(r.data)
item_dict = self.rest_api("patroni")
new_tl = item_dict["timeline"]
_log.debug(f"save result: {self.save}")
@ -154,12 +145,8 @@ class NodePatroniVersion(PatroniResource):
self.patroni_version = patroni_version
def probe(self: "NodePatroniVersion") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("patroni")
item_dict = self.rest_api("patroni")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = json.loads(r.data)
version = item_dict["patroni"]["version"]
_log.debug(
f"Version data: patroni version {version} input version {self.patroni_version}"
@ -190,11 +177,11 @@ class NodePatroniVersionSummary(nagiosplugin.Summary):
class NodeIsAlive(PatroniResource):
def probe(self: "NodeIsAlive") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("liveness")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
return [nagiosplugin.Metric("is_alive", 1 if r.status == 200 else 0)]
try:
self.rest_api("liveness")
except APIError:
return [nagiosplugin.Metric("is_alive", 0)]
return [nagiosplugin.Metric("is_alive", 1)]
class NodeIsAliveSummary(nagiosplugin.Summary):

View file

@ -1,19 +1,26 @@
import logging
import urllib3
import attr
import nagiosplugin
from typing import Any, Callable, List
import requests
import urllib3
from typing import Any, Callable, List, Optional, Tuple, Union
_log = logging.getLogger("nagiosplugin")
class APIError(requests.exceptions.RequestException):
"""Error raised when a call to the Patroni REST API fails.

`PatroniResource.rest_api` raises it when an endpoint answers with an HTTP
status code different from 200.

NOTE(review): the original wording also covers the case where the REST API
could not be reached at all; that code path is not visible here -- confirm.
"""
@attr.s(auto_attribs=True, frozen=True, slots=True)
class ConnectionInfo:
endpoints: List[str] = ["http://127.0.0.1:8008"]
cert_file: str = "./ssl/benoit-dalibo-cert.pem"
key_file: str = "./ssl/benoit-dalibo-key.pem"
ca_cert: str = "./ssl/CA-cert.pem"
cert_file: Optional[str] = None
key_file: Optional[str] = None
ca_cert: Optional[str] = None
@attr.s(auto_attribs=True, frozen=True, slots=True)
@ -27,27 +34,52 @@ class Parameters:
class PatroniResource(nagiosplugin.Resource):
conn_info: ConnectionInfo
def rest_api(
self: "PatroniResource", service: str
) -> urllib3.response.HTTPResponse:
def rest_api(self: "PatroniResource", service: str) -> Any:
"""Try to connect to all the provided endpoints for the requested service"""
for endpoint in self.conn_info.endpoints:
try:
cert: Optional[Union[Tuple[str, str], str]] = None
verify: Optional[Union[str, bool]] = None
if endpoint[:5] == "https":
pool = urllib3.PoolManager(
cert_reqs="CERT_REQUIRED",
cert_file=self.conn_info.cert_file,
key_file=self.conn_info.key_file,
ca_certs=self.conn_info.ca_cert,
)
else:
pool = urllib3.PoolManager()
if (
self.conn_info.cert_file is not None
and self.conn_info.key_file is not None # noqa W503
):
# we provide a certificate and a private key
cert = (self.conn_info.cert_file, self.conn_info.key_file)
elif (
self.conn_info.cert_file is not None
and self.conn_info.key_file is None # noqa W503
):
# we provide a pem file with the private key and the certificate
cert = self.conn_info.cert_file
_log.debug(f"Trying to connect to {endpoint}/{service}")
return pool.request(
"GET",
f"{endpoint}/{service}",
if self.conn_info.ca_cert is not None:
# if cert is not None: this is the CA certificate
# otherwise this is a ca bundle with root certificate
# then some optional intermediate certificate and finally
# the server certificate to validate the certification chain
verify = self.conn_info.ca_cert
else:
if cert is None:
# if cert is None we want to bypass https verification,
# this is insecure and should be avoided for production use
verify = False
_log.debug(
f"Trying to connect to {endpoint}/{service} with cert: {cert} verify: {verify}"
)
r = requests.get(f"{endpoint}/{service}", verify=verify, cert=cert)
_log.debug(f"api call status: {r.status_code}")
_log.debug(f"api call data: {r.text}")
if r.status_code != 200:
raise APIError(
f"Failed to connect to {endpoint}/{service} status code {r.status_code}"
)
return r.json()
except nagiosplugin.Timeout as e:
raise e
except Exception as e:

View file

@ -85,6 +85,33 @@ For example, the following command will raise:
```
check_patroni -e https://10.20.199.3:8008 cluster_has_replica --warning 2: --critical 1:
```
## SSL
Several options are available:
* you have a self-signed certificate:
* `--ca_cert`: your certification chain `cat CA-certificate server-certificate > cabundle`
* you have a valid root certificate:
* `--cert_file`: your certificate or the concatenation of your certificate and private key
* `--key_file`: your private key (optional)
* `--ca_cert`: if your CA certificate is not installed on the server you can provide it here (optional)
* unsafe access: don't provide any info; you will get a warning as described below.
If your configuration is unsafe, you might get a warning message such as:
```
$ check_patroni -e https://p1:8008 cluster_node_count
/home/vagrant/.local/lib/python3.9/site-packages/urllib3/connectionpool.py:1045: InsecureRequestWarning: Unverified HTTPS request is being made to host 'p1'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings
warnings.warn(
CLUSTERNODECOUNT OK - members is 2 | members=2 role_leader=1 role_replica=1 state_running=2
```
After checking on the message, you can choose to ignore it by redirecting the
standard error to /dev/null:
```
$ check_patroni -e https://p1:8008 cluster_node_count 2>/dev/null
CLUSTERNODECOUNT OK - members is 2 | members=2 role_leader=1 role_replica=1 state_running=2
```
_EOF_
readme
readme "## Cluster services"

View file

@ -6,9 +6,6 @@ exclude = build/
[mypy-setup]
ignore_errors = True
[mypy-urllib3.*]
ignore_missing_imports = true
[mypy-nagiosplugin.*]
ignore_missing_imports = true

View file

@ -4,6 +4,7 @@ flake8
mypy==0.961
pytest
pytest-mock
types-requests
setuptools
tox
twine

View file

@ -39,7 +39,7 @@ setup(
python_requires=">=3.6",
install_requires=[
"attrs >= 17, !=21.1",
"urllib3 >= 1.26.6",
"requests",
"nagiosplugin >= 1.3.2",
"click >= 8.0.1",
],

View file

@ -24,7 +24,8 @@ commands =
deps =
mypy == 0.961
commands =
mypy {toxinidir}/check_patroni
# we need to install types-requests
mypy --install-types --non-interactive {toxinidir}/check_patroni
[testenv:build]
deps =