Move from urllib3 to requests

This commit is contained in:
benoit 2023-03-12 19:43:06 +01:00 committed by Benoit
parent 0800fc72e9
commit 9cd80f5af8
10 changed files with 143 additions and 86 deletions

View file

@ -101,6 +101,33 @@ For example, the following command will raise:
``` ```
check_patroni -e https://10.20.199.3:8008 cluster_has_replica --warning 2: --critical 1: check_patroni -e https://10.20.199.3:8008 cluster_has_replica --warning 2: --critical 1:
``` ```
## SSL
Several options are available:
* you have a self-signed certificate:
* `--ca_cert`: your certification chain `cat CA-certificate server-certificate > cabundle`
* you have a valid root certificate:
* `--cert_file`: your certificate or the concatenation of your certificate and private key
* `--key_file`: your private key (optional)
* `--ca_cert`: if your CA certificate is not installed on the server you can provide it here (optional)
* unsafe access: don't provide any info, you will get a warning as described below.
If your configuration is unsafe, you might get a warning message such as:
```
$ check_patroni -e https://p1:8008 cluster_node_count
/home/vagrant/.local/lib/python3.9/site-packages/urllib3/connectionpool.py:1045: InsecureRequestWarning: Unverified HTTPS request is being made to host 'p1'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings
warnings.warn(
CLUSTERNODECOUNT OK - members is 2 | members=2 role_leader=1 role_replica=1 state_running=2
```
After checking on the message, you can choose to ignore it by redirecting the
standard error to /dev/null:
```
$ check_patroni -e https://p1:8008 cluster_node_count 2>/dev/null
CLUSTERNODECOUNT OK - members is 2 | members=2 role_leader=1 role_replica=1 state_running=2
```
## Cluster services ## Cluster services
@ -269,7 +296,7 @@ Usage: check_patroni node_is_pending_restart [OPTIONS]
a restart of PostgreSQL to take effect. a restart of PostgreSQL to take effect.
Check: Check:
* `OK`: if the node has pending restart tag. * `OK`: if the node has no pending restart tag.
* `CRITICAL`: otherwise * `CRITICAL`: otherwise
Perfdata: `is_pending_restart` is 1 if the node has pending restart tag, 0 Perfdata: `is_pending_restart` is 1 if the node has pending restart tag, 0

View file

@ -1,3 +1,4 @@
import logging
import re import re
from configparser import ConfigParser from configparser import ConfigParser
@ -32,6 +33,9 @@ from .node import (
from .types import ConnectionInfo, Parameters from .types import ConnectionInfo, Parameters
from .convert import size_to_byte from .convert import size_to_byte
_log = logging.getLogger("nagiosplugin")
DEFAULT_CFG = "config.ini"
def print_version(ctx: click.Context, param: str, value: str) -> None: def print_version(ctx: click.Context, param: str, value: str) -> None:
if not value or ctx.resilient_parsing: if not value or ctx.resilient_parsing:
@ -40,9 +44,6 @@ def print_version(ctx: click.Context, param: str, value: str) -> None:
ctx.exit() ctx.exit()
DEFAULT_CFG = "config.ini"
def configure(ctx: click.Context, param: str, filename: str) -> None: def configure(ctx: click.Context, param: str, filename: str) -> None:
"""Use a config file for the parameters """Use a config file for the parameters
stolen from https://jwodder.github.io/kbits/posts/click-config/ stolen from https://jwodder.github.io/kbits/posts/click-config/

View file

@ -17,11 +17,7 @@ def replace_chars(text: str) -> str:
class ClusterNodeCount(PatroniResource): class ClusterNodeCount(PatroniResource):
def probe(self: "ClusterNodeCount") -> Iterable[nagiosplugin.Metric]: def probe(self: "ClusterNodeCount") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("cluster") item_dict = self.rest_api("cluster")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = json.loads(r.data)
role_counters: Counter[str] = Counter() role_counters: Counter[str] = Counter()
roles = [] roles = []
status_counters: Counter[str] = Counter() status_counters: Counter[str] = Counter()
@ -53,11 +49,8 @@ class ClusterNodeCount(PatroniResource):
class ClusterHasLeader(PatroniResource): class ClusterHasLeader(PatroniResource):
def probe(self: "ClusterHasLeader") -> Iterable[nagiosplugin.Metric]: def probe(self: "ClusterHasLeader") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("cluster") item_dict = self.rest_api("cluster")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = json.loads(r.data)
is_leader_found = False is_leader_found = False
for member in item_dict["members"]: for member in item_dict["members"]:
if member["role"] == "leader" and member["state"] == "running": if member["role"] == "leader" and member["state"] == "running":
@ -91,11 +84,8 @@ class ClusterHasReplica(PatroniResource):
self.max_lag = max_lag self.max_lag = max_lag
def probe(self: "ClusterHasReplica") -> Iterable[nagiosplugin.Metric]: def probe(self: "ClusterHasReplica") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("cluster") item_dict = self.rest_api("cluster")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = json.loads(r.data)
replicas = [] replicas = []
healthy_replica = 0 healthy_replica = 0
unhealthy_replica = 0 unhealthy_replica = 0
@ -140,11 +130,9 @@ class ClusterConfigHasChanged(PatroniResource):
self.save = save self.save = save
def probe(self: "ClusterConfigHasChanged") -> Iterable[nagiosplugin.Metric]: def probe(self: "ClusterConfigHasChanged") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("config") item_dict = self.rest_api("config")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
new_hash = hashlib.md5(r.data).hexdigest() new_hash = hashlib.md5(json.dumps(item_dict).encode()).hexdigest()
_log.debug(f"save result: {self.save}") _log.debug(f"save result: {self.save}")
old_hash = self.config_hash old_hash = self.config_hash
@ -184,11 +172,7 @@ class ClusterConfigHasChangedSummary(nagiosplugin.Summary):
class ClusterIsInMaintenance(PatroniResource): class ClusterIsInMaintenance(PatroniResource):
def probe(self: "ClusterIsInMaintenance") -> Iterable[nagiosplugin.Metric]: def probe(self: "ClusterIsInMaintenance") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("cluster") item_dict = self.rest_api("cluster")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = json.loads(r.data)
# The actual check # The actual check
return [ return [

View file

@ -1,22 +1,20 @@
import json
import logging import logging
import nagiosplugin import nagiosplugin
from typing import Iterable from typing import Iterable
from .types import ConnectionInfo, handle_unknown, PatroniResource from .types import APIError, ConnectionInfo, handle_unknown, PatroniResource
_log = logging.getLogger("nagiosplugin") _log = logging.getLogger("nagiosplugin")
class NodeIsPrimary(PatroniResource): class NodeIsPrimary(PatroniResource):
def probe(self: "NodeIsPrimary") -> Iterable[nagiosplugin.Metric]: def probe(self: "NodeIsPrimary") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("primary") try:
_log.debug(f"api call status: {r.status}") self.rest_api("primary")
_log.debug(f"api call data: {r.data}") except APIError:
return [nagiosplugin.Metric("is_primary", 0)]
return [nagiosplugin.Metric("is_primary", 1 if r.status == 200 else 0)] return [nagiosplugin.Metric("is_primary", 1)]
class NodeIsPrimarySummary(nagiosplugin.Summary): class NodeIsPrimarySummary(nagiosplugin.Summary):
@ -36,14 +34,14 @@ class NodeIsReplica(PatroniResource):
self.max_lag = max_lag self.max_lag = max_lag
def probe(self: "NodeIsReplica") -> Iterable[nagiosplugin.Metric]: def probe(self: "NodeIsReplica") -> Iterable[nagiosplugin.Metric]:
if self.max_lag is None: try:
r = self.rest_api("replica") if self.max_lag is None:
else: self.rest_api("replica")
r = self.rest_api(f"replica?lag={self.max_lag}") else:
_log.debug(f"api call status: {r.status}") self.rest_api(f"replica?lag={self.max_lag}")
_log.debug(f"api call data: {r.data}") except APIError:
return [nagiosplugin.Metric("is_replica", 0)]
return [nagiosplugin.Metric("is_replica", 1 if r.status == 200 else 0)] return [nagiosplugin.Metric("is_replica", 1)]
class NodeIsReplicaSummary(nagiosplugin.Summary): class NodeIsReplicaSummary(nagiosplugin.Summary):
@ -64,11 +62,8 @@ class NodeIsReplicaSummary(nagiosplugin.Summary):
class NodeIsPendingRestart(PatroniResource): class NodeIsPendingRestart(PatroniResource):
def probe(self: "NodeIsPendingRestart") -> Iterable[nagiosplugin.Metric]: def probe(self: "NodeIsPendingRestart") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("patroni") item_dict = self.rest_api("patroni")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = json.loads(r.data)
is_pending_restart = item_dict.get("pending_restart", False) is_pending_restart = item_dict.get("pending_restart", False)
return [ return [
nagiosplugin.Metric( nagiosplugin.Metric(
@ -103,11 +98,7 @@ class NodeTLHasChanged(PatroniResource):
self.save = save self.save = save
def probe(self: "NodeTLHasChanged") -> Iterable[nagiosplugin.Metric]: def probe(self: "NodeTLHasChanged") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("patroni") item_dict = self.rest_api("patroni")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = json.loads(r.data)
new_tl = item_dict["timeline"] new_tl = item_dict["timeline"]
_log.debug(f"save result: {self.save}") _log.debug(f"save result: {self.save}")
@ -154,12 +145,8 @@ class NodePatroniVersion(PatroniResource):
self.patroni_version = patroni_version self.patroni_version = patroni_version
def probe(self: "NodePatroniVersion") -> Iterable[nagiosplugin.Metric]: def probe(self: "NodePatroniVersion") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("patroni") item_dict = self.rest_api("patroni")
_log.debug(f"api call status: {r.status}")
_log.debug(f"api call data: {r.data}")
item_dict = json.loads(r.data)
version = item_dict["patroni"]["version"] version = item_dict["patroni"]["version"]
_log.debug( _log.debug(
f"Version data: patroni version {version} input version {self.patroni_version}" f"Version data: patroni version {version} input version {self.patroni_version}"
@ -190,11 +177,11 @@ class NodePatroniVersionSummary(nagiosplugin.Summary):
class NodeIsAlive(PatroniResource): class NodeIsAlive(PatroniResource):
def probe(self: "NodeIsAlive") -> Iterable[nagiosplugin.Metric]: def probe(self: "NodeIsAlive") -> Iterable[nagiosplugin.Metric]:
r = self.rest_api("liveness") try:
_log.debug(f"api call status: {r.status}") self.rest_api("liveness")
_log.debug(f"api call data: {r.data}") except APIError:
return [nagiosplugin.Metric("is_alive", 0)]
return [nagiosplugin.Metric("is_alive", 1 if r.status == 200 else 0)] return [nagiosplugin.Metric("is_alive", 1)]
class NodeIsAliveSummary(nagiosplugin.Summary): class NodeIsAliveSummary(nagiosplugin.Summary):

View file

@ -1,19 +1,26 @@
import logging import logging
import urllib3
import attr import attr
import nagiosplugin import nagiosplugin
from typing import Any, Callable, List import requests
import urllib3
from typing import Any, Callable, List, Optional, Tuple, Union
_log = logging.getLogger("nagiosplugin") _log = logging.getLogger("nagiosplugin")
class APIError(requests.exceptions.RequestException):
"""This exception is raised when the rest api couldn't
be reached and we got a http status code different from 200.
"""
@attr.s(auto_attribs=True, frozen=True, slots=True) @attr.s(auto_attribs=True, frozen=True, slots=True)
class ConnectionInfo: class ConnectionInfo:
endpoints: List[str] = ["http://127.0.0.1:8008"] endpoints: List[str] = ["http://127.0.0.1:8008"]
cert_file: str = "./ssl/benoit-dalibo-cert.pem" cert_file: Optional[str] = None
key_file: str = "./ssl/benoit-dalibo-key.pem" key_file: Optional[str] = None
ca_cert: str = "./ssl/CA-cert.pem" ca_cert: Optional[str] = None
@attr.s(auto_attribs=True, frozen=True, slots=True) @attr.s(auto_attribs=True, frozen=True, slots=True)
@ -27,27 +34,52 @@ class Parameters:
class PatroniResource(nagiosplugin.Resource): class PatroniResource(nagiosplugin.Resource):
conn_info: ConnectionInfo conn_info: ConnectionInfo
def rest_api( def rest_api(self: "PatroniResource", service: str) -> Any:
self: "PatroniResource", service: str
) -> urllib3.response.HTTPResponse:
"""Try to connect to all the provided endpoints for the requested service""" """Try to connect to all the provided endpoints for the requested service"""
for endpoint in self.conn_info.endpoints: for endpoint in self.conn_info.endpoints:
try: try:
cert: Optional[Union[Tuple[str, str], str]] = None
verify: Optional[Union[str, bool]] = None
if endpoint[:5] == "https": if endpoint[:5] == "https":
pool = urllib3.PoolManager( if (
cert_reqs="CERT_REQUIRED", self.conn_info.cert_file is not None
cert_file=self.conn_info.cert_file, and self.conn_info.key_file is not None # noqa W503
key_file=self.conn_info.key_file, ):
ca_certs=self.conn_info.ca_cert, # we provide a certificate and a private key
) cert = (self.conn_info.cert_file, self.conn_info.key_file)
else: elif (
pool = urllib3.PoolManager() self.conn_info.cert_file is not None
and self.conn_info.key_file is None # noqa W503
):
# we provide a pem file with the private key and the certificate
cert = self.conn_info.cert_file
_log.debug(f"Trying to connect to {endpoint}/{service}") if self.conn_info.ca_cert is not None:
return pool.request( # if cert is not None: this is the CA certificate
"GET", # otherwise this is a ca bundle with root certificate
f"{endpoint}/{service}", # then some optional intermediate certificate and finally
# the server certificate to validate the certification chain
verify = self.conn_info.ca_cert
else:
if cert is None:
# if cert is None we want to bypass https verification,
# this is insecure and should be avoided for production use
verify = False
_log.debug(
f"Trying to connect to {endpoint}/{service} with cert: {cert} verify: {verify}"
) )
r = requests.get(f"{endpoint}/{service}", verify=verify, cert=cert)
_log.debug(f"api call status: {r.status_code}")
_log.debug(f"api call data: {r.text}")
if r.status_code != 200:
raise APIError(
f"Failed to connect to {endpoint}/{service} status code {r.status_code}"
)
return r.json()
except nagiosplugin.Timeout as e: except nagiosplugin.Timeout as e:
raise e raise e
except Exception as e: except Exception as e:

View file

@ -85,6 +85,33 @@ For example, the following command will raise:
``` ```
check_patroni -e https://10.20.199.3:8008 cluster_has_replica --warning 2: --critical 1: check_patroni -e https://10.20.199.3:8008 cluster_has_replica --warning 2: --critical 1:
``` ```
## SSL
Several options are available:
* you have a self-signed certificate:
* `--ca_cert`: your certification chain `cat CA-certificate server-certificate > cabundle`
* you have a valid root certificate:
* `--cert_file`: your certificate or the concatenation of your certificate and private key
* `--key_file`: your private key (optional)
* `--ca_cert`: if your CA certificate is not installed on the server you can provide it here (optional)
* unsafe access: don't provide any info, you will get a warning as described below.
If your configuration is unsafe, you might get a warning message such as:
```
$ check_patroni -e https://p1:8008 cluster_node_count
/home/vagrant/.local/lib/python3.9/site-packages/urllib3/connectionpool.py:1045: InsecureRequestWarning: Unverified HTTPS request is being made to host 'p1'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings
warnings.warn(
CLUSTERNODECOUNT OK - members is 2 | members=2 role_leader=1 role_replica=1 state_running=2
```
After checking on the message, you can choose to ignore it by redirecting the
standard error to /dev/null:
```
$ check_patroni -e https://p1:8008 cluster_node_count 2>/dev/null
CLUSTERNODECOUNT OK - members is 2 | members=2 role_leader=1 role_replica=1 state_running=2
```
_EOF_ _EOF_
readme readme
readme "## Cluster services" readme "## Cluster services"

View file

@ -6,9 +6,6 @@ exclude = build/
[mypy-setup] [mypy-setup]
ignore_errors = True ignore_errors = True
[mypy-urllib3.*]
ignore_missing_imports = true
[mypy-nagiosplugin.*] [mypy-nagiosplugin.*]
ignore_missing_imports = true ignore_missing_imports = true

View file

@ -4,6 +4,7 @@ flake8
mypy==0.961 mypy==0.961
pytest pytest
pytest-mock pytest-mock
types-requests
setuptools setuptools
tox tox
twine twine

View file

@ -39,7 +39,7 @@ setup(
python_requires=">=3.6", python_requires=">=3.6",
install_requires=[ install_requires=[
"attrs >= 17, !=21.1", "attrs >= 17, !=21.1",
"urllib3 >= 1.26.6", "requests",
"nagiosplugin >= 1.3.2", "nagiosplugin >= 1.3.2",
"click >= 8.0.1", "click >= 8.0.1",
], ],

View file

@ -24,7 +24,8 @@ commands =
deps = deps =
mypy == 0.961 mypy == 0.961
commands = commands =
mypy {toxinidir}/check_patroni # we need to install types-requests
mypy --install-types --non-interactive {toxinidir}/check_patroni
[testenv:build] [testenv:build]
deps = deps =