Move from urllib3 to requests
This commit is contained in:
parent
0800fc72e9
commit
9cd80f5af8
29
README.md
29
README.md
|
@ -101,6 +101,33 @@ For example, the following command will raise:
|
|||
```
|
||||
check_patroni -e https://10.20.199.3:8008 cluster_has_replica --warning 2: --critical 1:
|
||||
```
|
||||
## SSL
|
||||
|
||||
Several options are available:
|
||||
|
||||
* you have a self-signed certificate:
|
||||
* `--ca_cert`: your certification chain `cat CA-certificate server-certificate > cabundle`
|
||||
* you have a valid root certificate:
|
||||
* `--cert_file`: your certificate or the concatenation of your certificate and private key
|
||||
* `--key_file`: your private key (optional)
|
||||
* `--ca_cert`: if your CA certificate is not installed on the server you can provide it here (optional)
|
||||
* unsafe access: don't provide any info; you will get a warning as described below.
|
||||
|
||||
If your configuration is unsafe, you might get a warning message such as:
|
||||
|
||||
```
|
||||
$ check_patroni -e https://p1:8008 cluster_node_count
|
||||
/home/vagrant/.local/lib/python3.9/site-packages/urllib3/connectionpool.py:1045: InsecureRequestWarning: Unverified HTTPS request is being made to host 'p1'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings
|
||||
warnings.warn(
|
||||
CLUSTERNODECOUNT OK - members is 2 | members=2 role_leader=1 role_replica=1 state_running=2
|
||||
```
|
||||
|
||||
After checking on the message, you can choose to ignore it by redirecting the
|
||||
standard error output to /dev/null:
|
||||
```
|
||||
$ check_patroni -e https://p1:8008 cluster_node_count 2>/dev/null
|
||||
CLUSTERNODECOUNT OK - members is 2 | members=2 role_leader=1 role_replica=1 state_running=2
|
||||
```
|
||||
|
||||
## Cluster services
|
||||
|
||||
|
@ -269,7 +296,7 @@ Usage: check_patroni node_is_pending_restart [OPTIONS]
|
|||
a restart of PostgreSQL to take effect.
|
||||
|
||||
Check:
|
||||
* `OK`: if the node has pending restart tag.
|
||||
* `OK`: if the node has no pending restart tag.
|
||||
* `CRITICAL`: otherwise
|
||||
|
||||
Perfdata: `is_pending_restart` is 1 if the node has pending restart tag, 0
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import logging
|
||||
import re
|
||||
from configparser import ConfigParser
|
||||
|
||||
|
@ -32,6 +33,9 @@ from .node import (
|
|||
from .types import ConnectionInfo, Parameters
|
||||
from .convert import size_to_byte
|
||||
|
||||
_log = logging.getLogger("nagiosplugin")
|
||||
DEFAULT_CFG = "config.ini"
|
||||
|
||||
|
||||
def print_version(ctx: click.Context, param: str, value: str) -> None:
|
||||
if not value or ctx.resilient_parsing:
|
||||
|
@ -40,9 +44,6 @@ def print_version(ctx: click.Context, param: str, value: str) -> None:
|
|||
ctx.exit()
|
||||
|
||||
|
||||
DEFAULT_CFG = "config.ini"
|
||||
|
||||
|
||||
def configure(ctx: click.Context, param: str, filename: str) -> None:
|
||||
"""Use a config file for the parameters
|
||||
stolen from https://jwodder.github.io/kbits/posts/click-config/
|
||||
|
|
|
@ -17,11 +17,7 @@ def replace_chars(text: str) -> str:
|
|||
|
||||
class ClusterNodeCount(PatroniResource):
|
||||
def probe(self: "ClusterNodeCount") -> Iterable[nagiosplugin.Metric]:
|
||||
r = self.rest_api("cluster")
|
||||
_log.debug(f"api call status: {r.status}")
|
||||
_log.debug(f"api call data: {r.data}")
|
||||
|
||||
item_dict = json.loads(r.data)
|
||||
item_dict = self.rest_api("cluster")
|
||||
role_counters: Counter[str] = Counter()
|
||||
roles = []
|
||||
status_counters: Counter[str] = Counter()
|
||||
|
@ -53,11 +49,8 @@ class ClusterNodeCount(PatroniResource):
|
|||
|
||||
class ClusterHasLeader(PatroniResource):
|
||||
def probe(self: "ClusterHasLeader") -> Iterable[nagiosplugin.Metric]:
|
||||
r = self.rest_api("cluster")
|
||||
_log.debug(f"api call status: {r.status}")
|
||||
_log.debug(f"api call data: {r.data}")
|
||||
item_dict = self.rest_api("cluster")
|
||||
|
||||
item_dict = json.loads(r.data)
|
||||
is_leader_found = False
|
||||
for member in item_dict["members"]:
|
||||
if member["role"] == "leader" and member["state"] == "running":
|
||||
|
@ -91,11 +84,8 @@ class ClusterHasReplica(PatroniResource):
|
|||
self.max_lag = max_lag
|
||||
|
||||
def probe(self: "ClusterHasReplica") -> Iterable[nagiosplugin.Metric]:
|
||||
r = self.rest_api("cluster")
|
||||
_log.debug(f"api call status: {r.status}")
|
||||
_log.debug(f"api call data: {r.data}")
|
||||
item_dict = self.rest_api("cluster")
|
||||
|
||||
item_dict = json.loads(r.data)
|
||||
replicas = []
|
||||
healthy_replica = 0
|
||||
unhealthy_replica = 0
|
||||
|
@ -140,11 +130,9 @@ class ClusterConfigHasChanged(PatroniResource):
|
|||
self.save = save
|
||||
|
||||
def probe(self: "ClusterConfigHasChanged") -> Iterable[nagiosplugin.Metric]:
|
||||
r = self.rest_api("config")
|
||||
_log.debug(f"api call status: {r.status}")
|
||||
_log.debug(f"api call data: {r.data}")
|
||||
item_dict = self.rest_api("config")
|
||||
|
||||
new_hash = hashlib.md5(r.data).hexdigest()
|
||||
new_hash = hashlib.md5(json.dumps(item_dict).encode()).hexdigest()
|
||||
|
||||
_log.debug(f"save result: {self.save}")
|
||||
old_hash = self.config_hash
|
||||
|
@ -184,11 +172,7 @@ class ClusterConfigHasChangedSummary(nagiosplugin.Summary):
|
|||
|
||||
class ClusterIsInMaintenance(PatroniResource):
|
||||
def probe(self: "ClusterIsInMaintenance") -> Iterable[nagiosplugin.Metric]:
|
||||
r = self.rest_api("cluster")
|
||||
_log.debug(f"api call status: {r.status}")
|
||||
_log.debug(f"api call data: {r.data}")
|
||||
|
||||
item_dict = json.loads(r.data)
|
||||
item_dict = self.rest_api("cluster")
|
||||
|
||||
# The actual check
|
||||
return [
|
||||
|
|
|
@ -1,22 +1,20 @@
|
|||
import json
|
||||
import logging
|
||||
|
||||
import nagiosplugin
|
||||
from typing import Iterable
|
||||
|
||||
from .types import ConnectionInfo, handle_unknown, PatroniResource
|
||||
|
||||
from .types import APIError, ConnectionInfo, handle_unknown, PatroniResource
|
||||
|
||||
_log = logging.getLogger("nagiosplugin")
|
||||
|
||||
|
||||
class NodeIsPrimary(PatroniResource):
|
||||
def probe(self: "NodeIsPrimary") -> Iterable[nagiosplugin.Metric]:
|
||||
r = self.rest_api("primary")
|
||||
_log.debug(f"api call status: {r.status}")
|
||||
_log.debug(f"api call data: {r.data}")
|
||||
|
||||
return [nagiosplugin.Metric("is_primary", 1 if r.status == 200 else 0)]
|
||||
try:
|
||||
self.rest_api("primary")
|
||||
except APIError:
|
||||
return [nagiosplugin.Metric("is_primary", 0)]
|
||||
return [nagiosplugin.Metric("is_primary", 1)]
|
||||
|
||||
|
||||
class NodeIsPrimarySummary(nagiosplugin.Summary):
|
||||
|
@ -36,14 +34,14 @@ class NodeIsReplica(PatroniResource):
|
|||
self.max_lag = max_lag
|
||||
|
||||
def probe(self: "NodeIsReplica") -> Iterable[nagiosplugin.Metric]:
|
||||
if self.max_lag is None:
|
||||
r = self.rest_api("replica")
|
||||
else:
|
||||
r = self.rest_api(f"replica?lag={self.max_lag}")
|
||||
_log.debug(f"api call status: {r.status}")
|
||||
_log.debug(f"api call data: {r.data}")
|
||||
|
||||
return [nagiosplugin.Metric("is_replica", 1 if r.status == 200 else 0)]
|
||||
try:
|
||||
if self.max_lag is None:
|
||||
self.rest_api("replica")
|
||||
else:
|
||||
self.rest_api(f"replica?lag={self.max_lag}")
|
||||
except APIError:
|
||||
return [nagiosplugin.Metric("is_replica", 0)]
|
||||
return [nagiosplugin.Metric("is_replica", 1)]
|
||||
|
||||
|
||||
class NodeIsReplicaSummary(nagiosplugin.Summary):
|
||||
|
@ -64,11 +62,8 @@ class NodeIsReplicaSummary(nagiosplugin.Summary):
|
|||
|
||||
class NodeIsPendingRestart(PatroniResource):
|
||||
def probe(self: "NodeIsPendingRestart") -> Iterable[nagiosplugin.Metric]:
|
||||
r = self.rest_api("patroni")
|
||||
_log.debug(f"api call status: {r.status}")
|
||||
_log.debug(f"api call data: {r.data}")
|
||||
item_dict = self.rest_api("patroni")
|
||||
|
||||
item_dict = json.loads(r.data)
|
||||
is_pending_restart = item_dict.get("pending_restart", False)
|
||||
return [
|
||||
nagiosplugin.Metric(
|
||||
|
@ -103,11 +98,7 @@ class NodeTLHasChanged(PatroniResource):
|
|||
self.save = save
|
||||
|
||||
def probe(self: "NodeTLHasChanged") -> Iterable[nagiosplugin.Metric]:
|
||||
r = self.rest_api("patroni")
|
||||
_log.debug(f"api call status: {r.status}")
|
||||
_log.debug(f"api call data: {r.data}")
|
||||
|
||||
item_dict = json.loads(r.data)
|
||||
item_dict = self.rest_api("patroni")
|
||||
new_tl = item_dict["timeline"]
|
||||
|
||||
_log.debug(f"save result: {self.save}")
|
||||
|
@ -154,12 +145,8 @@ class NodePatroniVersion(PatroniResource):
|
|||
self.patroni_version = patroni_version
|
||||
|
||||
def probe(self: "NodePatroniVersion") -> Iterable[nagiosplugin.Metric]:
|
||||
r = self.rest_api("patroni")
|
||||
item_dict = self.rest_api("patroni")
|
||||
|
||||
_log.debug(f"api call status: {r.status}")
|
||||
_log.debug(f"api call data: {r.data}")
|
||||
|
||||
item_dict = json.loads(r.data)
|
||||
version = item_dict["patroni"]["version"]
|
||||
_log.debug(
|
||||
f"Version data: patroni version {version} input version {self.patroni_version}"
|
||||
|
@ -190,11 +177,11 @@ class NodePatroniVersionSummary(nagiosplugin.Summary):
|
|||
|
||||
class NodeIsAlive(PatroniResource):
|
||||
def probe(self: "NodeIsAlive") -> Iterable[nagiosplugin.Metric]:
|
||||
r = self.rest_api("liveness")
|
||||
_log.debug(f"api call status: {r.status}")
|
||||
_log.debug(f"api call data: {r.data}")
|
||||
|
||||
return [nagiosplugin.Metric("is_alive", 1 if r.status == 200 else 0)]
|
||||
try:
|
||||
self.rest_api("liveness")
|
||||
except APIError:
|
||||
return [nagiosplugin.Metric("is_alive", 0)]
|
||||
return [nagiosplugin.Metric("is_alive", 1)]
|
||||
|
||||
|
||||
class NodeIsAliveSummary(nagiosplugin.Summary):
|
||||
|
|
|
@ -1,19 +1,26 @@
|
|||
import logging
|
||||
import urllib3
|
||||
|
||||
import attr
|
||||
import nagiosplugin
|
||||
from typing import Any, Callable, List
|
||||
import requests
|
||||
import urllib3
|
||||
from typing import Any, Callable, List, Optional, Tuple, Union
|
||||
|
||||
_log = logging.getLogger("nagiosplugin")
|
||||
|
||||
|
||||
class APIError(requests.exceptions.RequestException):
|
||||
"""This exception is raised when the rest api couldn't
|
||||
be reached and we got a http status code different from 200.
|
||||
"""
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True, frozen=True, slots=True)
|
||||
class ConnectionInfo:
|
||||
endpoints: List[str] = ["http://127.0.0.1:8008"]
|
||||
cert_file: str = "./ssl/benoit-dalibo-cert.pem"
|
||||
key_file: str = "./ssl/benoit-dalibo-key.pem"
|
||||
ca_cert: str = "./ssl/CA-cert.pem"
|
||||
cert_file: Optional[str] = None
|
||||
key_file: Optional[str] = None
|
||||
ca_cert: Optional[str] = None
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True, frozen=True, slots=True)
|
||||
|
@ -27,27 +34,52 @@ class Parameters:
|
|||
class PatroniResource(nagiosplugin.Resource):
|
||||
conn_info: ConnectionInfo
|
||||
|
||||
def rest_api(
|
||||
self: "PatroniResource", service: str
|
||||
) -> urllib3.response.HTTPResponse:
|
||||
def rest_api(self: "PatroniResource", service: str) -> Any:
|
||||
"""Try to connect to all the provided endpoints for the requested service"""
|
||||
for endpoint in self.conn_info.endpoints:
|
||||
try:
|
||||
cert: Optional[Union[Tuple[str, str], str]] = None
|
||||
verify: Optional[Union[str, bool]] = None
|
||||
if endpoint[:5] == "https":
|
||||
pool = urllib3.PoolManager(
|
||||
cert_reqs="CERT_REQUIRED",
|
||||
cert_file=self.conn_info.cert_file,
|
||||
key_file=self.conn_info.key_file,
|
||||
ca_certs=self.conn_info.ca_cert,
|
||||
)
|
||||
else:
|
||||
pool = urllib3.PoolManager()
|
||||
if (
|
||||
self.conn_info.cert_file is not None
|
||||
and self.conn_info.key_file is not None # noqa W503
|
||||
):
|
||||
# we provide a certificate and a private key
|
||||
cert = (self.conn_info.cert_file, self.conn_info.key_file)
|
||||
elif (
|
||||
self.conn_info.cert_file is not None
|
||||
and self.conn_info.key_file is None # noqa W503
|
||||
):
|
||||
# we provide a pem file with the private key and the certificate
|
||||
cert = self.conn_info.cert_file
|
||||
|
||||
_log.debug(f"Trying to connect to {endpoint}/{service}")
|
||||
return pool.request(
|
||||
"GET",
|
||||
f"{endpoint}/{service}",
|
||||
if self.conn_info.ca_cert is not None:
|
||||
# if cert is not None: this is the CA certificate
|
||||
# otherwise this is a ca bundle with root certificate
|
||||
# then some optional intermediate certificate and finally
|
||||
# the server certificate to validate the certification chain
|
||||
verify = self.conn_info.ca_cert
|
||||
else:
|
||||
if cert is None:
|
||||
# if cert is None we want to bypass https verification,
|
||||
# this is insecure and should be avoided for production use
|
||||
verify = False
|
||||
|
||||
_log.debug(
|
||||
f"Trying to connect to {endpoint}/{service} with cert: {cert} verify: {verify}"
|
||||
)
|
||||
|
||||
r = requests.get(f"{endpoint}/{service}", verify=verify, cert=cert)
|
||||
_log.debug(f"api call status: {r.status_code}")
|
||||
_log.debug(f"api call data: {r.text}")
|
||||
|
||||
if r.status_code != 200:
|
||||
raise APIError(
|
||||
f"Failed to connect to {endpoint}/{service} status code {r.status_code}"
|
||||
)
|
||||
|
||||
return r.json()
|
||||
except nagiosplugin.Timeout as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
|
|
|
@ -85,6 +85,33 @@ For example, the following command will raise:
|
|||
```
|
||||
check_patroni -e https://10.20.199.3:8008 cluster_has_replica --warning 2: --critical 1:
|
||||
```
|
||||
## SSL
|
||||
|
||||
Several options are available:
|
||||
|
||||
* you have a self-signed certificate:
|
||||
* `--ca_cert`: your certification chain `cat CA-certificate server-certificate > cabundle`
|
||||
* you have a valid root certificate:
|
||||
* `--cert_file`: your certificate or the concatenation of your certificate and private key
|
||||
* `--key_file`: your private key (optional)
|
||||
* `--ca_cert`: if your CA certificate is not installed on the server you can provide it here (optional)
|
||||
* unsafe access: don't provide any info; you will get a warning as described below.
|
||||
|
||||
If your configuration is unsafe, you might get a warning message such as:
|
||||
|
||||
```
|
||||
$ check_patroni -e https://p1:8008 cluster_node_count
|
||||
/home/vagrant/.local/lib/python3.9/site-packages/urllib3/connectionpool.py:1045: InsecureRequestWarning: Unverified HTTPS request is being made to host 'p1'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings
|
||||
warnings.warn(
|
||||
CLUSTERNODECOUNT OK - members is 2 | members=2 role_leader=1 role_replica=1 state_running=2
|
||||
```
|
||||
|
||||
After checking on the message, you can choose to ignore it by redirecting the
|
||||
standard error output to /dev/null:
|
||||
```
|
||||
$ check_patroni -e https://p1:8008 cluster_node_count 2>/dev/null
|
||||
CLUSTERNODECOUNT OK - members is 2 | members=2 role_leader=1 role_replica=1 state_running=2
|
||||
```
|
||||
_EOF_
|
||||
readme
|
||||
readme "## Cluster services"
|
||||
|
|
3
mypy.ini
3
mypy.ini
|
@ -6,9 +6,6 @@ exclude = build/
|
|||
[mypy-setup]
|
||||
ignore_errors = True
|
||||
|
||||
[mypy-urllib3.*]
|
||||
ignore_missing_imports = true
|
||||
|
||||
[mypy-nagiosplugin.*]
|
||||
ignore_missing_imports = true
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ flake8
|
|||
mypy==0.961
|
||||
pytest
|
||||
pytest-mock
|
||||
types-requests
|
||||
setuptools
|
||||
tox
|
||||
twine
|
||||
|
|
2
setup.py
2
setup.py
|
@ -39,7 +39,7 @@ setup(
|
|||
python_requires=">=3.6",
|
||||
install_requires=[
|
||||
"attrs >= 17, !=21.1",
|
||||
"urllib3 >= 1.26.6",
|
||||
"requests",
|
||||
"nagiosplugin >= 1.3.2",
|
||||
"click >= 8.0.1",
|
||||
],
|
||||
|
|
Loading…
Reference in a new issue