* debian/patches/dcs-last-seen.patch: New patch, taken from upstream commit
2f31e88b.
This commit is contained in:
parent
73e419a03c
commit
4f4460c3af
4
debian/changelog
vendored
4
debian/changelog
vendored
|
@ -3,6 +3,10 @@ patroni (2.1.1-2) UNRELEASED; urgency=medium
|
|||
[ Christoph Berg ]
|
||||
* debian/tests/control: Give Test-Commands meaningful names.
|
||||
|
||||
[ Michael Banck ]
|
||||
* debian/patches/dcs-last-seen.patch: New patch, taken from upstream commit
|
||||
2f31e88b.
|
||||
|
||||
-- Debian PostgreSQL Maintainers <team+postgresql@tracker.debian.org> Fri, 20 Aug 2021 10:54:10 +0200
|
||||
|
||||
patroni (2.1.1-1) unstable; urgency=medium
|
||||
|
|
154
debian/patches/dcs-last-seen.patch
vendored
Normal file
154
debian/patches/dcs-last-seen.patch
vendored
Normal file
|
@ -0,0 +1,154 @@
|
|||
From 2f31e88bdc3f933f0c3fffdc6ea67a99a7c378cc Mon Sep 17 00:00:00 2001
|
||||
From: Michael Banck <michael.banck@credativ.de>
|
||||
Date: Wed, 22 Sep 2021 10:01:35 +0200
|
||||
Subject: [PATCH] Add dcs_last_seen field to API (#2051)
|
||||
|
||||
This field notes the last time (as unix epoch) a cluster member has successfully communicated with the DCS. This is useful to identify and/or analyze network partitions.
|
||||
|
||||
Also, expose dcs_last_seen in the MemberStatus class and its from_api_response() method.
|
||||
---
|
||||
patroni/api.py | 6 ++++++
|
||||
patroni/dcs/__init__.py | 7 +++++++
|
||||
patroni/ha.py | 12 ++++++++----
|
||||
tests/test_api.py | 3 ++-
|
||||
tests/test_ha.py | 7 ++++---
|
||||
5 files changed, 27 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/patroni/api.py b/patroni/api.py
|
||||
index eeffaf2cf..c6adbb734 100644
|
||||
--- a/patroni/api.py
|
||||
+++ b/patroni/api.py
|
||||
@@ -288,6 +288,11 @@ def do_GET_metrics(self):
|
||||
metrics.append("# TYPE patroni_postgres_timeline counter")
|
||||
metrics.append("patroni_postgres_timeline{0} {1}".format(scope_label, postgres.get('timeline', 0)))
|
||||
|
||||
+ metrics.append("# HELP patroni_dcs_last_seen Epoch timestamp when DCS was last contacted successfully"
|
||||
+ " by Patroni.")
|
||||
+ metrics.append("# TYPE patroni_dcs_last_seen gauge")
|
||||
+ metrics.append("patroni_dcs_last_seen{0} {1}".format(scope_label, postgres.get('dcs_last_seen', 0)))
|
||||
+
|
||||
self._write_response(200, '\n'.join(metrics)+'\n', content_type='text/plain')
|
||||
|
||||
def _read_json_content(self, body_is_optional=False):
|
||||
@@ -600,6 +605,7 @@ def get_postgresql_status(self, retry=False):
|
||||
'role': 'replica' if row[1] == 0 else 'master',
|
||||
'server_version': postgresql.server_version,
|
||||
'cluster_unlocked': bool(not cluster or cluster.is_unlocked()),
|
||||
+ 'dcs_last_seen': self.server.patroni.dcs.last_seen,
|
||||
'xlog': ({
|
||||
'received_location': row[4] or row[3],
|
||||
'replayed_location': row[3],
|
||||
diff --git a/patroni/dcs/__init__.py b/patroni/dcs/__init__.py
|
||||
index 38b1e27d9..d96087d16 100644
|
||||
--- a/patroni/dcs/__init__.py
|
||||
+++ b/patroni/dcs/__init__.py
|
||||
@@ -652,6 +652,7 @@ def __init__(self, config):
|
||||
self._cluster_valid_till = 0
|
||||
self._cluster_thread_lock = Lock()
|
||||
self._last_lsn = ''
|
||||
+ self._last_seen = 0
|
||||
self._last_status = {}
|
||||
self.event = Event()
|
||||
|
||||
@@ -722,6 +723,10 @@ def reload_config(self, config):
|
||||
def loop_wait(self):
|
||||
return self._loop_wait
|
||||
|
||||
+ @property
|
||||
+ def last_seen(self):
|
||||
+ return self._last_seen
|
||||
+
|
||||
@abc.abstractmethod
|
||||
def _load_cluster(self):
|
||||
"""Internally this method should build `Cluster` object which
|
||||
@@ -744,6 +749,8 @@ def get_cluster(self, force=False):
|
||||
self.reset_cluster()
|
||||
raise
|
||||
|
||||
+ self._last_seen = int(time.time())
|
||||
+
|
||||
with self._cluster_thread_lock:
|
||||
self._cluster = cluster
|
||||
self._cluster_valid_till = time.time() + self.ttl
|
||||
diff --git a/patroni/ha.py b/patroni/ha.py
|
||||
index 209a52182..83c289c51 100644
|
||||
--- a/patroni/ha.py
|
||||
+++ b/patroni/ha.py
|
||||
@@ -21,13 +21,15 @@
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
-class _MemberStatus(namedtuple('_MemberStatus', ['member', 'reachable', 'in_recovery', 'timeline',
|
||||
- 'wal_position', 'tags', 'watchdog_failed'])):
|
||||
+class _MemberStatus(namedtuple('_MemberStatus', ['member', 'reachable', 'in_recovery',
|
||||
+ 'dcs_last_seen', 'timeline', 'wal_position',
|
||||
+ 'tags', 'watchdog_failed'])):
|
||||
"""Node status distilled from API response:
|
||||
|
||||
member - dcs.Member object of the node
|
||||
reachable - `!False` if the node is not reachable or is not responding with correct JSON
|
||||
in_recovery - `!True` if pg_is_in_recovery() == true
|
||||
+ dcs_last_seen - timestamp from JSON of last successful communication with DCS
|
||||
timeline - timeline value from JSON
|
||||
wal_position - maximum value of `replayed_location` or `received_location` from JSON
|
||||
tags - dictionary with values of different tags (i.e. nofailover)
|
||||
@@ -37,12 +39,14 @@ class _MemberStatus(namedtuple('_MemberStatus', ['member', 'reachable', 'in_reco
|
||||
def from_api_response(cls, member, json):
|
||||
is_master = json['role'] == 'master'
|
||||
timeline = json.get('timeline', 0)
|
||||
+ dcs_last_seen = json.get('dcs_last_seen', 0)
|
||||
wal = not is_master and max(json['xlog'].get('received_location', 0), json['xlog'].get('replayed_location', 0))
|
||||
- return cls(member, True, not is_master, timeline, wal, json.get('tags', {}), json.get('watchdog_failed', False))
|
||||
+ return cls(member, True, not is_master, dcs_last_seen, timeline, wal,
|
||||
+ json.get('tags', {}), json.get('watchdog_failed', False))
|
||||
|
||||
@classmethod
|
||||
def unknown(cls, member):
|
||||
- return cls(member, False, None, 0, 0, {}, False)
|
||||
+ return cls(member, False, None, 0, 0, 0, {}, False)
|
||||
|
||||
def failover_limitation(self):
|
||||
"""Returns reason why this node can't promote or None if everything is ok."""
|
||||
diff --git a/tests/test_api.py b/tests/test_api.py
|
||||
index 34a6224eb..60c3dd038 100644
|
||||
--- a/tests/test_api.py
|
||||
+++ b/tests/test_api.py
|
||||
@@ -54,6 +54,7 @@ class MockHa(object):
|
||||
|
||||
state_handler = MockPostgresql()
|
||||
watchdog = MockWatchdog()
|
||||
+ dcs_last_seen = 0
|
||||
|
||||
@staticmethod
|
||||
def is_leader():
|
||||
@@ -77,7 +78,7 @@ def delete_future_restart():
|
||||
|
||||
@staticmethod
|
||||
def fetch_nodes_statuses(members):
|
||||
- return [_MemberStatus(None, True, None, 0, None, {}, False)]
|
||||
+ return [_MemberStatus(None, True, None, 0, 0, None, {}, False)]
|
||||
|
||||
@staticmethod
|
||||
def schedule_future_restart(data):
|
||||
diff --git a/tests/test_ha.py b/tests/test_ha.py
|
||||
index 3228be697..0f30a1102 100644
|
||||
--- a/tests/test_ha.py
|
||||
+++ b/tests/test_ha.py
|
||||
@@ -80,13 +80,14 @@ def get_standby_cluster_initialized_with_only_leader(failover=None, sync=None):
|
||||
)
|
||||
|
||||
|
||||
-def get_node_status(reachable=True, in_recovery=True, timeline=2,
|
||||
- wal_position=10, nofailover=False, watchdog_failed=False):
|
||||
+def get_node_status(reachable=True, in_recovery=True, dcs_last_seen=0,
|
||||
+ timeline=2, wal_position=10, nofailover=False,
|
||||
+ watchdog_failed=False):
|
||||
def fetch_node_status(e):
|
||||
tags = {}
|
||||
if nofailover:
|
||||
tags['nofailover'] = True
|
||||
- return _MemberStatus(e, reachable, in_recovery, timeline, wal_position, tags, watchdog_failed)
|
||||
+ return _MemberStatus(e, reachable, in_recovery, dcs_last_seen, timeline, wal_position, tags, watchdog_failed)
|
||||
return fetch_node_status
|
||||
|
||||
|
1
debian/patches/series
vendored
1
debian/patches/series
vendored
|
@ -5,3 +5,4 @@ offline_intersphinx.patch
|
|||
regression_tests_disable_requirement_download.patch
|
||||
requirements_cdiff.patch
|
||||
regression_tests_disable_raft_tests.py
|
||||
dcs-last-seen.patch
|
||||
|
|
Loading…
Reference in a new issue