From 5160fe051c0f2a507d0339b79b934665fb54030e Mon Sep 17 00:00:00 2001
From: Syed Paymaan Raza <1238752+spraza@users.noreply.github.com>
Date: Fri, 13 Dec 2024 15:28:16 -0800
Subject: [PATCH 1/2] Extend gray failure recentHealthTriggeredRecoveryTime state to reflect any recovery, including non-gray failure triggered ones

---
 fdbclient/include/fdbclient/ServerKnobs.h | 5 +++--
 fdbserver/ClusterController.actor.cpp | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fdbclient/include/fdbclient/ServerKnobs.h b/fdbclient/include/fdbclient/ServerKnobs.h
index ee732445471..de719bd3601 100644
--- a/fdbclient/include/fdbclient/ServerKnobs.h
+++ b/fdbclient/include/fdbclient/ServerKnobs.h
@@ -776,8 +776,9 @@ class SWIFT_CXX_IMMORTAL_SINGLETON_TYPE ServerKnobs : public KnobsImpl clusterWatchDatabase(ClusterControllerData* cluster,
 collection = actorCollection(db->recoveryData->addActor.getFuture());
 recoveryCore = clusterRecoveryCore(db->recoveryData);
+ cluster->recentHealthTriggeredRecoveryTime.push(now());
 // Master failure detection is pretty sensitive, but if we are in the middle of a very long recovery we
 // really don't want to have to start over
@@ -3061,7 +3062,6 @@ ACTOR Future workerHealthMonitor(ClusterControllerData* self) {
 if (self->shouldTriggerRecoveryDueToDegradedServers()) {
 if (SERVER_KNOBS->CC_HEALTH_TRIGGER_RECOVERY) {
 if (self->recentRecoveryCountDueToHealth() < SERVER_KNOBS->CC_MAX_HEALTH_RECOVERY_COUNT) {
- self->recentHealthTriggeredRecoveryTime.push(now());
 self->excludedDegradedServers.clear();
 for (const auto& degradedServer : self->degradationInfo.degradedServers) {
 self->excludedDegradedServers[degradedServer] = now();

From 49bcf0d5aeaf2b4a3e73515dcc811b1f80d0d86f Mon Sep 17 00:00:00 2001
From: Syed Paymaan Raza <1238752+spraza@users.noreply.github.com>
Date: Fri, 10 Jan 2025 16:16:40 -0800
Subject: [PATCH 2/2] Update knob documentation

---
 fdbclient/include/fdbclient/ServerKnobs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fdbclient/include/fdbclient/ServerKnobs.h b/fdbclient/include/fdbclient/ServerKnobs.h
index de719bd3601..5fb2914f612 100644
--- a/fdbclient/include/fdbclient/ServerKnobs.h
+++ b/fdbclient/include/fdbclient/ServerKnobs.h
@@ -776,9 +776,9 @@ class SWIFT_CXX_IMMORTAL_SINGLETON_TYPE ServerKnobs : public KnobsImpl