From c11418c0830dce15f6bd454fbee68106d3415ea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Fern=C3=A1ndez?= Date: Thu, 7 Nov 2024 22:00:43 +0100 Subject: [PATCH] Revert "Do not unban replicas if a primary is available" (#850) Revert "Do not unban replicas if a primary is available (#843)" This reverts commit cdcfa99fb9bf759b83d88984ba58c51855b60fcb. --- CONFIG.md | 10 ---------- README.md | 2 +- src/config.rs | 4 ---- src/pool.rs | 10 +--------- src/query_router.rs | 2 -- 5 files changed, 2 insertions(+), 26 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 3dde1e18..4e984ede 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -130,16 +130,6 @@ default: 60 # seconds How long to ban a server if it fails a health check (seconds). -### unban_replicas_when_all_banned -``` -path: general.unban_replicas_when_all_banned -default: true -``` - -Whether or not we should unban all replicas when they are all banned. This is set -to true by default to prevent disconnection when we have replicas with a false positive -health check. - ### log_client_connections ``` path: general.log_client_connections diff --git a/README.md b/README.md index 41c9f945..21e6da70 100644 --- a/README.md +++ b/README.md @@ -175,7 +175,7 @@ The setting will persist until it's changed again or the client disconnects. By default, all queries are routed to the first available server; `default_role` setting controls this behavior. ### Failover -All servers are checked with a `;` (very fast) query before being given to a client. Additionally, the server health is monitored with every client query that it processes. If the server is not reachable, it will be banned and cannot serve any more transactions for the duration of the ban. The queries are routed to the remaining servers. If all servers become banned, the behavior is controlled by the configuration parameter `unban_replicas_when_all_banned`. If it is set to true (the default), the ban list is cleared: this is a safety precaution against false positives, if it is set to false, no replicas will be available until they become healthy. The primary can never be banned. +All servers are checked with a `;` (very fast) query before being given to a client. Additionally, the server health is monitored with every client query that it processes. If the server is not reachable, it will be banned and cannot serve any more transactions for the duration of the ban. The queries are routed to the remaining servers. If all servers become banned, the ban list is cleared: this is a safety precaution against false positives. The primary can never be banned. The ban time can be changed with `ban_time`. The default is 60 seconds. diff --git a/src/config.rs b/src/config.rs index 9b8cc072..b0d98fb5 100644 --- a/src/config.rs +++ b/src/config.rs @@ -315,9 +315,6 @@ pub struct General { #[serde(default = "General::default_ban_time")] pub ban_time: i64, - #[serde(default)] // True - pub unban_replicas_when_all_banned: bool, - #[serde(default = "General::default_idle_client_in_transaction_timeout")] pub idle_client_in_transaction_timeout: u64, @@ -463,7 +460,6 @@ impl Default for General { healthcheck_timeout: Self::default_healthcheck_timeout(), healthcheck_delay: Self::default_healthcheck_delay(), ban_time: Self::default_ban_time(), - unban_replicas_when_all_banned: true, idle_client_in_transaction_timeout: Self::default_idle_client_in_transaction_timeout(), server_lifetime: Self::default_server_lifetime(), server_round_robin: Self::default_server_round_robin(), diff --git a/src/pool.rs b/src/pool.rs index f9b481a2..7915a0a4 100644 --- a/src/pool.rs +++ b/src/pool.rs @@ -189,9 +189,6 @@ pub struct PoolSettings { // Ban time pub ban_time: i64, - // Should we automatically unban replicas when all are banned? - pub unban_replicas_when_all_banned: bool, - // Regex for searching for the sharding key in SQL statements pub sharding_key_regex: Option, @@ -231,7 +228,6 @@ impl Default for PoolSettings { healthcheck_delay: General::default_healthcheck_delay(), healthcheck_timeout: General::default_healthcheck_timeout(), ban_time: General::default_ban_time(), - unban_replicas_when_all_banned: true, sharding_key_regex: None, shard_id_regex: None, regex_search_limit: 1000, @@ -545,9 +541,6 @@ impl ConnectionPool { healthcheck_delay: config.general.healthcheck_delay, healthcheck_timeout: config.general.healthcheck_timeout, ban_time: config.general.ban_time, - unban_replicas_when_all_banned: config - .general - .unban_replicas_when_all_banned, sharding_key_regex: pool_config .sharding_key_regex .clone() @@ -953,9 +946,8 @@ impl ConnectionPool { let read_guard = self.banlist.read(); let all_replicas_banned = read_guard[address.shard].len() == replicas_available; drop(read_guard); - let unban_replicas_when_all_banned = self.settings.clone().unban_replicas_when_all_banned; - if all_replicas_banned && unban_replicas_when_all_banned { + if all_replicas_banned { let mut write_guard = self.banlist.write(); warn!("Unbanning all replicas."); write_guard[address.shard].clear(); diff --git a/src/query_router.rs b/src/query_router.rs index 59cca232..2ed6b755 100644 --- a/src/query_router.rs +++ b/src/query_router.rs @@ -1469,7 +1469,6 @@ mod test { healthcheck_delay: PoolSettings::default().healthcheck_delay, healthcheck_timeout: PoolSettings::default().healthcheck_timeout, ban_time: PoolSettings::default().ban_time, - unban_replicas_when_all_banned: true, sharding_key_regex: None, shard_id_regex: None, default_shard: crate::config::DefaultShard::Shard(0), @@ -1548,7 +1547,6 @@ mod test { healthcheck_delay: PoolSettings::default().healthcheck_delay, healthcheck_timeout: PoolSettings::default().healthcheck_timeout, ban_time: PoolSettings::default().ban_time, - unban_replicas_when_all_banned: true, sharding_key_regex: Some(Regex::new(r"/\* sharding_key: (\d+) \*/").unwrap()), shard_id_regex: Some(Regex::new(r"/\* shard_id: (\d+) \*/").unwrap()), default_shard: crate::config::DefaultShard::Shard(0),