From d258c8096108d085ccf36a83cd4c1b34a2baf57b Mon Sep 17 00:00:00 2001
From: danielxiangzl
Date: Thu, 12 Dec 2024 18:24:20 -0800
Subject: [PATCH] test leader equivocation

Add a `consensus::leader_equivocation` failpoint. When it fires, the
proposer sends every validator its own copy of the proposal with a distinct
timestamp and a fresh signature instead of one shared proposal. Setters for
the timestamp and signature are added to Block, BlockData and ProposalMsg to
build those copies. A smoke test and a forge load-test hook exercise the
failpoint.

---
Review notes (not part of the commit message):
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch; context-line indentation is a best guess, so apply
   with `git apply --ignore-whitespace` if a hunk is rejected.
 * The generic arguments of `Arc` in `attempt_to_equivocate` and in
   `NetworkLoadTest::test` were missing from that copy and are restored here
   as assumed. TODO: confirm against the tree.
 * The author's e-mail address is missing from the From: header for the same
   reason.
 * Removed the unconditional `panic!` that ended
   `test_fault_tolerance_of_leader_equivocation`; it failed the test even
   when the run itself succeeded.
 * performance_test.rs: `Duration` is now imported and `anyhow!` is
   path-qualified. The failpoint action is still "off", which disables the
   injection, and `impl NetworkLoadTest` carries no `#[async_trait]`.
   TODO: confirm both are intended.
 * The new `pub` setters on Block, BlockData and ProposalMsg are not
   feature-gated. TODO: confirm that is acceptable outside failpoint builds.
 * The post-image hashes on the `index` lines were not recomputed.

 consensus/consensus-types/src/block.rs        | 12 ++++-
 consensus/consensus-types/src/block_data.rs   |  5 ++
 consensus/consensus-types/src/proposal_msg.rs | 11 ++++
 consensus/src/round_manager.rs                | 48 ++++++++++++++++++-
 .../consensus/consensus_fault_tolerance.rs    | 42 ++++++++++++++++
 testsuite/testcases/src/performance_test.rs   | 36 +++++++++++++-
 6 files changed, 150 insertions(+), 4 deletions(-)

diff --git a/consensus/consensus-types/src/block.rs b/consensus/consensus-types/src/block.rs
index c36946e493e7f4..d36868dbcbb336 100644
--- a/consensus/consensus-types/src/block.rs
+++ b/consensus/consensus-types/src/block.rs
@@ -9,7 +9,7 @@ use crate::{
 };
 use anyhow::{bail, ensure, format_err};
 use aptos_bitvec::BitVec;
-use aptos_crypto::{bls12381, hash::CryptoHash, HashValue};
+use aptos_crypto::{bls12381::{self, Signature}, hash::CryptoHash, HashValue};
 use aptos_infallible::duration_since_epoch;
 use aptos_types::{
     account_address::AccountAddress,
@@ -87,6 +87,16 @@ impl Block {
         self.is_opt
     }
 
+    /// Overwrites the timestamp stored in this block's `BlockData`.
+    pub fn set_timestamp(&mut self, timestamp: u64) {
+        self.block_data.set_timestamp(timestamp);
+    }
+
+    /// Replaces the block's signature with `signature`.
+    pub fn set_signature(&mut self, signature: Signature) {
+        self.signature = Some(signature);
+    }
+
     pub fn set_quorum_cert(&mut self, qc: QuorumCert) {
         self.block_data.set_quorum_cert(qc);
     }
diff --git a/consensus/consensus-types/src/block_data.rs b/consensus/consensus-types/src/block_data.rs
index 82b181017f3680..dbf2afc4dd0fac 100644
--- a/consensus/consensus-types/src/block_data.rs
+++ b/consensus/consensus-types/src/block_data.rs
@@ -112,6 +112,11 @@ impl CryptoHash for BlockData {
 }
 
 impl BlockData {
+    /// Overwrites the `timestamp_usecs` field.
+    pub fn set_timestamp(&mut self, timestamp: u64) {
+        self.timestamp_usecs = timestamp;
+    }
+
     pub fn set_quorum_cert(&mut self, qc: QuorumCert) {
         self.quorum_cert = qc;
     }
diff --git a/consensus/consensus-types/src/proposal_msg.rs b/consensus/consensus-types/src/proposal_msg.rs
index 7a6c089cf27e43..4d52f54aacfbcd 100644
--- a/consensus/consensus-types/src/proposal_msg.rs
+++ b/consensus/consensus-types/src/proposal_msg.rs
@@ -4,6 +4,7 @@
 
 use crate::{block::Block, common::Author, proof_of_store::ProofCache, sync_info::SyncInfo};
 use anyhow::{anyhow, ensure, format_err, Context, Ok, Result};
+use aptos_crypto::bls12381::Signature;
 use aptos_short_hex_str::AsShortHexStr;
 use aptos_types::validator_verifier::ValidatorVerifier;
 use serde::{Deserialize, Serialize};
@@ -26,6 +27,16 @@ impl ProposalMsg {
         }
     }
 
+    /// Overwrites the timestamp of the wrapped proposal block.
+    pub fn set_timestamp(&mut self, timestamp: u64) {
+        self.proposal.set_timestamp(timestamp);
+    }
+
+    /// Replaces the signature of the wrapped proposal block.
+    pub fn set_signature(&mut self, signature: Signature) {
+        self.proposal.set_signature(signature);
+    }
+
     pub fn epoch(&self) -> u64 {
         self.proposal.epoch()
     }
diff --git a/consensus/src/round_manager.rs b/consensus/src/round_manager.rs
index 7a0885fc79eb77..08cea7ea1dd1b3 100644
--- a/consensus/src/round_manager.rs
+++ b/consensus/src/round_manager.rs
@@ -459,7 +459,7 @@ impl RoundManager {
             sync_info,
             network.clone(),
             proposal_generator,
-            safety_rules,
+            safety_rules.clone(),
             proposer_election,
             parent_id,
         )
@@ -474,9 +474,19 @@ impl RoundManager {
         {
             if Self::check_whether_to_inject_reconfiguration_error() {
                 Self::attempt_to_inject_reconfiguration_error(
+                    epoch_state.clone(),
+                    network.clone(),
+                    &proposal_msg,
+                )
+                .await?;
+            }
+
+            if Self::check_whether_to_equivocate() {
+                Self::attempt_to_equivocate(
                     epoch_state,
                     network.clone(),
                     &proposal_msg,
+                    safety_rules.clone(),
                 )
                 .await?;
             }
@@ -1922,6 +1932,13 @@ impl RoundManager {
         false
     }
 
+    /// Returns true when the `consensus::leader_equivocation` failpoint fires.
+    #[cfg(feature = "failpoints")]
+    fn check_whether_to_equivocate() -> bool {
+        fail_point!("consensus::leader_equivocation", |_| true);
+        false
+    }
+
     /// Given R1 <- B2 if R1 has the reconfiguration txn, we inject error on B2 if R1.round + 1 = B2.round
     /// Direct suffix is checked by parent.has_reconfiguration && !parent.parent.has_reconfiguration
     /// The error is injected by sending proposals to half of the validators to force a timeout.
@@ -1956,4 +1973,33 @@ impl RoundManager {
             Ok(())
         }
     }
+
+    /// Sends each validator its own copy of `proposal_msg` with a distinct
+    /// timestamp and a fresh signature, so that no two validators receive the
+    /// same proposal. Always returns an error afterwards.
+    #[cfg(feature = "failpoints")]
+    async fn attempt_to_equivocate(
+        epoch_state: Arc<EpochState>,
+        network: Arc<NetworkSender>,
+        proposal_msg: &ProposalMsg,
+        safety_rules: Arc<Mutex<MetricsSafetyRules>>,
+    ) -> anyhow::Result<()> {
+        let all_peers: Vec<_> = epoch_state
+            .verifier
+            .get_ordered_account_addresses_iter()
+            .collect();
+        let mut timestamp = proposal_msg.proposal().block_data().timestamp_usecs();
+        for peer in all_peers {
+            // A different timestamp per peer makes every copy a distinct block.
+            timestamp += 1;
+            let mut modified_proposal_msg = proposal_msg.clone();
+            modified_proposal_msg.set_timestamp(timestamp);
+            let signature = safety_rules
+                .lock()
+                .sign_proposal(modified_proposal_msg.proposal().block_data())?;
+            modified_proposal_msg.set_signature(signature);
+            network.send_proposal(modified_proposal_msg, vec![peer]).await;
+        }
+        Err(anyhow::anyhow!("Injected leader equivocation"))
+    }
 }
diff --git a/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs b/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs
index dc640d98146da7..8bbda0172bd261 100644
--- a/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs
+++ b/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs
@@ -347,6 +347,48 @@ async fn test_execution_retry() {
     .unwrap();
 }
 
+/// Injects the `consensus::leader_equivocation` failpoint into one validator
+/// per cycle (chosen round-robin) with a 50% trigger probability.
+#[tokio::test]
+async fn test_fault_tolerance_of_leader_equivocation() {
+    let num_validators = 4;
+
+    let swarm = create_swarm(num_validators, 1).await;
+    let (validator_clients, public_info) = {
+        (
+            swarm.get_validator_clients_with_names(),
+            swarm.aptos_public_info(),
+        )
+    };
+    test_consensus_fault_tolerance(
+        validator_clients,
+        public_info,
+        3,
+        5.0,
+        1,
+        Box::new(FailPointFailureInjection::new(Box::new(move |cycle, _| {
+            (
+                vec![(
+                    cycle % num_validators,
+                    "consensus::leader_equivocation".to_string(),
+                    "50%return".to_string(),
+                )],
+                true,
+            )
+        }))),
+        Box::new(
+            move |_, executed_epochs, executed_rounds, executed_transactions, _, _| {
+                successful_criteria(executed_epochs, executed_rounds, executed_transactions);
+                Ok(())
+            },
+        ),
+        true,
+        false,
+    )
+    .await
+    .unwrap();
+}
+
 #[tokio::test]
 async fn test_fault_tolerance_of_network_send() {
     // Randomly increase network failure rate, until network halts, and check that it comes back afterwards.
diff --git a/testsuite/testcases/src/performance_test.rs b/testsuite/testcases/src/performance_test.rs
index 63786565d1f98c..99a5510cc1b80f 100644
--- a/testsuite/testcases/src/performance_test.rs
+++ b/testsuite/testcases/src/performance_test.rs
@@ -2,8 +2,12 @@
 // Parts of the project are originally copyright © Meta Platforms, Inc.
 // SPDX-License-Identifier: Apache-2.0
 
+use std::{sync::Arc, time::Duration};
+
 use crate::NetworkLoadTest;
-use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test};
+use aptos_forge::{
+    NetworkContextSynchronizer, NetworkTest, Result, Swarm, SwarmExt, Test, TestReport,
+};
 use async_trait::async_trait;
 
 pub struct PerformanceBenchmark;
@@ -14,7 +18,35 @@ impl Test for PerformanceBenchmark {
     }
 }
 
-impl NetworkLoadTest for PerformanceBenchmark {}
+impl NetworkLoadTest for PerformanceBenchmark {
+    /// Sets the `consensus::leader_equivocation` failpoint to "off" on the first
+    /// tenth of the validators and then returns.
+    async fn test(
+        &self,
+        swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>,
+        _report: &mut TestReport,
+        _duration: Duration,
+    ) -> Result<()> {
+        let validators = { swarm.read().await.get_validator_clients_with_names() };
+        let num_bad_leaders = validators.len() / 10;
+        for (name, validator) in validators.iter().take(num_bad_leaders) {
+            validator
+                .set_failpoint(
+                    "consensus::leader_equivocation".to_string(),
+                    "off".to_string(),
+                )
+                .await
+                .map_err(|e| {
+                    anyhow::anyhow!(
+                        "set_failpoint to set consensus leader equivocation on {} failed, {:?}",
+                        name,
+                        e
+                    )
+                })?;
+        }
+        Ok(())
+    }
+}
 
 #[async_trait]
 impl NetworkTest for PerformanceBenchmark {