Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactors startup verify with accounts lt hash #3318

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions accounts-db/src/accounts_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,7 @@ pub struct IndexGenerationInfo {
pub rent_paying_accounts_by_partition: RentPayingAccountsByPartition,
/// The lt hash of the old/duplicate accounts identified during index generation.
/// Will be used when verifying the accounts lt hash, after rebuilding a Bank.
pub duplicates_lt_hash: Box<DuplicatesLtHash>,
pub duplicates_lt_hash: Option<Box<DuplicatesLtHash>>,
}

#[derive(Debug, Default)]
Expand Down Expand Up @@ -8791,8 +8791,10 @@ impl AccountsDb {
self.accounts_index
.add_uncleaned_roots(uncleaned_roots.into_iter());
accounts_data_len.fetch_sub(accounts_data_len_from_duplicates, Ordering::Relaxed);
let old_val = outer_duplicates_lt_hash.replace(duplicates_lt_hash);
assert!(old_val.is_none());
if self.is_experimental_accumulator_hash_enabled() {
let old_val = outer_duplicates_lt_hash.replace(duplicates_lt_hash);
assert!(old_val.is_none());
}
Comment on lines +8794 to +8797
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We now also wrap the duplicates lt hash, that generate_index() computes, in an Option. If the CLI arg is not set, we did not compute the duplicates. This Option makes that explicit.

In the future, we'll be able to check the snapshot if the accounts lt hash is enabled.

info!(
"accounts data len: {}",
accounts_data_len.load(Ordering::Relaxed)
Expand Down Expand Up @@ -8830,7 +8832,7 @@ impl AccountsDb {
rent_paying_accounts_by_partition: rent_paying_accounts_by_partition
.into_inner()
.unwrap(),
duplicates_lt_hash: outer_duplicates_lt_hash.unwrap(),
duplicates_lt_hash: outer_duplicates_lt_hash,
}
}

Expand Down
28 changes: 20 additions & 8 deletions core/tests/snapshots.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use {
itertools::Itertools,
log::{info, trace},
solana_accounts_db::{
accounts_db::{self, ACCOUNTS_DB_CONFIG_FOR_TESTING},
accounts_db::{self, AccountsDbConfig, ACCOUNTS_DB_CONFIG_FOR_TESTING},
accounts_index::AccountSecondaryIndexes,
epoch_accounts_hash::EpochAccountsHash,
},
Expand Down Expand Up @@ -55,7 +55,7 @@ use {
time::{Duration, Instant},
},
tempfile::TempDir,
test_case::test_case,
test_case::{test_case, test_matrix},
};

struct SnapshotTestConfig {
Expand Down Expand Up @@ -629,14 +629,22 @@ fn restore_from_snapshots_and_check_banks_are_equal(
Ok(())
}

/// Spin up the background services fully and test taking snapshots
#[test_case(V1_2_0, Development)]
#[test_case(V1_2_0, Devnet)]
#[test_case(V1_2_0, Testnet)]
#[test_case(V1_2_0, MainnetBeta)]
#[derive(Debug, Eq, PartialEq)]
enum VerifyAccountsKind {
Merkle,
Lattice,
}

/// Spin up the background services fully then test taking & verifying snapshots
#[test_matrix(
V1_2_0,
[Development, Devnet, Testnet, MainnetBeta],
[VerifyAccountsKind::Merkle, VerifyAccountsKind::Lattice]
)]
Comment on lines +638 to +643
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These changes add permutations on this snapshot test for verifying the accounts with both merkle and lattice based accounts hashing.

fn test_snapshots_with_background_services(
snapshot_version: SnapshotVersion,
cluster_type: ClusterType,
verify_accounts_kind: VerifyAccountsKind,
) {
solana_logger::setup();

Expand Down Expand Up @@ -821,6 +829,10 @@ fn test_snapshots_with_background_services(

// Load the snapshot and ensure it matches what's in BankForks
let (_tmp_dir, temporary_accounts_dir) = create_tmp_accounts_dir_for_tests();
let accounts_db_config = AccountsDbConfig {
enable_experimental_accumulator_hash: verify_accounts_kind == VerifyAccountsKind::Lattice,
..ACCOUNTS_DB_CONFIG_FOR_TESTING
};
let (deserialized_bank, ..) = snapshot_bank_utils::bank_from_latest_snapshot_archives(
&snapshot_test_config.snapshot_config.bank_snapshots_dir,
&snapshot_test_config
Expand All @@ -841,7 +853,7 @@ fn test_snapshots_with_background_services(
false,
false,
false,
Some(ACCOUNTS_DB_CONFIG_FOR_TESTING),
Some(accounts_db_config),
None,
exit.clone(),
)
Expand Down
186 changes: 105 additions & 81 deletions runtime/src/bank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5606,8 +5606,14 @@ impl Bank {
&self,
base: Option<(Slot, /*capitalization*/ u64)>,
mut config: VerifyAccountsHashConfig,
duplicates_lt_hash: Option<&DuplicatesLtHash>,
duplicates_lt_hash: Option<Box<DuplicatesLtHash>>,
) -> bool {
#[derive(Debug, Eq, PartialEq)]
enum VerifyKind {
Merkle,
Lattice,
}

let accounts = &self.rc.accounts;
// Wait until initial hash calc is complete before starting a new hash calc.
// This should only occur when we halt at a slot in ledger-tool.
Expand All @@ -5617,14 +5623,33 @@ impl Bank {
.wait_for_complete();

let slot = self.slot();
let is_accounts_lt_hash_enabled = self.is_accounts_lt_hash_enabled();
let verify_kind = if self
.rc
.accounts
.accounts_db
.is_experimental_accumulator_hash_enabled()
{
VerifyKind::Lattice
} else {
VerifyKind::Merkle
};
Comment on lines +5626 to +5635
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's the main change: we only look at the CLI arg to determine which hash kind to verify with.

Note that Bank::is_accounts_lt_hash_enabled() is the same as AccountsDb::is_experimental_accumulator_hash_enabled() today, but not when we add feature gate logic. This PR future proofs the feature gate changes, thus reducing future complexity.


if verify_kind == VerifyKind::Lattice {
// Calculating the accounts lt hash from storages *requires* a duplicates_lt_hash.
// If it is None here, then we must use the index instead, which also means we
// cannot run in the background.
if duplicates_lt_hash.is_none() {
config.run_in_background = false;
}
}

if config.require_rooted_bank && !accounts.accounts_db.accounts_index.is_alive_root(slot) {
if let Some(parent) = self.parent() {
info!(
"slot {slot} is not a root, so verify accounts hash on parent bank at slot {}",
parent.slot(),
);
if is_accounts_lt_hash_enabled {
if verify_kind == VerifyKind::Lattice {
// The duplicates_lt_hash is only valid for the current slot, so we must fall
// back to verifying the accounts lt hash with the index (which also means we
// cannot run in the background).
Expand All @@ -5638,15 +5663,6 @@ impl Bank {
}
}

if is_accounts_lt_hash_enabled {
// Calculating the accounts lt hash from storages *requires* a duplicates_lt_hash.
// If it is None here, then we must use the index instead, which also means we
// cannot run in the background.
if duplicates_lt_hash.is_none() {
config.run_in_background = false;
}
}

// The snapshot storages must be captured *before* starting the background verification.
// Otherwise, it is possible that a delayed call to `get_snapshot_storages()` will *not*
// get the correct storages required to calculate and verify the accounts hashes.
Expand All @@ -5673,7 +5689,6 @@ impl Bank {
let epoch_schedule = self.epoch_schedule().clone();
let rent_collector = self.rent_collector().clone();
let expected_accounts_lt_hash = self.accounts_lt_hash.lock().unwrap().clone();
let duplicates_lt_hash = duplicates_lt_hash.cloned();
accounts.accounts_db.verify_accounts_hash_in_bg.start(|| {
Builder::new()
.name("solBgHashVerify".into())
Expand All @@ -5682,49 +5697,53 @@ impl Bank {
let start = Instant::now();
let mut lattice_verify_time = None;
let mut merkle_verify_time = None;
if is_accounts_lt_hash_enabled {
// accounts lt hash is *enabled* so use lattice-based verification
let accounts_db = &accounts_.accounts_db;
let (calculated_accounts_lt_hash, duration) =
meas_dur!(accounts_db.thread_pool_hash.install(|| {
accounts_db.calculate_accounts_lt_hash_at_startup_from_storages(
snapshot_storages.0.as_slice(),
&duplicates_lt_hash.unwrap(),
)
}));
if calculated_accounts_lt_hash != expected_accounts_lt_hash {
error!(
"Verifying accounts lt hash failed: hashes do not match, \
expected: {}, calculated: {}",
expected_accounts_lt_hash.0.checksum(),
calculated_accounts_lt_hash.0.checksum(),
);
return false;
match verify_kind {
VerifyKind::Lattice => {
Comment on lines +5700 to +5701
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know the rest of the changes here look like a lot, but they are just moving from if {} else {} to a match with the VerifyKind arms. Ignoring whitespace in the diff will help. No logic was changed.

// accounts lt hash is *enabled* so use lattice-based verification
let accounts_db = &accounts_.accounts_db;
let (calculated_accounts_lt_hash, duration) =
meas_dur!(accounts_db.thread_pool_hash.install(|| {
accounts_db
.calculate_accounts_lt_hash_at_startup_from_storages(
snapshot_storages.0.as_slice(),
&duplicates_lt_hash.unwrap(),
)
}));
if calculated_accounts_lt_hash != expected_accounts_lt_hash {
let expected = expected_accounts_lt_hash.0.checksum();
let calculated = calculated_accounts_lt_hash.0.checksum();
error!(
"Verifying accounts failed: accounts lattice hashes do not \
match, expected: {expected}, calculated: {calculated}",
);
return false;
}
lattice_verify_time = Some(duration);
}
lattice_verify_time = Some(duration);
} else {
// accounts lt hash is *disabled* so use merkle-based verification
let snapshot_storages_and_slots = (
snapshot_storages.0.as_slice(),
snapshot_storages.1.as_slice(),
);
let (result, duration) = meas_dur!(accounts_
.verify_accounts_hash_and_lamports(
snapshot_storages_and_slots,
slot,
capitalization,
base,
VerifyAccountsHashAndLamportsConfig {
ancestors: &ancestors,
epoch_schedule: &epoch_schedule,
rent_collector: &rent_collector,
..verify_config
},
));
if !result {
return false;
VerifyKind::Merkle => {
// accounts lt hash is *disabled* so use merkle-based verification
let snapshot_storages_and_slots = (
snapshot_storages.0.as_slice(),
snapshot_storages.1.as_slice(),
);
let (result, duration) = meas_dur!(accounts_
.verify_accounts_hash_and_lamports(
snapshot_storages_and_slots,
slot,
capitalization,
base,
VerifyAccountsHashAndLamportsConfig {
ancestors: &ancestors,
epoch_schedule: &epoch_schedule,
rent_collector: &rent_collector,
..verify_config
},
));
if !result {
return false;
}
merkle_verify_time = Some(duration);
}
merkle_verify_time = Some(duration);
}
accounts_
.accounts_db
Expand All @@ -5751,45 +5770,50 @@ impl Bank {
});
true // initial result is true. We haven't failed yet. If verification fails, we'll panic from bg thread.
} else {
if is_accounts_lt_hash_enabled {
let expected_accounts_lt_hash = self.accounts_lt_hash.lock().unwrap().clone();
let calculated_accounts_lt_hash =
if let Some(duplicates_lt_hash) = duplicates_lt_hash {
match verify_kind {
VerifyKind::Lattice => {
let expected_accounts_lt_hash = self.accounts_lt_hash.lock().unwrap().clone();
let calculated_accounts_lt_hash = if let Some(duplicates_lt_hash) =
duplicates_lt_hash
{
accounts
.accounts_db
.calculate_accounts_lt_hash_at_startup_from_storages(
snapshot_storages.0.as_slice(),
duplicates_lt_hash,
&duplicates_lt_hash,
)
} else {
accounts
.accounts_db
.calculate_accounts_lt_hash_at_startup_from_index(&self.ancestors, slot)
};
if calculated_accounts_lt_hash != expected_accounts_lt_hash {
error!(
"Verifying accounts lt hash failed: hashes do not match, expected: {}, calculated: {}",
expected_accounts_lt_hash.0.checksum(),
calculated_accounts_lt_hash.0.checksum(),
let is_ok = calculated_accounts_lt_hash == expected_accounts_lt_hash;
if !is_ok {
let expected = expected_accounts_lt_hash.0.checksum();
let calculated = calculated_accounts_lt_hash.0.checksum();
error!(
"Verifying accounts failed: accounts lattice hashes do not \
match, expected: {expected}, calculated: {calculated}",
);
}
is_ok
}
VerifyKind::Merkle => {
let snapshot_storages_and_slots = (
snapshot_storages.0.as_slice(),
snapshot_storages.1.as_slice(),
);
let result = accounts.verify_accounts_hash_and_lamports(
snapshot_storages_and_slots,
slot,
capitalization,
base,
verify_config,
);
return false;
self.set_initial_accounts_hash_verification_completed();
result
}
// if we get here then the accounts lt hash is correct
}

let snapshot_storages_and_slots = (
snapshot_storages.0.as_slice(),
snapshot_storages.1.as_slice(),
);
let result = accounts.verify_accounts_hash_and_lamports(
snapshot_storages_and_slots,
slot,
capitalization,
base,
verify_config,
);
self.set_initial_accounts_hash_verification_completed();
result
}
}

Expand Down Expand Up @@ -6108,7 +6132,7 @@ impl Bank {
force_clean: bool,
latest_full_snapshot_slot: Slot,
base: Option<(Slot, /*capitalization*/ u64)>,
duplicates_lt_hash: Option<&DuplicatesLtHash>,
duplicates_lt_hash: Option<Box<DuplicatesLtHash>>,
) -> bool {
let (_, clean_time_us) = measure_us!({
let should_clean = force_clean || (!skip_shrink && self.slot() > 0);
Expand Down
6 changes: 3 additions & 3 deletions runtime/src/serde_snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ pub(crate) fn fields_from_streams(
/// This struct contains side-info while reconstructing the bank from streams
#[derive(Debug)]
pub struct BankFromStreamsInfo {
pub duplicates_lt_hash: Box<DuplicatesLtHash>,
pub duplicates_lt_hash: Option<Box<DuplicatesLtHash>>,
}

#[allow(clippy::too_many_arguments)]
Expand Down Expand Up @@ -842,7 +842,7 @@ impl<'a> solana_frozen_abi::abi_example::TransparentAsHelper for SerializableAcc
/// This struct contains side-info while reconstructing the bank from fields
#[derive(Debug)]
struct ReconstructedBankInfo {
duplicates_lt_hash: Box<DuplicatesLtHash>,
duplicates_lt_hash: Option<Box<DuplicatesLtHash>>,
}

#[allow(clippy::too_many_arguments)]
Expand Down Expand Up @@ -1037,7 +1037,7 @@ pub(crate) fn remap_and_reconstruct_single_storage(
#[derive(Debug, Default, Clone)]
pub struct ReconstructedAccountsDbInfo {
pub accounts_data_len: u64,
pub duplicates_lt_hash: Box<DuplicatesLtHash>,
pub duplicates_lt_hash: Option<Box<DuplicatesLtHash>>,
}

#[allow(clippy::too_many_arguments)]
Expand Down
4 changes: 2 additions & 2 deletions runtime/src/snapshot_bank_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ pub fn bank_from_snapshot_archives(
accounts_db_force_initial_clean,
full_snapshot_archive_info.slot(),
base,
Some(&info.duplicates_lt_hash),
info.duplicates_lt_hash,
) && limit_load_slot_count_from_snapshot.is_none()
{
panic!("Snapshot bank for slot {} failed to verify", bank.slot());
Expand Down Expand Up @@ -548,7 +548,7 @@ fn deserialize_status_cache(
/// This struct contains side-info from rebuilding the bank
#[derive(Debug)]
struct RebuiltBankInfo {
duplicates_lt_hash: Box<DuplicatesLtHash>,
duplicates_lt_hash: Option<Box<DuplicatesLtHash>>,
}

#[allow(clippy::too_many_arguments)]
Expand Down
Loading