Skip to content

Commit

Permalink
feat: health client with cache (#1013)
Browse files Browse the repository at this point in the history
  • Loading branch information
NikolaMilosa authored Oct 23, 2024
1 parent d285cd3 commit 066bdc0
Show file tree
Hide file tree
Showing 10 changed files with 274 additions and 119 deletions.
14 changes: 5 additions & 9 deletions rs/cli/src/commands/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@ use std::{
};

use clap::Args;
use ic_management_backend::{
health::{HealthClient, HealthStatusQuerier},
lazy_registry::LazyRegistry,
};
use ic_management_backend::{health::HealthStatusQuerier, lazy_registry::LazyRegistry};
use ic_management_types::{HealthStatus, Network};
use ic_protobuf::registry::{
dc::v1::DataCenterRecord,
Expand Down Expand Up @@ -122,7 +119,7 @@ impl Registry {

let dcs = local_registry.get_datacenters()?;

let (subnets, nodes) = get_subnets_and_nodes(&local_registry, &node_operators, ctx.network()).await?;
let (subnets, nodes) = get_subnets_and_nodes(&local_registry, &node_operators, ctx.health_client()).await?;

let unassigned_nodes_config = local_registry.get_unassigned_nodes()?;

Expand Down Expand Up @@ -221,7 +218,7 @@ async fn get_node_operators(local_registry: &Arc<dyn LazyRegistry>, network: &Ne
async fn get_subnets_and_nodes(
local_registry: &Arc<dyn LazyRegistry>,
node_operators: &IndexMap<PrincipalId, NodeOperator>,
network: &Network,
health_client: Arc<dyn HealthStatusQuerier>,
) -> anyhow::Result<(Vec<SubnetRecord>, Vec<NodeDetails>)> {
let subnets = local_registry.subnets().await?;
let subnets = subnets
Expand Down Expand Up @@ -250,7 +247,7 @@ async fn get_subnets_and_nodes(
chain_key_config: record.chain_key_config.clone(),
})
.collect::<Vec<_>>();
let nodes = _get_nodes(local_registry, node_operators, &subnets, network).await?;
let nodes = _get_nodes(local_registry, node_operators, &subnets, health_client).await?;
let subnets = subnets
.into_iter()
.map(|subnet| {
Expand All @@ -270,9 +267,8 @@ async fn _get_nodes(
local_registry: &Arc<dyn LazyRegistry>,
node_operators: &IndexMap<PrincipalId, NodeOperator>,
subnets: &[SubnetRecord],
network: &Network,
health_client: Arc<dyn HealthStatusQuerier>,
) -> anyhow::Result<Vec<NodeDetails>> {
let health_client = HealthClient::new(network.clone());
let nodes_health = health_client.nodes().await?;

// Rewardable nodes for all node operators
Expand Down
34 changes: 17 additions & 17 deletions rs/cli/src/ctx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@ use std::{

use ic_canisters::IcAgentCanisterClient;
use ic_management_backend::{
health::{self, HealthStatusQuerier},
health::HealthStatusQuerier,
lazy_git::LazyGit,
lazy_registry::LazyRegistry,
proposal::{ProposalAgent, ProposalAgentImpl},
};
use ic_management_types::Network;
use log::warn;
use url::Url;

use crate::{
artifact_downloader::{ArtifactDownloader, ArtifactDownloaderImpl},
Expand Down Expand Up @@ -57,8 +56,7 @@ pub struct DreContext {
#[allow(clippy::too_many_arguments)]
impl DreContext {
pub async fn new(
network: String,
nns_urls: Vec<Url>,
network: Network,
auth: AuthOpts,
neuron_id: Option<u64>,
verbose: bool,
Expand All @@ -71,13 +69,6 @@ impl DreContext {
health_client: Arc<dyn HealthStatusQuerier>,
store: Store,
) -> anyhow::Result<Self> {
let network = match store.is_offline() {
false => ic_management_types::Network::new(network.clone(), &nns_urls)
.await
.map_err(|e| anyhow::anyhow!(e))?,
true => Network::new_unchecked(network.clone(), &nns_urls)?,
};

Ok(Self {
proposal_agent: Arc::new(ProposalAgentImpl::new(&network.nns_urls)),
network,
Expand All @@ -103,11 +94,18 @@ impl DreContext {
})
}

// Method that will be called from `main.rs` and
// will return real implementations of services
pub(crate) async fn from_args(args: &Args) -> anyhow::Result<Self> {
let store = Store::new(args.offline)?;
let network = match store.is_offline() {
false => ic_management_types::Network::new(args.network.clone(), &args.nns_urls)
.await
.map_err(|e| anyhow::anyhow!(e))?,
true => Network::new_unchecked(args.network.clone(), &args.nns_urls)?,
};
Self::new(
args.network.clone(),
args.nns_urls.clone(),
network.clone(),
args.auth_opts.clone(),
args.neuron_id,
args.verbose,
Expand All @@ -117,9 +115,7 @@ impl DreContext {
args.forum_post_link.clone(),
args.ic_admin_version.clone(),
store.cordoned_features_fetcher()?,
Arc::new(health::HealthClient::new(
ic_management_types::Network::new(args.network.clone(), &args.nns_urls).await?,
)),
store.health_client(&network)?,
store,
)
.await
Expand Down Expand Up @@ -244,6 +240,10 @@ impl DreContext {
pub fn forum_post_link(&self) -> Option<String> {
self.forum_post_link.clone()
}

pub fn health_client(&self) -> Arc<dyn HealthStatusQuerier> {
self.health_client.clone()
}
}

#[cfg(test)]
Expand Down Expand Up @@ -287,7 +287,7 @@ pub mod tests {
forum_post_link: "https://forum.dfinity.org/t/123".to_string().into(),
dry_run: true,
artifact_downloader,
neuron: RefCell::new(None),
neuron: RefCell::new(Some(neuron.clone())),
proceed_without_confirmation: true,
version: crate::commands::IcAdminVersion::Strict("Shouldn't reach this because of mock".to_string()),
neuron_opts: super::NeuronOpts {
Expand Down
49 changes: 37 additions & 12 deletions rs/cli/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::{io::Read, os::unix::fs::PermissionsExt, path::PathBuf, sync::Arc, time
use flate2::bufread::GzDecoder;
use ic_canisters::governance::governance_canister_version;
use ic_management_backend::{
health::{HealthClient, HealthStatusQuerier},
lazy_registry::{LazyRegistry, LazyRegistryImpl},
proposal::ProposalAgent,
registry::sync_local_store_with_path,
Expand Down Expand Up @@ -50,7 +51,7 @@ impl Store {

fn new_inner(offline: bool, path: PathBuf) -> anyhow::Result<Self> {
if !path.exists() {
std::fs::create_dir_all(&path)?;
fs_err::create_dir_all(&path)?;
}
Ok(Self { path, offline })
}
Expand All @@ -72,7 +73,7 @@ impl Store {
network.name,
local_store_dir.display()
);
std::fs::create_dir_all(&local_store_dir)?
fs_err::create_dir_all(&local_store_dir)?
}

Ok(local_store_dir)
Expand All @@ -87,7 +88,7 @@ impl Store {
network.name,
dir.display()
);
std::fs::create_dir_all(&dir)?
fs_err::create_dir_all(&dir)?
}

Ok(dir)
Expand All @@ -102,7 +103,7 @@ impl Store {
let path = self.guest_labels_cache_dir(network)?.join("labels.yaml");

if !path.exists() {
std::fs::write(&path, "")?;
fs_err::write(&path, "")?;
}

Ok(path)
Expand All @@ -126,6 +127,7 @@ impl Store {
self.offline,
proposal_agent,
self.guest_labels_cache_path(network)?,
self.health_client(network)?,
)))
}

Expand All @@ -134,7 +136,7 @@ impl Store {

if !path.exists() {
info!("ic-admin.revisions dir was missing. Creating on path `{}`...", path.display());
std::fs::create_dir_all(&path)?;
fs_err::create_dir_all(&path)?;
}

Ok(path)
Expand All @@ -150,7 +152,7 @@ impl Store {

if !status_file.exists() {
info!("ic-admin.status file was missing. Creating on path `{}`...", status_file.display());
std::fs::write(&status_file, "")?
fs_err::write(&status_file, "")?
}

Ok(status_file)
Expand All @@ -171,10 +173,10 @@ impl Store {
let mut decoded = GzDecoder::new(body.as_ref());

let path_parent = path.parent().ok_or(anyhow::anyhow!("Failed to get parent for ic admin revision dir"))?;
std::fs::create_dir_all(path_parent).map_err(|_| anyhow::anyhow!("create_dir_all failed for {}", path_parent.display()))?;
let mut out = std::fs::File::create(path)?;
fs_err::create_dir_all(path_parent).map_err(|_| anyhow::anyhow!("create_dir_all failed for {}", path_parent.display()))?;
let mut out = fs_err::File::create(path)?;
std::io::copy(&mut decoded, &mut out)?;
std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o755))?;
fs_err::set_permissions(path, std::fs::Permissions::from_mode(0o755))?;
Ok(())
}

Expand Down Expand Up @@ -222,7 +224,7 @@ impl Store {
IcAdminVersion::Strict(ver) => self.init_ic_admin(ver, network, proceed_without_confirmation, neuron, dry_run).await,
// This is the most probable way of running
IcAdminVersion::FromGovernance => {
let mut status_file = std::fs::File::open(&self.ic_admin_status_file()?)?;
let mut status_file = fs_err::File::open(&self.ic_admin_status_file()?)?;
let elapsed = status_file.metadata()?.modified()?.elapsed().unwrap_or_default();

let mut version_from_file = "".to_string();
Expand Down Expand Up @@ -274,7 +276,7 @@ impl Store {

// Only update file when the sync
// with governance has been performed
std::fs::write(self.ic_admin_status_file()?, version)?;
fs_err::write(self.ic_admin_status_file()?, version)?;
Ok(ic_admin)
}
}
Expand All @@ -290,7 +292,7 @@ impl Store {

if !file.exists() {
info!("Cordoned features file was missing. Creating on path `{}`...", file.display());
std::fs::write(&file, "")?;
fs_err::write(&file, "")?;
}

Ok(file)
Expand All @@ -300,4 +302,27 @@ impl Store {
let file = self.cordoned_features_file()?;
Ok(Arc::new(CordonedFeatureFetcherImpl::new(file, self.is_offline())?))
}

#[cfg(test)]
pub fn node_health_file_outer(&self, network: &Network) -> anyhow::Result<PathBuf> {
self.node_health_file(network)
}

fn node_health_file(&self, network: &Network) -> anyhow::Result<PathBuf> {
let file = self.path().join("node_healths").join(&network.name).join("node_healths.json");

if !file.exists() {
info!("Node health file was missing. Creating on path `{}`...", file.display());
fs_err::create_dir_all(file.parent().unwrap())?;
fs_err::write(&file, "")?;
}

Ok(file)
}

pub fn health_client(&self, network: &Network) -> anyhow::Result<Arc<dyn HealthStatusQuerier>> {
let file = self.node_health_file(network)?;

Ok(Arc::new(HealthClient::new(network.clone(), Some(file), self.is_offline())))
}
}
6 changes: 2 additions & 4 deletions rs/cli/src/unit_tests/ctx_init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ fn get_deleted_status_file() -> PathBuf {

async fn get_context(network: &Network, version: IcAdminVersion) -> anyhow::Result<DreContext> {
DreContext::new(
network.name.clone(),
network.nns_urls.clone(),
network.clone(),
AuthOpts {
private_key_pem: None,
hsm_opts: crate::commands::HsmOpts {
Expand Down Expand Up @@ -174,8 +173,7 @@ async fn get_ctx_for_neuron_test(
offline: bool,
) -> anyhow::Result<DreContext> {
DreContext::new(
network,
vec![],
ic_management_types::Network::new_unchecked(network, &[]).unwrap(),
auth,
neuron_id,
true,
Expand Down
Loading

0 comments on commit 066bdc0

Please sign in to comment.