Skip to content

Commit

Permalink
feat: connect over tcp if initiator isn't rdma capable
Browse files Browse the repository at this point in the history
Signed-off-by: Diwakar Sharma <[email protected]>
  • Loading branch information
dsharma-dc committed Oct 18, 2024
1 parent 51dc8c8 commit 361cbaa
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 4 deletions.
15 changes: 15 additions & 0 deletions control-plane/agents/src/bin/ha/node/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ fn is_same_controller(new_path_uri: &ParsedUri, subsystem: &Subsystem) -> Result
.parse()
.map_err(|_| SvcError::InvalidArguments {})?;

tracing::info!(
"Check same controller. old subsys {:?}, new path {:?}",
subsystem,
new_path_uri
);
let same_transport = new_path_uri.transport().eq(&ctrlr_transport);
let same_host_port = subsystem
.address
Expand Down Expand Up @@ -476,6 +481,16 @@ struct ParsedUri {
nqn: String,
}

/// Hand-written `Debug` for `ParsedUri`.
///
/// The output is a struct-style rendering named `ParsedUri` whose entries are
/// the values returned by the `host()`, `port()`, `transport()` and `nqn()`
/// accessors, in that order.
impl std::fmt::Debug for ParsedUri {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut rendered = f.debug_struct("ParsedUri");
        rendered.field("host", &self.host());
        rendered.field("port", &self.port());
        rendered.field("transport", &self.transport());
        rendered.field("nqn", &self.nqn());
        rendered.finish()
    }
}
impl ParsedUri {
fn new(uri: Uri) -> Result<ParsedUri, SvcError> {
let host = uri.host().ok_or(SvcError::InvalidArguments {})?.to_string();
Expand Down
28 changes: 27 additions & 1 deletion control-plane/csi-driver/src/bin/node/dev/nvmf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use csi_driver::PublishParams;
use glob::glob;
use nvmeadm::nvmf_subsystem::Subsystem;
use regex::Regex;
use tracing::info;
use udev::{Device, Enumerator};
use url::Url;
use uuid::Uuid;
Expand All @@ -22,6 +23,7 @@ use crate::{
config::{config, NvmeConfig, NvmeParseParams},
dev::util::extract_uuid,
match_dev::match_nvmf_device,
node::RDMA_CONNECT_CHECK,
};

use super::{Attach, Detach, DeviceError, DeviceName};
Expand Down Expand Up @@ -307,6 +309,16 @@ pub(crate) fn check_nvme_tcp_module() -> Result<(), std::io::Error> {
Ok(())
}

/// Check for the presence of the nvme rdma kernel module.
///
/// Returns `Ok(())` when `/sys/module/nvme_rdma` can be stat'ed, otherwise the
/// `std::io::Error` reported by the metadata lookup.
///
/// TODO: Handle the case where this (and for that matter nvme_tcp too)
/// could be a builtin module.
// NOTE(review): no caller is visible for this function yet, which is presumably
// why the unused lint is silenced — confirm and drop the allow once it is used.
#[allow(unused)]
pub(crate) fn check_nvme_rdma_module() -> Result<(), std::io::Error> {
    // Only the success/failure of the lookup matters; the metadata is discarded.
    std::fs::metadata("/sys/module/nvme_rdma").map(|_| ())
}

/// Set the nvme_core module IO timeout
/// (note, this is a system-wide parameter)
pub(crate) fn set_nvmecore_iotimeout(io_timeout_secs: u32) -> Result<(), std::io::Error> {
Expand Down Expand Up @@ -359,5 +371,19 @@ pub(crate) fn transport_from_url(url: &Url) -> Result<TrType, DeviceError> {
.split('+')
.nth(1)
.unwrap_or(default_xprt.as_str());
TrType::from_str(xprt).map_err(|e| DeviceError::new(format!("{e:?}").as_str()))

let ret_xprt = TrType::from_str(xprt).map_err(|e| DeviceError::new(format!("{e:?}").as_str()));
let connect_cap_check = RDMA_CONNECT_CHECK.get().unwrap_or(&(false, false));

if !connect_cap_check.0 {
ret_xprt
} else {
match ret_xprt {
Ok(t) if t == TrType::rdma && !connect_cap_check.1 => {
info!("rdma incapable node, connecting over tcp");
Ok(TrType::tcp)
}
_else => _else,
}
}
}
38 changes: 37 additions & 1 deletion control-plane/csi-driver/src/bin/node/main_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::{
identity::Identity,
k8s::patch_k8s_node,
mount::probe_filesystems,
node::Node,
node::{Node, RDMA_CONNECT_CHECK},
nodeplugin_grpc::NodePluginGrpcServer,
nodeplugin_nvme::NvmeOperationsSvc,
registration::run_registration_loop,
Expand Down Expand Up @@ -221,6 +221,16 @@ pub(super) async fn main() -> anyhow::Result<()> {
.value_parser(clap::value_parser!(bool))
.help("Enable ansi color for logs")
)
.arg(
Arg::new("nvme-connect-fallback")
.long("nvme-connect-fallback")
.default_value("false")
.value_parser(clap::value_parser!(bool))
.help(
"Enable falling back to nvme connect over tcp if initiator node is not rdma capable, \n\
even though volume target is rdma capable."
)
)
.subcommand(
clap::Command::new("fs-freeze")
.arg(
Expand Down Expand Up @@ -327,6 +337,32 @@ pub(super) async fn main() -> anyhow::Result<()> {
check_ana_and_label_node(&kube_client, node_name, nvme_enabled).await?;
}

let conn_fallback = matches.get_flag("nvme-connect-fallback");
// Check and store if this node has an RDMA device so that nvme connect over RDMA
// can be done. If this node is also an io-engine node with rdma enabled and working,
// then this check will always set capability true.
let ibv_output = tokio::process::Command::new("ibv_devinfo")
.arg("-l")
.output()
.await;

if ibv_output.as_ref().is_ok_and(|s| {
s.status.success()
&& !String::from_utf8(s.stdout.clone())
.unwrap_or_default()
.starts_with("0 HCA")
}) {
info!(
"host node rdma capable. conn_fallback({conn_fallback:?}), {:?}",
String::from_utf8(ibv_output.unwrap().stdout.clone())
);
// todo: check for the presence of nvme_rdma too. But don't bail out as we do for nvme_tcp.
let _ = RDMA_CONNECT_CHECK.set((conn_fallback, true));
} else {
error!("Error executing ibv_devinfo, or no output received. conn_fallback({conn_fallback:?}), {ibv_output:?}");
let _ = RDMA_CONNECT_CHECK.set((conn_fallback, false));
}

// Parse the CSI socket file name from the command line arguments.
let csi_socket = matches
.get_one::<String>("csi-socket")
Expand Down
7 changes: 7 additions & 0 deletions control-plane/csi-driver/src/bin/node/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use csi_driver::{
filesystem::FileSystem as Fs,
limiter::VolumeOpGuard,
};
use once_cell::sync::OnceCell;
use rpc::{
csi,
csi::{
Expand Down Expand Up @@ -76,6 +77,12 @@ impl Node {

const ATTACH_TIMEOUT_INTERVAL: Duration = Duration::from_millis(100);
const ATTACH_RETRIES: u32 = 100;
// A type and static variable used to record, during startup, the node's rdma capability.
// This is used to decide if the initiator can indeed connect over rdma.
// Tuple layout:
//   .0 - fallback flag: when true, an initiator node that is not rdma capable falls back
//        to nvme connect over tcp.
//   .1 - capability flag: true when the node is rdma capable.
type CheckAndFallbackNvmeConnect = (bool, bool);
pub(crate) static RDMA_CONNECT_CHECK: OnceCell<CheckAndFallbackNvmeConnect> = OnceCell::new();

// Determine if given access mode in conjunction with ro mount flag makes
// sense or not. If access mode is not supported or the combination does
Expand Down
1 change: 1 addition & 0 deletions nix/overlay.nix
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ self: super: {
control-plane = super.callPackage ./pkgs/control-plane { inherit allInOne incremental tag; };
openapi-generator = super.callPackage ./pkgs/openapi-generator { };
xfsprogs_5_16 = (import (super.sources).nixpkgs-22_05 { }).xfsprogs;
rdmacore = (import (super.sources).nixpkgs-22_05 { }).rdma-core;
}
4 changes: 2 additions & 2 deletions nix/pkgs/images/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# avoid dependency on docker tool chain. Though the maturity of OCI
# builder in nixpkgs is questionable which is why we postpone this step.

{ pkgs, xfsprogs_5_16, busybox, dockerTools, lib, e2fsprogs, btrfs-progs, utillinux, fetchurl, control-plane, tini, sourcer, img_tag ? "", img_org ? "", img_prefix }:
{ pkgs, xfsprogs_5_16, rdmacore, busybox, dockerTools, lib, e2fsprogs, btrfs-progs, utillinux, fetchurl, control-plane, tini, sourcer, img_tag ? "", img_org ? "", img_prefix }:
let
repo-org = if img_org != "" then img_org else "${builtins.readFile (pkgs.runCommand "repo_org" {
buildInputs = with pkgs; [ git ];
Expand Down Expand Up @@ -104,7 +104,7 @@ let
inherit buildType;
name = "node";
config = {
Env = [ "PATH=${lib.makeBinPath [ "/" xfsprogs e2fsprogs_1_46_2 btrfs-progs utillinux ]}" ];
Env = [ "PATH=${lib.makeBinPath [ "/" xfsprogs rdmacore e2fsprogs_1_46_2 btrfs-progs utillinux ]}" ];
};
};
};
Expand Down

0 comments on commit 361cbaa

Please sign in to comment.