From a16ac6818cf5037c8b36396c9edb7cf1bb3bb1ec Mon Sep 17 00:00:00 2001 From: Jon Lange Date: Tue, 7 Jan 2025 10:54:18 -0800 Subject: [PATCH 1/5] platform: suppress SVSM use of interrupts on KVM/QEMU KVM does not support tracking interrupt state separately for each VMPL. The use of interrupts by the SVSM should be suppressed when running with SNP any time the SVSM is running under KVM/QEMU, not just when restricted injection is enabled. Signed-off-by: Jon Lange --- bootlib/src/kernel_launch.rs | 1 + kernel/src/platform/mod.rs | 14 ++++++++++---- kernel/src/platform/native.rs | 14 ++++---------- kernel/src/platform/snp.rs | 20 +++----------------- kernel/src/platform/tdp.rs | 8 +------- kernel/src/sev/status.rs | 4 ---- kernel/src/stage2.rs | 10 +++++++++- kernel/src/svsm.rs | 2 +- 8 files changed, 29 insertions(+), 44 deletions(-) diff --git a/bootlib/src/kernel_launch.rs b/bootlib/src/kernel_launch.rs index 7be5dd7ff..19559ca25 100644 --- a/bootlib/src/kernel_launch.rs +++ b/bootlib/src/kernel_launch.rs @@ -43,6 +43,7 @@ pub struct KernelLaunchInfo { pub vtom: u64, pub debug_serial_port: u16, pub use_alternate_injection: bool, + pub suppress_svsm_interrupts: bool, pub platform_type: SvsmPlatformType, } diff --git a/kernel/src/platform/mod.rs b/kernel/src/platform/mod.rs index a1557f077..cc26b82b3 100644 --- a/kernel/src/platform/mod.rs +++ b/kernel/src/platform/mod.rs @@ -180,12 +180,18 @@ pub enum SvsmPlatformCell { } impl SvsmPlatformCell { - pub fn new(platform_type: SvsmPlatformType) -> Self { + pub fn new(platform_type: SvsmPlatformType, suppress_svsm_interrupts: bool) -> Self { assert_eq!(platform_type, *SVSM_PLATFORM_TYPE); match platform_type { - SvsmPlatformType::Native => SvsmPlatformCell::Native(NativePlatform::new()), - SvsmPlatformType::Snp => SvsmPlatformCell::Snp(SnpPlatform::new()), - SvsmPlatformType::Tdp => SvsmPlatformCell::Tdp(TdpPlatform::new()), + SvsmPlatformType::Native => { + SvsmPlatformCell::Native(NativePlatform::new(suppress_svsm_interrupts)) + } + SvsmPlatformType::Snp => { + SvsmPlatformCell::Snp(SnpPlatform::new(suppress_svsm_interrupts)) + } + SvsmPlatformType::Tdp => { + SvsmPlatformCell::Tdp(TdpPlatform::new(suppress_svsm_interrupts)) + } } } } diff --git a/kernel/src/platform/native.rs b/kernel/src/platform/native.rs index 454830e35..8d3b5b5da 100644 --- a/kernel/src/platform/native.rs +++ b/kernel/src/platform/native.rs @@ -35,21 +35,15 @@ pub struct NativePlatform { } impl NativePlatform { - pub fn new() -> Self { - Self { - is_hyperv: is_hyperv_hypervisor(), - } - } -} - -impl Default for NativePlatform { - fn default() -> Self { + pub fn new(_suppress_svsm_interrupts: bool) -> Self { // Execution is not possible unless X2APIC is supported. 
let features = CpuidResult::get(1, 0); if (features.ecx & 0x200000) == 0 { panic!("X2APIC is not supported"); } - Self::new() + Self { + is_hyperv: is_hyperv_hypervisor(), + } } } diff --git a/kernel/src/platform/snp.rs b/kernel/src/platform/snp.rs index ee4ae66ce..d9c27bf0a 100644 --- a/kernel/src/platform/snp.rs +++ b/kernel/src/platform/snp.rs @@ -25,7 +25,7 @@ use crate::sev::hv_doorbell::current_hv_doorbell; use crate::sev::msr_protocol::{ hypervisor_ghcb_features, request_termination_msr, verify_ghcb_version, GHCBHvFeatures, }; -use crate::sev::status::{sev_restricted_injection, vtom_enabled}; +use crate::sev::status::vtom_enabled; use crate::sev::{ init_hypervisor_ghcb_features, pvalidate_range, sev_status_init, sev_status_verify, PvalidateOp, }; @@ -82,19 +82,13 @@ pub struct SnpPlatform { } impl SnpPlatform { - pub fn new() -> Self { + pub fn new(suppress_svsm_interrupts: bool) -> Self { Self { - can_use_interrupts: false, + can_use_interrupts: !suppress_svsm_interrupts, } } } -impl Default for SnpPlatform { - fn default() -> Self { - Self::new() - } -} - impl SvsmPlatform for SnpPlatform { #[cfg(test)] fn platform_type(&self) -> SvsmPlatformType { @@ -104,14 +98,6 @@ impl SvsmPlatform for SnpPlatform { fn env_setup(&mut self, _debug_serial_port: u16, vtom: usize) -> Result<(), SvsmError> { sev_status_init(); VTOM.init(&vtom).map_err(|_| SvsmError::PlatformInit)?; - - // Now that SEV status is initialized, determine whether this platform - // supports the use of SVSM interrupts. SVSM interrupts are supported - // if this system uses restricted injection. - if sev_restricted_injection() { - self.can_use_interrupts = true; - } - Ok(()) } diff --git a/kernel/src/platform/tdp.rs b/kernel/src/platform/tdp.rs index 8fde2f872..c14710762 100644 --- a/kernel/src/platform/tdp.rs +++ b/kernel/src/platform/tdp.rs @@ -31,17 +31,11 @@ static VTOM: ImmutAfterInitCell = ImmutAfterInitCell::uninit(); pub struct TdpPlatform {} impl TdpPlatform { - pub fn new() -> Self { + pub fn new(_suppress_svsm_interrupts: bool) -> Self { Self {} } } -impl Default for TdpPlatform { - fn default() -> Self { - Self::new() - } -} - impl SvsmPlatform for TdpPlatform { #[cfg(test)] fn platform_type(&self) -> SvsmPlatformType { diff --git a/kernel/src/sev/status.rs b/kernel/src/sev/status.rs index 06a0503b1..1af139a8c 100644 --- a/kernel/src/sev/status.rs +++ b/kernel/src/sev/status.rs @@ -150,10 +150,6 @@ pub fn vtom_enabled() -> bool { sev_flags().contains(SEVStatusFlags::VTOM) } -pub fn sev_restricted_injection() -> bool { - sev_flags().contains(SEVStatusFlags::REST_INJ) -} - pub fn sev_status_verify() { let required = SEVStatusFlags::SEV | SEVStatusFlags::SEV_ES | SEVStatusFlags::SEV_SNP; let supported = SEVStatusFlags::DBGSWP diff --git a/kernel/src/stage2.rs b/kernel/src/stage2.rs index e78223631..bc91a6c8d 100755 --- a/kernel/src/stage2.rs +++ b/kernel/src/stage2.rs @@ -352,7 +352,7 @@ pub extern "C" fn stage2_main(launch_info: &Stage2LaunchInfo) { let platform_type = SvsmPlatformType::from(launch_info.platform_type); init_platform_type(platform_type); - let mut platform = SvsmPlatformCell::new(platform_type); + let mut platform = SvsmPlatformCell::new(platform_type, true); let config = get_svsm_config(launch_info, &*platform).expect("Failed to get SVSM configuration"); @@ -406,6 +406,13 @@ pub extern "C" fn stage2_main(launch_info: &Stage2LaunchInfo) { ) .expect("Failed to map and validate heap"); + // Determine whether use of interrupts n the SVSM should be suppressed. 
+ // This is required when running SNP under KVM/QEMU. + let suppress_svsm_interrupts = match platform_type { + SvsmPlatformType::Snp => config.is_qemu(), + _ => false, + }; + // Build the handover information describing the memory layout and hand // control to the SVSM kernel. let launch_info = KernelLaunchInfo { @@ -430,6 +437,7 @@ pub extern "C" fn stage2_main(launch_info: &Stage2LaunchInfo) { vtom: launch_info.vtom, debug_serial_port: config.debug_serial_port(), use_alternate_injection: config.use_alternate_injection(), + suppress_svsm_interrupts, platform_type, }; diff --git a/kernel/src/svsm.rs b/kernel/src/svsm.rs index be941b478..bed9936ed 100755 --- a/kernel/src/svsm.rs +++ b/kernel/src/svsm.rs @@ -171,7 +171,7 @@ pub extern "C" fn svsm_start(li: &KernelLaunchInfo, vb_addr: usize) { .init(li) .expect("Already initialized launch info"); - let mut platform = SvsmPlatformCell::new(li.platform_type); + let mut platform = SvsmPlatformCell::new(li.platform_type, li.suppress_svsm_interrupts); init_cpuid_table(VirtAddr::from(launch_info.cpuid_page)); From a4981642063d8dc83f219b97bedebe9e8b200a82 Mon Sep 17 00:00:00 2001 From: Jon Lange Date: Sun, 24 Nov 2024 20:51:18 -0800 Subject: [PATCH 2/5] cpu: implement `CpuSet` `CpuSet` is an object that can represent a set of CPUs. Two versions are defined, both atomic and non-atomic. The atomic version permits multiple threads to simultaneously add, remove, or iterate elements of the set. Signed-off-by: Jon Lange --- kernel/src/cpu/cpuset.rs | 161 +++++++++++++++++++++++++++++++++++++++ kernel/src/cpu/mod.rs | 1 + 2 files changed, 162 insertions(+) create mode 100644 kernel/src/cpu/cpuset.rs diff --git a/kernel/src/cpu/cpuset.rs b/kernel/src/cpu/cpuset.rs new file mode 100644 index 000000000..bfe6490b9 --- /dev/null +++ b/kernel/src/cpu/cpuset.rs @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: MIT OR Apache-2.0 +// +// Copyright (c) Microsoft Corporation +// +// Author: Jon Lange (jlange@microsoft.com) + +use core::sync::atomic::{AtomicU64, Ordering}; + +pub const MAX_CPUS: usize = 1024; + +/// Represents a set of CPUs, based on CPU index. A maximum of `MAX_CPUS` can +/// be represented. +#[derive(Copy, Clone, Debug, Default)] +pub struct CpuSet { + bitmask: [u64; (MAX_CPUS + 63) / 64], +} + +impl CpuSet { + pub fn new() -> Self { + Self::default() + } + + /// Adds a CPU to the set. + /// + /// * `cpu_index`: the index of the CPU to add to the set. + pub fn add(&mut self, cpu_index: usize) { + self.bitmask[cpu_index >> 6] |= 1u64 << (cpu_index & 0x3F); + } + + /// Removes a CPU from the set. + /// + /// * `cpu_index`: the index of the CPU to remove from the set. + pub fn remove(&mut self, cpu_index: usize) { + self.bitmask[cpu_index >> 6] &= !(1u64 << (cpu_index & 0x3F)); + } + + /// Produces an iterator to iterate over the set. 
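+    ///
+    /// Illustrative sketch of the expected behavior (not code added by this
+    /// patch):
+    ///
+    /// ```ignore
+    /// let mut set = CpuSet::new();
+    /// set.add(1);
+    /// set.add(65);
+    /// let mut iter = set.iter();
+    /// // CPU indices are yielded in ascending order.
+    /// assert_eq!(iter.next(), Some(1));
+    /// assert_eq!(iter.next(), Some(65));
+    /// assert_eq!(iter.next(), None);
+    /// ```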
+ pub fn iter(&self) -> CpuSetIterator<'_> { + CpuSetIterator::new(self) + } +} + +#[derive(Debug)] +pub struct CpuSetIterator<'a> { + cpu_set: &'a CpuSet, + current_mask: u64, + mask_index: usize, +} + +impl<'a> CpuSetIterator<'a> { + fn new(cpu_set: &'a CpuSet) -> Self { + Self { + cpu_set, + current_mask: cpu_set.bitmask[0], + mask_index: 0, + } + } +} + +impl Iterator for CpuSetIterator<'_> { + type Item = usize; + fn next(&mut self) -> Option { + while self.current_mask == 0 { + self.mask_index += 1; + if self.mask_index == self.cpu_set.bitmask.len() { + return None; + } + + self.current_mask = self.cpu_set.bitmask[self.mask_index]; + } + + let index = self.current_mask.trailing_zeros(); + self.current_mask &= !(1u64 << index); + Some((self.mask_index << 6) | index as usize) + } +} + +/// Represents a set of CPUs, based on CPU index, which supports atomic +/// addition and removal. A maximum of 1024 CPUs can be represented. +#[derive(Debug, Default)] +pub struct AtomicCpuSet { + bitmask: [AtomicU64; (MAX_CPUS + 63) / 64], +} + +impl AtomicCpuSet { + pub fn new() -> Self { + Self::default() + } + + /// Adds a CPU to the set. + /// + /// * `cpu_index`: the index of the CPU to add to the set. + /// * `ordering`: the atomic ordering rules to be used when adding the CPU. + pub fn add(&self, cpu_index: usize, ordering: Ordering) { + self.bitmask[cpu_index >> 6].fetch_or(1u64 << (cpu_index & 0x3F), ordering); + } + + /// Removes a CPU from the set. + /// + /// * `cpu_index`: the index of the CPU to remove from the set. + /// * `ordering`: the atomic ordering rules to be used when adding the CPU. + pub fn remove(&self, cpu_index: usize, ordering: Ordering) { + self.bitmask[cpu_index >> 6].fetch_and(!(1u64 << (cpu_index & 0x3F)), ordering); + } + + /// Produces an iterator to iterate over the set. This iterator consumes + /// the set, so the action of iterating will remove all items from the set. + /// Items added while iteration is underway may or may not be observed by + /// the iterator. + /// + /// * `ordering` - The memory ordering to apply as elements are removed + /// from the set. 
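+    ///
+    /// Illustrative sketch (not code added by this patch), showing that
+    /// iteration drains the set:
+    ///
+    /// ```ignore
+    /// use core::sync::atomic::Ordering;
+    ///
+    /// let set = AtomicCpuSet::new();
+    /// set.add(2, Ordering::Relaxed);
+    /// assert_eq!(set.iter(Ordering::Relaxed).next(), Some(2));
+    /// // The element was consumed by the first iteration.
+    /// assert_eq!(set.iter(Ordering::Relaxed).next(), None);
+    /// ```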
+ pub fn iter(&self, ordering: Ordering) -> AtomicCpuSetIterator<'_> { + AtomicCpuSetIterator::new(self, ordering) + } +} + +impl Clone for AtomicCpuSet { + fn clone(&self) -> Self { + let clone = AtomicCpuSet::new(); + for (i, mask) in self.bitmask.iter().enumerate() { + clone.bitmask[i].store(mask.load(Ordering::Relaxed), Ordering::Relaxed); + } + clone + } +} + +#[derive(Debug)] +pub struct AtomicCpuSetIterator<'a> { + cpu_set: &'a AtomicCpuSet, + ordering: Ordering, + mask_index: usize, +} + +impl<'a> AtomicCpuSetIterator<'a> { + fn new(cpu_set: &'a AtomicCpuSet, ordering: Ordering) -> Self { + Self { + cpu_set, + ordering, + mask_index: 0, + } + } +} + +impl Iterator for AtomicCpuSetIterator<'_> { + type Item = usize; + fn next(&mut self) -> Option { + while self.mask_index < self.cpu_set.bitmask.len() { + let mask = self.cpu_set.bitmask[self.mask_index].load(Ordering::Relaxed); + if mask != 0 { + let index = mask.trailing_zeros(); + let cpu_mask = 1u64 << index; + self.cpu_set.bitmask[self.mask_index].fetch_and(!cpu_mask, self.ordering); + return Some((self.mask_index << 6) | index as usize); + } + self.mask_index += 1; + } + + None + } +} diff --git a/kernel/src/cpu/mod.rs b/kernel/src/cpu/mod.rs index b6e6d7828..1819026bf 100644 --- a/kernel/src/cpu/mod.rs +++ b/kernel/src/cpu/mod.rs @@ -7,6 +7,7 @@ pub mod apic; pub mod control_regs; pub mod cpuid; +pub mod cpuset; pub mod efer; pub mod extable; pub mod features; From 2a3f21fc55a6557439ccb0447e7116406f5f5e0d Mon Sep 17 00:00:00 2001 From: Jon Lange Date: Mon, 25 Nov 2024 16:06:52 -0800 Subject: [PATCH 3/5] utils: introduce `ScopedRef` `ScopedRef` and `ScopedMut` are designed to solve the problem of managing lifetimes of references created from pointers. Normally, when a reference is created from a pointer (such as with `ptr::as_ref()`), it is associated with the static lifetime, and as a result, the compiler is unable to determine whether the reference will live long enough for its intended use. While functions like `ptr::as_ref()` can associate the reference with a lifetime, the compiler cannot usefully use this information to enfoce lifetime checks on pointers generated in this way because although every reference can be bound to a lifetime, a reference does not by itself own a lifetime, and without an owning lifetime, the compiler has no way to know when the lifetime to which the reference is bound goes out of scope. The `ScopedRef` and `ScopedMut` objects solve this by creating a new object every time a pointer is converted to a reference, so there is an actual object with an associated lifetime that the compiler can use to ensure that the reference remains valid. 
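As an illustration of the intended usage (a sketch, not part of this change;
the `Widget` type and `widget_ptr` argument are hypothetical), a caller can
bound a pointer-derived reference to the current scope like this:

    use crate::utils::ScopedRef;

    struct Widget {
        id: u32,
    }

    fn read_widget(widget_ptr: *const Widget) -> Option<u32> {
        // SAFETY: illustrative only; the caller must guarantee that
        // `widget_ptr` points to a live, properly aligned `Widget` for the
        // duration of this call.
        let widget = unsafe { ScopedRef::new(widget_ptr) }?;
        // `widget` dereferences to `&Widget`, and that borrow cannot outlive
        // the `ScopedRef` object, which is dropped at the end of this scope.
        Some(widget.id)
    }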
Signed-off-by: Jon Lange --- kernel/src/utils/mod.rs | 2 + kernel/src/utils/scoped.rs | 107 +++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 kernel/src/utils/scoped.rs diff --git a/kernel/src/utils/mod.rs b/kernel/src/utils/mod.rs index 5390225c5..2b78ab1f6 100644 --- a/kernel/src/utils/mod.rs +++ b/kernel/src/utils/mod.rs @@ -8,9 +8,11 @@ pub mod bitmap_allocator; pub mod fw_meta; pub mod immut_after_init; pub mod memory_region; +pub mod scoped; pub mod util; pub use memory_region::MemoryRegion; +pub use scoped::{ScopedMut, ScopedRef}; pub use util::{ align_down, align_up, halt, is_aligned, overlap, page_align_up, page_offset, zero_mem_region, }; diff --git a/kernel/src/utils/scoped.rs b/kernel/src/utils/scoped.rs new file mode 100644 index 000000000..4b0584d53 --- /dev/null +++ b/kernel/src/utils/scoped.rs @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: MIT OR Apache-2.0 +// +// Copyright (c) Microsoft Corporation +// +// Author: Jon Lange (jlange@microsoft.com) + +use core::ops::{Deref, DerefMut}; + +/// `ScopedRef` and `ScopedMut` are designed to solve the problem of managing +/// lifetimes of references created from pointers. Normally, when a reference +/// is created from a pointer (such as with `ptr::as_ref()`), it is associated +/// with the static lifetime, and as a result, the compiler is unable to +/// determine whether the reference will live long enough for its intended +/// use. While functions like `ptr::as_ref()` can associate the reference with +/// a lifetime, the compiler cannot usefully use this information to enforce +/// lifetime checks on pointers generated in this way because although every +/// reference can be bound to a lifetime, a reference does not by itself own a +/// lifetime, and without an owning lifetime, the compiler has no way to know +/// when the lifetime to which the reference is bound goes out of scope. +/// The `ScopedRef` and `ScopedMut` objects solve this by creating a new object +/// every time a pointer is converted to a reference, so there is an actual +/// object with an associated lifetime that the compiler can use to ensure that +/// the reference remains valid. + +#[derive(Debug)] +pub struct ScopedRef<'a, T> { + inner: &'a T, +} + +impl ScopedRef<'_, T> { + /// Generates a new `ScopedRef` from a pointer. + /// + /// # Safety + /// + /// This is a dereference of a raw pointer, and no correctness checks are + /// performed. + pub unsafe fn new(ptr: *const T) -> Option { + // SAFETY: the caller guarantees the safety of the pointer. + unsafe { ptr.as_ref().map(|inner| Self { inner }) } + } +} + +impl AsRef for ScopedRef<'_, T> { + fn as_ref(&self) -> &T { + self.inner + } +} + +impl Deref for ScopedRef<'_, T> { + type Target = T; + + fn deref(&self) -> &T { + self.as_ref() + } +} + +impl Drop for ScopedRef<'_, T> { + fn drop(&mut self) {} +} + +#[derive(Debug)] +pub struct ScopedMut<'a, T> { + inner: &'a mut T, +} + +impl ScopedMut<'_, T> { + /// Generates a new `ScopedMut` from a pointer. + /// + /// # Safety + /// + /// This is a dereference of a raw pointer, and no correctness checks are + /// performed. + pub unsafe fn new(ptr: *mut T) -> Option { + // SAFETY: the caller guarantees the safety of the pointer. 
+ unsafe { ptr.as_mut().map(|inner| Self { inner }) } + } +} + +impl AsRef for ScopedMut<'_, T> { + fn as_ref(&self) -> &T { + self.inner + } +} + +impl AsMut for ScopedMut<'_, T> { + fn as_mut(&mut self) -> &mut T { + self.inner + } +} + +impl Deref for ScopedMut<'_, T> { + type Target = T; + + fn deref(&self) -> &T { + self.as_ref() + } +} + +impl DerefMut for ScopedMut<'_, T> { + fn deref_mut(&mut self) -> &mut T { + self.as_mut() + } +} + +impl Drop for ScopedMut<'_, T> { + fn drop(&mut self) {} +} From d82bf83005f97258657e70591c0c8948cdf9d57e Mon Sep 17 00:00:00 2001 From: Jon Lange Date: Wed, 11 Dec 2024 13:22:06 -0800 Subject: [PATCH 4/5] cpu: record an address of `PerCpuShared` that is globally valid The address of a `PerCpuShared` as observed by the local CPU is different than the address of the same `PerCpuShared` observed by other CPUs in the global address space. There are cases where a local CPU needs to know the address that will be used by other CPUs. Signed-off-by: Jon Lange --- kernel/src/cpu/percpu.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/src/cpu/percpu.rs b/kernel/src/cpu/percpu.rs index e4d75c1ec..15e630c28 100644 --- a/kernel/src/cpu/percpu.rs +++ b/kernel/src/cpu/percpu.rs @@ -322,6 +322,10 @@ pub struct PerCpu { /// Per-CPU storage that might be accessed from other CPUs. shared: PerCpuShared, + /// Reference to the `PerCpuShared` that is valid in the global, shared + /// address space. + shared_global: OnceCell<&'static PerCpuShared>, + /// PerCpu IRQ state tracking irq_state: IrqState, @@ -383,6 +387,7 @@ impl PerCpu { apic: RefCell::new(None), shared: PerCpuShared::new(apic_id, cpu_index), + shared_global: OnceCell::new(), ghcb: OnceCell::new(), hypercall_pages: RefCell::new(None), hv_doorbell: Cell::new(None), @@ -401,6 +406,7 @@ impl PerCpu { let cpu_index = PERCPU_AREAS.next_cpu_index(); let page = PageBox::try_new(Self::new(apic_id, cpu_index))?; let percpu = PageBox::leak(page); + percpu.set_shared_global(); unsafe { PERCPU_AREAS.push(PerCpuInfo::new(apic_id, &percpu.shared)) }; Ok(percpu) } @@ -409,6 +415,14 @@ impl PerCpu { &self.shared } + fn set_shared_global(&'static self) { + self.shared_global.set(&self.shared).expect("shared global set more than once"); + } + + pub fn shared_global(&self) -> &'static PerCpuShared { + self.shared_global.get().unwrap() + } + /// Disables IRQs on the current CPU. Keeps track of the nesting level and /// the original IRQ state. /// From 2618d36151e8f40c09e57a54e2092262a113bd53 Mon Sep 17 00:00:00 2001 From: Jon Lange Date: Sun, 24 Nov 2024 20:58:15 -0800 Subject: [PATCH 5/5] cpu: implement inter-processor interrupts This change provides routines that enable one processor to send a message to one or other processors to perform work. 
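For illustration (a sketch, not part of this change; `FlushRequest` is a
hypothetical message type), a sender can broadcast a message to all other
CPUs as follows, mirroring the in-tree test added below:

    use crate::cpu::ipi::{send_multicast_ipi, IpiMessage, IpiTarget};

    #[derive(Debug)]
    struct FlushRequest {
        generation: u64,
    }

    // SAFETY: the message holds no references, so the default byte-copy
    // `copy_to_shared` implementation provided by the trait is sound.
    unsafe impl IpiMessage for FlushRequest {
        fn invoke(&self) {
            // Runs on every targeted CPU at TPR_IPI; a real handler would
            // perform the per-CPU work here.
            let _ = self.generation;
        }
    }

    fn request_flush() {
        // Does not return until every targeted CPU has run `invoke`.
        send_multicast_ipi(IpiTarget::AllButSelf, &FlushRequest { generation: 1 });
    }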
Signed-off-by: Jon Lange --- kernel/src/cpu/apic.rs | 6 +- kernel/src/cpu/idt/common.rs | 1 + kernel/src/cpu/idt/entry.S | 3 + kernel/src/cpu/idt/svsm.rs | 19 +- kernel/src/cpu/ipi.rs | 623 +++++++++++++++++++++++++++++++++++ kernel/src/cpu/mod.rs | 1 + kernel/src/cpu/percpu.rs | 69 +++- kernel/src/types.rs | 2 + 8 files changed, 714 insertions(+), 10 deletions(-) create mode 100644 kernel/src/cpu/ipi.rs diff --git a/kernel/src/cpu/apic.rs b/kernel/src/cpu/apic.rs index 0510ad22b..db43916af 100644 --- a/kernel/src/cpu/apic.rs +++ b/kernel/src/cpu/apic.rs @@ -33,7 +33,7 @@ const APIC_REGISTER_ICR: u64 = 0x830; const APIC_REGISTER_SELF_IPI: u64 = 0x83F; #[derive(Debug, PartialEq)] -enum IcrDestFmt { +pub enum IcrDestFmt { Dest = 0, OnlySelf = 1, AllWithSelf = 2, @@ -55,7 +55,7 @@ impl IcrDestFmt { } #[derive(Debug, PartialEq)] -enum IcrMessageType { +pub enum IcrMessageType { Fixed = 0, Unknown = 3, Nmi = 4, @@ -81,7 +81,7 @@ impl IcrMessageType { } #[bitfield(u64)] -struct ApicIcr { +pub struct ApicIcr { pub vector: u8, #[bits(3)] pub message_type: IcrMessageType, diff --git a/kernel/src/cpu/idt/common.rs b/kernel/src/cpu/idt/common.rs index aa339cced..87c5d41e4 100644 --- a/kernel/src/cpu/idt/common.rs +++ b/kernel/src/cpu/idt/common.rs @@ -46,6 +46,7 @@ pub const VC_VECTOR: usize = 29; pub const SX_VECTOR: usize = 30; pub const INT_INJ_VECTOR: usize = 0x50; +pub const IPI_VECTOR: usize = 0xE0; bitflags::bitflags! { /// Page fault error code flags. diff --git a/kernel/src/cpu/idt/entry.S b/kernel/src/cpu/idt/entry.S index ed40f2914..5e7f2434b 100644 --- a/kernel/src/cpu/idt/entry.S +++ b/kernel/src/cpu/idt/entry.S @@ -421,4 +421,7 @@ default_entry_no_ist name=int80 handler=system_call error_code=0 vector=0x80 // Interrupt injection vector irq_entry name=int_inj vector=0x50 +// IPI vector. +irq_entry name=ipi vector=0xE0 + .popsection diff --git a/kernel/src/cpu/idt/svsm.rs b/kernel/src/cpu/idt/svsm.rs index 6dd1d2f58..87d3a73a7 100644 --- a/kernel/src/cpu/idt/svsm.rs +++ b/kernel/src/cpu/idt/svsm.rs @@ -11,9 +11,9 @@ use super::super::tss::IST_DF; use super::super::vc::handle_vc_exception; use super::common::{ idt_mut, user_mode, IdtEntry, IdtEventType, PageFaultError, AC_VECTOR, BP_VECTOR, BR_VECTOR, - CP_VECTOR, DB_VECTOR, DE_VECTOR, DF_VECTOR, GP_VECTOR, HV_VECTOR, INT_INJ_VECTOR, MCE_VECTOR, - MF_VECTOR, NMI_VECTOR, NM_VECTOR, NP_VECTOR, OF_VECTOR, PF_VECTOR, SS_VECTOR, SX_VECTOR, - TS_VECTOR, UD_VECTOR, VC_VECTOR, XF_VECTOR, + CP_VECTOR, DB_VECTOR, DE_VECTOR, DF_VECTOR, GP_VECTOR, HV_VECTOR, INT_INJ_VECTOR, IPI_VECTOR, + MCE_VECTOR, MF_VECTOR, NMI_VECTOR, NM_VECTOR, NP_VECTOR, OF_VECTOR, PF_VECTOR, SS_VECTOR, + SX_VECTOR, TS_VECTOR, UD_VECTOR, VC_VECTOR, XF_VECTOR, }; use crate::address::VirtAddr; use crate::cpu::irq_state::{raw_get_tpr, raw_set_tpr, tpr_from_vector}; @@ -56,6 +56,7 @@ extern "C" { fn asm_entry_sx(); fn asm_entry_int80(); fn asm_entry_irq_int_inj(); + fn asm_entry_irq_ipi(); pub static mut HV_DOORBELL_ADDR: usize; } @@ -92,6 +93,7 @@ pub fn early_idt_init() { // Interupts idt.set_entry(0x80, IdtEntry::user_entry(asm_entry_int80)); + idt.set_entry(IPI_VECTOR, IdtEntry::entry(asm_entry_irq_ipi)); // Load IDT idt.load(); @@ -363,9 +365,14 @@ pub fn common_isr_handler(vector: usize) { let cpu = this_cpu(); cpu.irqs_enable(); - // Treat any unhandled interrupt as a spurious interrupt. Interrupt - // injection requests currently require no processing; they occur simply - // to ensure an exit from the guest. + // Process the requested interrupt vector. 
+ match vector { + IPI_VECTOR => this_cpu().handle_ipi_interrupt(), + _ => { + // Ignore all unrecognized interrupt vectors and treat them as + // spurious interrupts. + } + } // Disable interrupts before restoring TPR. cpu.irqs_disable(); diff --git a/kernel/src/cpu/ipi.rs b/kernel/src/cpu/ipi.rs new file mode 100644 index 000000000..4574e1939 --- /dev/null +++ b/kernel/src/cpu/ipi.rs @@ -0,0 +1,623 @@ +// SPDX-License-Identifier: MIT OR Apache-2.0 +// +// Copyright (c) Microsoft Corporation +// +// Author: Jon Lange (jlange@microsoft.com) + +use super::apic::{ApicIcr, IcrDestFmt}; +use super::cpuset::{AtomicCpuSet, CpuSet}; +use super::idt::common::IPI_VECTOR; +use super::percpu::this_cpu; +use super::percpu::PERCPU_AREAS; +use super::TprGuard; +use crate::error::SvsmError; +use crate::platform::SVSM_PLATFORM; +use crate::types::{TPR_IPI, TPR_SYNCH}; +use crate::utils::{ScopedMut, ScopedRef}; + +use core::cell::{Cell, UnsafeCell}; +use core::mem; +use core::mem::MaybeUninit; +use core::ptr; +use core::sync::atomic::{AtomicUsize, Ordering}; + +/// This module implements inter-processor interrupt support, including the +/// ability to send and receive messages across CPUs. Two types of IPI +/// messages are supported: multicast and unicast. Sending a multicast IPI +/// will cause a message to be delivered to one or more CPUs as a shared +/// reference. Sending a unicast IPI will cause a message to be delivered to a +/// single target as a mutable reference, permitting the receiving processor to +/// modify the contents of the message such that the sender of the message can +/// observe the response. In all cases, the request to send a message will not +/// complete until all receiving CPUs have completed handling the request. +/// +/// Multicast IPIs can be used to target a single CPU, an arbitrary set of +/// CPUs, or all CPUs (optionally including or excluding the sending CPU). +/// +/// Sending an IPI requires the ability to raise TPR to TPR_SYNCH. If the +/// current TPR is already above TPR_SYNCH, then the IPI request will panic. +/// +/// Two traits support the delivery of IPI messages: `IpiMessage` and +/// `IpiMessageMut`. Each of these traits requires an implementation of an +/// `invoke` method which will be called on every receiving CPU to handle +/// the message. The `invoke` method will be called on every receving CPU, +/// including on the sending CPU if it is a selected target. The `invoke` +/// method is always called with TPR equal to TPR_IPI. TPR-sensitive locks +/// may not be used unless they are designed to be held at TPR_IPI. TPR_IPI +/// is higher than TPR_SYNCH, so it is not possible to send an IPI from an +/// IPI handler. +/// +/// All IPI messages that can be sent as a multicast IPI must implement `Sync` +/// in addition to implementing `IpiMessage` because these messages will be +/// processed simultaneously by multiple CPUs, requiring cross-thread +/// synchronization. `Sync` is not required for unicast messages, since those +/// messages can only be processed by a single processor at a time. + +/// The `IpiTarget` enum describes the set of CPUs that should receive a +/// multicast IPI. There are four variants. +/// * `Single` indicates a single CPU, described by CPU index (*not* APIC ID). +/// * `Multiple` contains a `CpuSet`, which is a bitmap of multiple CPUs +/// selected by CPU index. +/// * `AllButSelf` indicates all CPUs other than the sending processor. +/// * `All` indicates all CPUs. 
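+///
+/// Illustrative sketch (not code added by this patch; `message` is assumed
+/// to be a value implementing `IpiMessage + Sync`), targeting only CPUs 1
+/// and 3:
+///
+/// ```ignore
+/// let mut targets = CpuSet::new();
+/// targets.add(1);
+/// targets.add(3);
+/// send_multicast_ipi(IpiTarget::Multiple(&targets), &message);
+/// ```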
+#[derive(Clone, Copy, Debug)] +pub enum IpiTarget<'a> { + Single(usize), + Multiple(&'a CpuSet), + AllButSelf, + All, +} + +/// # Safety +/// This trait implements a method to copy IPI message contents into a shared +/// buffer. If that serialization is performed incorrectly, then IPI message +/// receipt will be unsound because the message may contain incorrect pointers +/// and references that refer to invalid memory - or memory that belongs to +/// another owner. All implementations of this trait must verify that the +/// copy routine correctly copies all data and resolves all references within +/// the copied data. +pub unsafe trait IpiMessage { + /// All IPI messages must be copied into a shared IPI buffer since stack + /// locals are not visible across CPU/task contexts. This function must + /// perform a deep copy of the contents of the source buffer into the + /// shared destination buffer. + /// + /// Arguments: + /// + /// *`src`: A pointer to the input message. + /// *`buffer`: A byte slice in shared memory which will be the target of the copy. + fn copy_to_shared(&self, buffer: &mut [u8]) + where + Self: Sized, + { + let size = mem::size_of::(); + assert!(size <= buffer.len()); + // SAFETY: the target buffer is known not to overlap the `self` object, + // and the assertion above proves that the target buffer is large + // enough to receive a copy of the object. + unsafe { + ptr::copy_nonoverlapping(ptr::from_ref(self) as *const u8, buffer.as_mut_ptr(), size); + } + } + + /// Invokes the IPI handler for the message. + fn invoke(&self); +} + +/// # Safety +/// This trait implements a method to copy IPI message contents into a shared +/// buffer. If that serialization is performed incorrectly, then IPI message +/// receipt will be unsound because the message may contain incorrect pointers +/// and references that refer to invalid memory - or memory that belongs to +/// another owner. The same applies to the method of this trait that copies +/// modified IPI message contents back to the caller's IPI message structure. +/// All implementations of this trait must verify that the copy routines +/// correctly copy all data and resolve all references within the copied data. +pub unsafe trait IpiMessageMut { + /// All IPI messages must be copied into a shared IPI buffer since stack + /// locals are not visible across CPU/task contexts. This function must + /// perform a deep copy of the contents of the source buffer into the + /// shared destination buffer. + /// + /// Arguments: + /// + /// *`src`: A pointer to the input message. + /// *`buffer`: A byte slice in shared memory which will be the target of the copy. + fn copy_to_shared(&self, buffer: &mut [u8]) + where + Self: Sized, + { + let size = mem::size_of::(); + assert!(size <= buffer.len()); + // SAFETY: the target buffer is known not to overlap the `self` object, + // and the assertion above proves that the target buffer is large + // enough to receive a copy of the object. + unsafe { + ptr::copy_nonoverlapping(ptr::from_ref(self) as *const u8, buffer.as_mut_ptr(), size); + } + } + + /// Copies the result of the unicast IPI back into the original message + /// buffer. + /// + /// Arguments: + /// + /// *`src`: A pointer to the input message. + /// *`buffer`: A byte slice in shared memory which will be the target of the copy. 
+ fn copy_from_shared(&mut self, shared_buffer: &Self) + where + Self: Sized, + { + // SAFETY: the contents of object are to be moved from the shared + // buffer back to the caller's object, so no drop can be permitted. + // A pointer copy is used to perofrm this move. + unsafe { + ptr::copy_nonoverlapping(ptr::from_ref(shared_buffer), ptr::from_mut(self), 1); + } + } + + fn invoke(&mut self); +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum IpiRequest { + IpiMut, + IpiShared, +} + +#[derive(Debug)] +pub struct IpiBoard { + // The number of CPUs that have yet to complete the request. + pending: AtomicUsize, + + // The request description. + request: Cell>, + + // Space to store the IPI message being sent. + message: UnsafeCell>, + + // A function pointer that will handle the IPI on the receiving CPU. + handler: Cell>, +} + +// The IpiHelper trait exists to abstract the difference between use of +// IpiMessage and IpiMessageMut in the IPI send and receive logic. +pub trait IpiHelper { + fn request_type(&self) -> IpiRequest; + fn copy_to_shared(&self, shared_buffer: &mut [u8]); + fn copy_from_shared(&mut self, shared_buffer: *const ()); + fn get_invoke_routine(&self) -> unsafe fn(*const ()); +} + +#[derive(Debug)] +pub struct IpiHelperShared<'a, T: IpiMessage + Sync> { + message: &'a T, +} + +impl<'a, T: IpiMessage + Sync> IpiHelperShared<'a, T> { + pub fn new(message: &'a T) -> Self { + Self { message } + } + + // SAFETY: The IPI logic is guaranteed to call this function only when + // passing a pointer to type `T`. + unsafe fn invoke(message: *const ()) { + // SAFETY: The calling IPI logic has guaranteed the correctness of + // the input pointer. + let msg = unsafe { ScopedRef::new(message as *const T).unwrap() }; + msg.invoke(); + } +} + +impl IpiHelper for IpiHelperShared<'_, T> { + fn request_type(&self) -> IpiRequest { + IpiRequest::IpiShared + } + + fn copy_to_shared(&self, shared_buffer: &mut [u8]) { + self.message.copy_to_shared(shared_buffer); + } + + fn copy_from_shared(&mut self, _shared_buffer: *const ()) { + // A shared IPI does not copy back any results. + } + + // SAFETY: The IPI logic is guaranteed to call this function only when + // passing a pointer to type `T`. + fn get_invoke_routine(&self) -> unsafe fn(*const ()) { + Self::invoke + } +} + +#[derive(Debug)] +pub struct IpiHelperMut<'a, T: IpiMessageMut> { + message: &'a mut T, +} + +impl<'a, T: IpiMessageMut> IpiHelperMut<'a, T> { + pub fn new(message: &'a mut T) -> Self { + Self { message } + } + + // SAFETY: The IPI logic is guaranteed to call this function only when + // passing a pointer to type `T`. + unsafe fn invoke(message: *const ()) { + // SAFETY: The calling IPI logic has guaranteed the correctness of + // the input pointer. + let mut msg = unsafe { ScopedMut::new(message as *mut T).unwrap() }; + msg.invoke(); + } +} + +impl IpiHelper for IpiHelperMut<'_, T> { + fn request_type(&self) -> IpiRequest { + IpiRequest::IpiMut + } + + fn copy_to_shared(&self, shared_buffer: &mut [u8]) { + self.message.copy_to_shared(shared_buffer); + } + + fn copy_from_shared(&mut self, shared_buffer: *const ()) { + // SAFETY: the IPI logic guarantees that the shared buffer will contain + // an object of type `T`. 
+ unsafe { + let shared = shared_buffer as *const T; + self.message.copy_from_shared(shared.as_ref().unwrap()); + } + } + + fn get_invoke_routine(&self) -> unsafe fn(*const ()) { + Self::invoke + } +} + +impl Default for IpiBoard { + fn default() -> Self { + Self { + request: Cell::new(MaybeUninit::zeroed()), + pending: AtomicUsize::new(0), + message: UnsafeCell::new(MaybeUninit::uninit()), + handler: Cell::new(MaybeUninit::uninit()), + } + } +} + +// This function is the IPI workhorse. As input, it takes an IpiHelper which +// is the interface to the correct IPI message trait implementation. This +// is consumed as a dynamic dispatch trait to avoid explosion due to multiple +// generic message implementations. +pub fn send_ipi( + target_set: IpiTarget<'_>, + sender_cpu_index: usize, + ipi_helper: &mut dyn IpiHelper, + ipi_board: &IpiBoard, +) { + // Raise TPR to synch level to prevent reentrant attempts to send an IPI. + let tpr_guard = TprGuard::raise(TPR_SYNCH); + + // Initialize the IPI board to describe this request. Since no request + // can be outstanding right now, the pending count must be zero, and + // there can be no other CPUs that are have taken references to the IPI + // board. + assert_eq!(ipi_board.pending.load(Ordering::Relaxed), 0); + ipi_board + .request + .set(MaybeUninit::new(ipi_helper.request_type())); + ipi_board + .handler + .set(MaybeUninit::new(ipi_helper.get_invoke_routine())); + // SAFETY: the IPI board is known to be in an uninitialized state and + // because the request mask on the target CPUs have not yet been updated + // to indicate a pending message from this CPU, there are no other threads + // that could be examining the IPI board at this time. It can safely + // be populated with a copy of the message. + unsafe { + let cell = &mut *ipi_board.message.get(); + let message_buf = &mut *cell.as_mut_ptr(); + ipi_helper.copy_to_shared(message_buf); + } + + // Create a local copy of the interrupt target set since the input target + // set may need to be modified before the interrupt can be sent. A local + // `CpuSet` is reserved in case it needs to be copied and modified as well. + let mut interrupt_target = target_set; + let mut interrupt_set: MaybeUninit = MaybeUninit::uninit(); + + // Enumerate all CPUs in the target set to advise that an IPI message has + // been posted. + let mut include_self = false; + let mut send_interrupt = false; + match target_set { + IpiTarget::Single(cpu_index) => { + if cpu_index == sender_cpu_index { + include_self = true; + } else { + ipi_board.pending.store(1, Ordering::Relaxed); + PERCPU_AREAS + .get_by_cpu_index(cpu_index) + .ipi_from(sender_cpu_index); + send_interrupt = true; + } + } + IpiTarget::Multiple(cpu_set) => { + for cpu_index in cpu_set.iter() { + if cpu_index == sender_cpu_index { + include_self = true; + } else { + ipi_board.pending.fetch_add(1, Ordering::Relaxed); + PERCPU_AREAS + .get_by_cpu_index(cpu_index) + .ipi_from(sender_cpu_index); + send_interrupt = true; + } + } + if include_self { + // The CPU set used to send the interrupt must be modified to + // remove the current CPU. This cannot be done in place, + // because the input CPU set is immutable. Instead, construct + // a copy and change the local IPI target to refer to the + // local copy. 
+ let new_cpu_set = interrupt_set.write(*cpu_set); + new_cpu_set.remove(sender_cpu_index); + interrupt_target = IpiTarget::Multiple(new_cpu_set); + } + } + _ => { + for cpu in PERCPU_AREAS.iter() { + ipi_board.pending.fetch_add(1, Ordering::Relaxed); + cpu.as_cpu_ref().ipi_from(sender_cpu_index); + } + send_interrupt = true; + + // Remove the current CPU from the target set and completion + // calculation, since no interrupt is required to ensure that + // IPI handlng can be performed locally. + ipi_board.pending.fetch_sub(1, Ordering::Relaxed); + + // Only include the current CPU if requested. + if let IpiTarget::All = target_set { + include_self = true; + interrupt_target = IpiTarget::AllButSelf; + } + } + } + + // Send the IPI message. + if send_interrupt { + send_ipi_irq(interrupt_target).expect("Failed to post IPI interrupt"); + } + + // If sending to the current processor, then handle the message locally. + if include_self { + // Raise TPR to IPI level for consistency with IPI interrupt handling. + let ipi_tpr_guard = TprGuard::raise(TPR_IPI); + + // SAFETY: the local IPI board is known to be in the correct state + // for processing. + unsafe { + receive_single_ipi(ipi_board); + } + drop(ipi_tpr_guard); + } + + // Wait until all other CPUs have completed their processing of the + // message. This is required to ensure that no other threads can be + // examining the IPI board. + // + // Note that because the current TPR is TPR_SYNCH, which is lower than + // TPR_IPI, any other IPIs that arrive while waiting here will interrupt + // this spin loop and will be processed correctly. + while ipi_board.pending.load(Ordering::Acquire) != 0 { + core::hint::spin_loop(); + } + + // Perform any result copy required by the IPI. + ipi_helper.copy_from_shared(ipi_board.message.get() as *const ()); + + drop(tpr_guard); +} + +fn send_single_ipi_irq(cpu_index: usize, icr: ApicIcr) -> Result<(), SvsmError> { + let cpu = PERCPU_AREAS.get_by_cpu_index(cpu_index); + SVSM_PLATFORM.post_irq(icr.with_destination(cpu.apic_id()).into()) +} + +fn send_ipi_irq(target_set: IpiTarget<'_>) -> Result<(), SvsmError> { + let icr = ApicIcr::new().with_vector(IPI_VECTOR as u8); + match target_set { + IpiTarget::Single(cpu_index) => send_single_ipi_irq(cpu_index, icr)?, + IpiTarget::Multiple(cpu_set) => { + for cpu_index in cpu_set.iter() { + send_single_ipi_irq(cpu_index, icr)?; + } + } + IpiTarget::AllButSelf => SVSM_PLATFORM.post_irq( + icr.with_destination_shorthand(IcrDestFmt::AllButSelf) + .into(), + )?, + IpiTarget::All => SVSM_PLATFORM.post_irq( + icr.with_destination_shorthand(IcrDestFmt::AllWithSelf) + .into(), + )?, + } + Ok(()) +} + +/// # Safety +/// The caller must take responsibility to ensure that the message pointer in +/// the request is valid. This is normally ensured by assuming the lifetime +/// of the request pointer is protected by the lifetime of the bulletin board +/// that posts it. +unsafe fn receive_single_ipi(board: &IpiBoard) { + // SAFETY: since the caller has indicated that this IPI board is valid, + // all fields of the IPI board can be assumed to have the correct semantics + // and can be accessed via raw pointers. 
+ unsafe { + let request = board.request.get().assume_init(); + let message = board.message.get() as *const (); + match request { + IpiRequest::IpiShared => { + let handler = board.handler.get().assume_init(); + handler(message); + } + IpiRequest::IpiMut => { + // SAFETY: the sending CPU has guaranteed that no other CPU + // can be looking at this IPI board, and the sending CPU is + // also spinning while waiting for this request to be + // processed. Since no other thread can be examining this + // data, it can safely be viewed through a mutable reference. + let handler = mem::transmute::( + board.handler.get().assume_init(), + ); + handler(message as *mut ()); + } + } + } +} + +pub fn handle_ipi_interrupt(request_set: &AtomicCpuSet) { + // Enumerate all CPUs in the request set and process the request identified + // by each. + for cpu_index in request_set.iter(Ordering::Acquire) { + // Handle the request posted on the bulletin board of the requesting + // CPU. + let cpu = PERCPU_AREAS.get_by_cpu_index(cpu_index); + + // SAFETY: The IPI board is known to be valid since the sending CPU + // marked it as valid in this CPU's request bitmap. The IPI board + // is guaranteed to remain valid until the pending count is + // decremented. + unsafe { + let ipi_board = cpu.ipi_board(); + receive_single_ipi(cpu.ipi_board()); + + // Now that the request has been handled, decrement the count of + // pending requests on the sender's bulletin board. The IPI + // board may cease to be valid as soon as this decrement + // completes. + ipi_board.pending.fetch_sub(1, Ordering::Release); + } + } +} + +/// Sends an IPI message to multiple CPUs. +/// +/// # Safety +/// The IPI message must NOT contain any references to data unless that +/// data is known to be in memory that is visible across CPUs/tasks. +/// Otherwise, the recipient could attempt to access a pointer that is +/// invalid in the target context, or - worse - points to completely +/// incorrect data in the target context. +/// +/// # Arguments +/// +/// * `target_set` - The set of CPUs to which to send the IPI. +/// * `ipi_message` - The message to send. +pub fn send_multicast_ipi(target_set: IpiTarget<'_>, ipi_message: &M) { + this_cpu().send_multicast_ipi(target_set, ipi_message); +} + +/// Sends an IPI message to a single CPU. Because only a single CPU can +/// receive the message, the message object can be mutable. +/// +/// # Arguments +/// +/// * `cpu_index` - The index of the CPU to receive the message. +/// * `ipi_message` - The message to send. +/// +/// # Returns +/// +/// The response message generated by the IPI recipient. +pub fn send_unicast_ipi(cpu_index: usize, ipi_message: &mut M) { + this_cpu().send_unicast_ipi(cpu_index, ipi_message); +} + +#[cfg(test)] +mod tests { + use crate::cpu::ipi::*; + + #[derive(Debug)] + struct TestIpi<'a> { + value: usize, + cpu_index: usize, + drop_count: &'a mut usize, + } + + impl<'a> TestIpi<'a> { + fn new(value: usize, drop_count: &'a mut usize) -> Self { + Self { + value, + drop_count, + cpu_index: this_cpu().shared().cpu_index(), + } + } + } + + impl Drop for TestIpi<'_> { + fn drop(&mut self) { + // Drop must only be called on the CPU that created the message. + // Otherwise, the drop count reference may point to the wrong + // data. 
+ assert_eq!(this_cpu().shared().cpu_index(), self.cpu_index); + *self.drop_count += 1; + } + } + + /// # Safety + /// The test IPI method has no references that are consumed as part of the + /// message (the `drop_count` reference is only used on the sending the + /// CPU, and this is enforced in the drop method) and therefore the message + /// can safely use the default copy implementations from the IPI message + /// traits. + unsafe impl IpiMessage for TestIpi<'_> { + fn invoke(&self) { + assert_eq!(self.value, 4); + } + } + + /// # Safety + /// The test IPI method has no references that are consumed as part of the + /// message (the `drop_count` reference is only used on the sending the + /// CPU, and this is enforced in the drop method) and therefore the message + /// can safely use the default copy implementations from the IPI message + /// traits. + unsafe impl IpiMessageMut for TestIpi<'_> { + fn invoke(&mut self) { + self.value += 1; + } + } + + #[test] + #[cfg_attr(not(test_in_svsm), ignore = "Can only be run inside guest")] + fn test_ipi() { + // IPI testing is only possible on platforms that support SVSM + // interrupts. + if SVSM_PLATFORM.use_interrupts() { + let mut drop_count: usize = 0; + let message = TestIpi::new(4, &mut drop_count); + send_multicast_ipi(IpiTarget::All, &message); + drop(message); + // Verify that `drop()` was called exactly once on thie IPI + // message. + assert_eq!(drop_count, 1); + } + } + + #[test] + #[cfg_attr(not(test_in_svsm), ignore = "Can only be run inside guest")] + fn test_mut_ipi() { + // IPI testing is only possible on platforms that support SVSM + // interrupts. + if SVSM_PLATFORM.use_interrupts() { + let mut drop_count: usize = 0; + let mut message = TestIpi::new(4, &mut drop_count); + send_unicast_ipi(0, &mut message); + assert_eq!(message.value, 5); + drop(message); + // Verify that `drop()` was called exactly once on thie IPI + // message. + assert_eq!(drop_count, 1); + } + } +} diff --git a/kernel/src/cpu/mod.rs b/kernel/src/cpu/mod.rs index 1819026bf..3299e0f45 100644 --- a/kernel/src/cpu/mod.rs +++ b/kernel/src/cpu/mod.rs @@ -13,6 +13,7 @@ pub mod extable; pub mod features; pub mod gdt; pub mod idt; +pub mod ipi; pub mod irq_state; pub mod isst; pub mod mem; diff --git a/kernel/src/cpu/percpu.rs b/kernel/src/cpu/percpu.rs index 15e630c28..93d00ddc4 100644 --- a/kernel/src/cpu/percpu.rs +++ b/kernel/src/cpu/percpu.rs @@ -6,7 +6,12 @@ extern crate alloc; +use super::cpuset::AtomicCpuSet; use super::gdt::GDT; +use super::ipi::{ + handle_ipi_interrupt, send_ipi, IpiBoard, IpiHelperMut, IpiHelperShared, IpiMessage, + IpiMessageMut, IpiTarget, +}; use super::isst::Isst; use super::msr::write_msr; use super::shadow_stack::{is_cet_ss_supported, ISST_ADDR}; @@ -224,6 +229,13 @@ pub struct PerCpuShared { ipi_irr: [AtomicU32; 8], ipi_pending: AtomicBool, nmi_pending: AtomicBool, + + // A set of CPUs that have requested IPI handling by this CPU. + ipi_requests: AtomicCpuSet, + + // A bulletin board holding the state of an IPI message to send to other + // CPUs. 
+ ipi_board: IpiBoard, } impl PerCpuShared { @@ -236,6 +248,8 @@ impl PerCpuShared { ipi_irr: core::array::from_fn(|_| AtomicU32::new(0)), ipi_pending: AtomicBool::new(false), nmi_pending: AtomicBool::new(false), + ipi_requests: Default::default(), + ipi_board: IpiBoard::default(), } } @@ -307,6 +321,18 @@ impl PerCpuShared { pub fn nmi_pending(&self) -> bool { self.nmi_pending.swap(false, Ordering::Relaxed) } + + pub fn ipi_from(&self, cpu_index: usize) { + self.ipi_requests.add(cpu_index, Ordering::Release); + } + + /// # Safety + /// The IPI board is not `Sync`, so the caller is responsible for ensuring + /// that a reference to the IPI board is only obtained when it is safe to + /// do so. + pub unsafe fn ipi_board(&self) -> &IpiBoard { + &self.ipi_board + } } const _: () = assert!(size_of::() <= PAGE_SIZE); @@ -416,7 +442,9 @@ impl PerCpu { } fn set_shared_global(&'static self) { - self.shared_global.set(&self.shared).expect("shared global set more than once"); + self.shared_global + .set(&self.shared) + .expect("shared global set more than once"); } pub fn shared_global(&self) -> &'static PerCpuShared { @@ -497,6 +525,45 @@ impl PerCpu { self.irq_state.lower_tpr(tpr_value); } + /// Sends an IPI message to multiple CPUs. + /// + /// * `target_set` - The set of CPUs to which to send the IPI. + /// * `ipi_message` - The message to send. + pub fn send_multicast_ipi( + &self, + target_set: IpiTarget<'_>, + ipi_message: &M, + ) { + let mut ipi_helper = IpiHelperShared::new(ipi_message); + send_ipi( + target_set, + self.shared.cpu_index, + &mut ipi_helper, + &self.shared_global().ipi_board, + ); + } + + /// Sends an IPI message to a single CPU. Because only a single CPU can + /// receive the message, the message object can be mutable. + /// + /// # Arguments + /// * `cpu_index` - The index of the CPU to receive the message. + /// * `ipi_message` - The message to send. + pub fn send_unicast_ipi(&self, cpu_index: usize, ipi_message: &mut M) { + let mut ipi_helper = IpiHelperMut::new(ipi_message); + send_ipi( + IpiTarget::Single(cpu_index), + self.shared.cpu_index, + &mut ipi_helper, + &self.shared_global().ipi_board, + ); + } + + /// Handles an IPI interrupt. + pub fn handle_ipi_interrupt(&self) { + handle_ipi_interrupt(&self.shared.ipi_requests); + } + /// Sets up the CPU-local GHCB page. pub fn setup_ghcb(&self) -> Result<(), SvsmError> { let page = GhcbPage::new()?; diff --git a/kernel/src/types.rs b/kernel/src/types.rs index 13427468b..85f07e428 100644 --- a/kernel/src/types.rs +++ b/kernel/src/types.rs @@ -96,3 +96,5 @@ impl TryFrom for Bytes { pub const TPR_NORMAL: usize = 0; pub const TPR_LOCK: usize = 2; +pub const TPR_SYNCH: usize = 13; +pub const TPR_IPI: usize = 14;
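
For completeness, a sketch (not part of this series; `QueryCpuIndex` is a
hypothetical message type) of the unicast path added in patch 5, in which the
recipient writes a response back into the sender's message:

    use crate::cpu::ipi::{send_unicast_ipi, IpiMessageMut};
    use crate::cpu::percpu::this_cpu;

    #[derive(Debug)]
    struct QueryCpuIndex {
        index: usize,
    }

    // SAFETY: the message holds no references, so the default byte-copy
    // serialization provided by the trait is sound.
    unsafe impl IpiMessageMut for QueryCpuIndex {
        fn invoke(&mut self) {
            // Runs on the single target CPU; the updated value is copied
            // back into the sender's message when the call completes.
            self.index = this_cpu().shared().cpu_index();
        }
    }

    fn query_cpu_index(cpu_index: usize) -> usize {
        let mut msg = QueryCpuIndex { index: 0 };
        send_unicast_ipi(cpu_index, &mut msg);
        msg.index
    }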