diff --git a/Cargo.lock b/Cargo.lock
index c2f893a2f..da3e0c363 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -834,6 +834,7 @@ dependencies = [
  "libmstpm",
  "log",
  "packit",
+ "syscall",
  "test",
 ]
@@ -868,6 +869,10 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "syscall"
+version = "0.1.0"
+
 [[package]]
 name = "test"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 842241305..42e174b8c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,8 @@ members = [
     "elf",
     # Microsoft TPM library
     "libmstpm",
+    # syscall interface definitions
+    "syscall",
 ]
 
@@ -23,6 +25,7 @@ test = { path = "test" }
 svsm = { path = "kernel" }
 elf = { path = "elf" }
 libmstpm = { path = "libmstpm" }
+syscall = { path = "syscall" }
 
 # crates.io
 aes-gcm = { version = "0.10.3", default-features = false }
diff --git a/elf/src/load_segments.rs b/elf/src/load_segments.rs
index 5b37d0b80..be62335ae 100644
--- a/elf/src/load_segments.rs
+++ b/elf/src/load_segments.rs
@@ -11,6 +11,7 @@ extern crate alloc;
 use super::types::*;
 use super::Elf64AddrRange;
 use super::Elf64File;
+use super::Elf64FileRange;
 use super::Elf64PhdrFlags;
 use super::ElfError;
 use alloc::vec::Vec;
@@ -146,6 +147,8 @@ pub struct Elf64ImageLoadVaddrAllocInfo {
 pub struct Elf64ImageLoadSegment<'a> {
     /// The virtual address (vaddr) range covering by this segment
     pub vaddr_range: Elf64AddrRange,
+    /// The range in the ELF file covering this segment
+    pub file_range: Elf64FileRange,
     /// The contents of the segment in the ELF file
     pub file_contents: &'a [u8],
     /// Flags associated with this segment
@@ -193,6 +196,7 @@ impl<'a> Iterator for Elf64ImageLoadSegmentIterator<'a> {
 
         Some(Elf64ImageLoadSegment {
             vaddr_range,
+            file_range,
             file_contents,
             flags: phdr.p_flags,
         })
diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml
index 89dded9c2..8e8c01267 100644
--- a/kernel/Cargo.toml
+++ b/kernel/Cargo.toml
@@ -21,6 +21,7 @@ doctest = true
 bootlib.workspace = true
 cpuarch.workspace = true
 elf.workspace = true
+syscall.workspace = true
 
 aes-gcm = { workspace = true, features = ["aes", "alloc"] }
 bitflags.workspace = true
diff --git a/kernel/src/address.rs b/kernel/src/address.rs
index f7ffbb631..9936d2950 100644
--- a/kernel/src/address.rs
+++ b/kernel/src/address.rs
@@ -8,6 +8,8 @@ use crate::types::{PAGE_SHIFT, PAGE_SIZE};
 use core::fmt;
 use core::ops;
+use core::slice;
+
 // The backing type to represent an address;
 type InnerAddr = usize;
 
@@ -210,6 +212,11 @@ impl VirtAddr {
         Self(sign_extend(addr))
     }
 
+    /// Returns the index into the page-table page at level `L`.
+    pub const fn to_pgtbl_idx<const L: usize>(&self) -> usize {
+        (self.0 >> (12 + L * 9)) & 0x1ffusize
+    }
+
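A quick standalone check of the index arithmetic introduced above (not part of the patch; the address value is hypothetical):

    // Mirrors VirtAddr::to_pgtbl_idx::<L>(): level L selects one of 512
    // entries via a 9-bit field starting at bit 12 + 9 * L.
    fn pgtbl_idx<const L: usize>(vaddr: usize) -> usize {
        (vaddr >> (12 + L * 9)) & 0x1ff
    }

    fn main() {
        let vaddr = 0x0000_7fff_dead_b000_usize;
        assert_eq!(pgtbl_idx::<0>(vaddr), (vaddr >> 12) & 0x1ff); // page-table level
        assert_eq!(pgtbl_idx::<3>(vaddr), (vaddr >> 39) & 0x1ff); // top level
    }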
     #[inline]
     pub fn as_ptr<T>(&self) -> *const T {
         self.0 as *const T
     }
@@ -251,6 +258,24 @@ impl VirtAddr {
     pub const fn const_add(&self, offset: usize) -> Self {
         VirtAddr::new(self.0 + offset)
     }
+
+    /// Converts the `VirtAddr` to a slice of a given type
+    ///
+    /// # Arguments
+    ///
+    /// * `len` - Number of elements of type `T` in the slice
+    ///
+    /// # Returns
+    ///
+    /// Slice with `len` elements of type `T`
+    ///
+    /// # Safety
+    ///
+    /// All Safety requirements from [`core::slice::from_raw_parts`] for the
+    /// data pointed to by the `VirtAddr` apply here as well.
+    pub unsafe fn to_slice<T>(&self, len: usize) -> &[T] {
+        slice::from_raw_parts::<T>(self.as_ptr::<T>(), len)
+    }
 }
 
 impl fmt::Display for VirtAddr {
diff --git a/kernel/src/cpu/gdt.rs b/kernel/src/cpu/gdt.rs
index 72e627ca7..905a417a3 100644
--- a/kernel/src/cpu/gdt.rs
+++ b/kernel/src/cpu/gdt.rs
@@ -41,6 +41,14 @@ impl GDTEntry {
     pub const fn data_64_kernel() -> Self {
         Self(0x00cf92000000ffffu64)
     }
+
+    pub const fn code_64_user() -> Self {
+        Self(0x00affb000000ffffu64)
+    }
+
+    pub const fn data_64_user() -> Self {
+        Self(0x00cff2000000ffffu64)
+    }
 }
 
 const GDT_SIZE: u16 = 8;
@@ -57,8 +65,8 @@ impl GDT {
             GDTEntry::null(),
             GDTEntry::code_64_kernel(),
             GDTEntry::data_64_kernel(),
-            GDTEntry::null(),
-            GDTEntry::null(),
+            GDTEntry::code_64_user(),
+            GDTEntry::data_64_user(),
             GDTEntry::null(),
             GDTEntry::null(),
             GDTEntry::null(),
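For reference, the two new descriptor constants decode as ring-3 segments. A standalone sketch verifying the relevant bits, using the standard x86-64 descriptor layout (DPL in bits 45-46, L bit at 53):

    fn dpl(desc: u64) -> u64 {
        (desc >> 45) & 0x3 // DPL field of the access byte
    }

    fn long_mode(desc: u64) -> bool {
        (desc >> 53) & 1 == 1 // L bit: 64-bit code segment
    }

    fn main() {
        let user_cs = 0x00affb000000ffffu64;
        let user_ds = 0x00cff2000000ffffu64;
        assert_eq!(dpl(user_cs), 3); // user code runs at CPL 3
        assert_eq!(dpl(user_ds), 3); // user data as well
        assert!(long_mode(user_cs)); // 64-bit code segment
        assert!(!long_mode(user_ds)); // L bit only applies to code segments
    }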
diff --git a/kernel/src/cpu/idt/common.rs b/kernel/src/cpu/idt/common.rs
index 0b27ce8e2..08b5a972c 100644
--- a/kernel/src/cpu/idt/common.rs
+++ b/kernel/src/cpu/idt/common.rs
@@ -47,6 +47,10 @@ pub struct X86ExceptionContext {
     pub frame: X86InterruptFrame,
 }
 
+pub fn user_mode(ctxt: &X86ExceptionContext) -> bool {
+    (ctxt.frame.cs & 3) == 3
+}
+
 #[derive(Copy, Clone, Default, Debug)]
 #[repr(C, packed)]
 pub struct IdtEntry {
@@ -62,7 +66,23 @@ const IDT_TARGET_MASK_1_SHIFT: u64 = 0;
 const IDT_TARGET_MASK_2_SHIFT: u64 = 48 - 16;
 const IDT_TARGET_MASK_3_SHIFT: u64 = 32;
 
-const IDT_TYPE_MASK: u64 = 0xeu64 << 40; // Only interrupt gates for now
+const IDT_TYPE_MASK: u8 = 0x0f;
+const IDT_TYPE_SHIFT: u64 = 40;
+const IDT_TYPE_CALL: u8 = 0x0c;
+const IDT_TYPE_INT: u8 = 0x0e;
+const IDT_TYPE_TRAP: u8 = 0x0f;
+
+fn idt_type_mask(t: u8) -> u64 {
+    ((t & IDT_TYPE_MASK) as u64) << IDT_TYPE_SHIFT
+}
+
+const IDT_DPL_MASK: u8 = 0x03;
+const IDT_DPL_SHIFT: u64 = 45;
+
+fn idt_dpl_mask(dpl: u8) -> u64 {
+    ((dpl & IDT_DPL_MASK) as u64) << IDT_DPL_SHIFT
+}
+
 const IDT_PRESENT_MASK: u64 = 0x1u64 << 47;
 const IDT_CS_SHIFT: u64 = 16;
@@ -70,14 +90,15 @@ const IDT_IST_MASK: u64 = 0x7;
 const IDT_IST_SHIFT: u64 = 32;
 
 impl IdtEntry {
-    fn create(target: VirtAddr, cs: u16, ist: u8) -> Self {
+    fn create(target: VirtAddr, cs: u16, desc_type: u8, dpl: u8, ist: u8) -> Self {
         let vaddr = target.bits() as u64;
         let cs_mask = (cs as u64) << IDT_CS_SHIFT;
         let ist_mask = ((ist as u64) & IDT_IST_MASK) << IDT_IST_SHIFT;
         let low = (vaddr & IDT_TARGET_MASK_1) << IDT_TARGET_MASK_1_SHIFT
             | (vaddr & IDT_TARGET_MASK_2) << IDT_TARGET_MASK_2_SHIFT
-            | IDT_TYPE_MASK
+            | idt_type_mask(desc_type)
             | IDT_PRESENT_MASK
+            | idt_dpl_mask(dpl)
             | cs_mask
             | ist_mask;
         let high = (vaddr & IDT_TARGET_MASK_3) >> IDT_TARGET_MASK_3_SHIFT;
@@ -86,17 +107,32 @@ impl IdtEntry {
     }
 
     pub fn raw_entry(target: VirtAddr) -> Self {
-        IdtEntry::create(target, SVSM_CS, 0)
+        IdtEntry::create(target, SVSM_CS, IDT_TYPE_INT, 0, 0)
     }
 
     pub fn entry(handler: unsafe extern "C" fn()) -> Self {
         let target = VirtAddr::from(handler as *const ());
-        IdtEntry::create(target, SVSM_CS, 0)
+        IdtEntry::create(target, SVSM_CS, IDT_TYPE_INT, 0, 0)
+    }
+
+    pub fn user_entry(handler: unsafe extern "C" fn()) -> Self {
+        let target = VirtAddr::from(handler as *const ());
+        IdtEntry::create(target, SVSM_CS, IDT_TYPE_INT, 3, 0)
     }
 
     pub fn ist_entry(handler: unsafe extern "C" fn(), ist: u8) -> Self {
         let target = VirtAddr::from(handler as *const ());
-        IdtEntry::create(target, SVSM_CS, ist)
+        IdtEntry::create(target, SVSM_CS, IDT_TYPE_INT, 0, ist)
+    }
+
+    pub fn trap_entry(handler: unsafe extern "C" fn()) -> Self {
+        let target = VirtAddr::from(handler as *const ());
+        IdtEntry::create(target, SVSM_CS, IDT_TYPE_TRAP, 0, 0)
+    }
+
+    pub fn call_entry(handler: unsafe extern "C" fn()) -> Self {
+        let target = VirtAddr::from(handler as *const ());
+        IdtEntry::create(target, SVSM_CS, IDT_TYPE_CALL, 3, 0)
     }
 
     pub const fn no_handler() -> Self {
diff --git a/kernel/src/cpu/idt/entry.S b/kernel/src/cpu/idt/entry.S
index 9efa3eb5e..f78f18b80 100644
--- a/kernel/src/cpu/idt/entry.S
+++ b/kernel/src/cpu/idt/entry.S
@@ -58,10 +58,19 @@ asm_entry_\name:
     jmp default_return
 .endm
 
+.globl default_return
 default_return:
+    testb $3, 18*8(%rsp) // Check CS in exception frame
+    jnz return_user
     pop_regs
+default_iret:
     iretq
 
+return_user:
+    // Put user-mode specific return code here
+    pop_regs
+    jmp default_iret
+
 // #DE Divide-by-Zero-Error Exception (Vector 0)
 default_entry_no_ist name=de handler=panic error_code=0 vector=0
 
@@ -136,3 +145,6 @@ default_entry_no_ist name=vc handler=vmm_communication error_code=1 vector=29
 
 // #SX Security Exception (Vector 30)
 default_entry_no_ist name=sx handler=panic error_code=1 vector=30
+
+// INT 0x80 system call handler
+default_entry_no_ist name=int80 handler=system_call error_code=0 vector=0x80
diff --git a/kernel/src/cpu/idt/svsm.rs b/kernel/src/cpu/idt/svsm.rs
index 09a10f676..51cd728d7 100644
--- a/kernel/src/cpu/idt/svsm.rs
+++ b/kernel/src/cpu/idt/svsm.rs
@@ -6,21 +6,27 @@
 use super::super::control_regs::read_cr2;
 use super::super::extable::handle_exception_table;
-use super::super::percpu::this_cpu;
+use super::super::percpu::{current_task, this_cpu};
 use super::super::tss::IST_DF;
 use super::super::vc::handle_vc_exception;
 use super::common::PF_ERROR_WRITE;
 use super::common::{
-    idt_mut, IdtEntry, AC_VECTOR, BP_VECTOR, BR_VECTOR, CP_VECTOR, DB_VECTOR, DE_VECTOR, DF_VECTOR,
-    GP_VECTOR, HV_VECTOR, MCE_VECTOR, MF_VECTOR, NMI_VECTOR, NM_VECTOR, NP_VECTOR, OF_VECTOR,
-    PF_VECTOR, SS_VECTOR, SX_VECTOR, TS_VECTOR, UD_VECTOR, VC_VECTOR, XF_VECTOR,
+    idt_mut, user_mode, IdtEntry, AC_VECTOR, BP_VECTOR, BR_VECTOR, CP_VECTOR, DB_VECTOR, DE_VECTOR,
+    DF_VECTOR, GP_VECTOR, HV_VECTOR, MCE_VECTOR, MF_VECTOR, NMI_VECTOR, NM_VECTOR, NP_VECTOR,
+    OF_VECTOR, PF_VECTOR, SS_VECTOR, SX_VECTOR, TS_VECTOR, UD_VECTOR, VC_VECTOR, XF_VECTOR,
 };
 use crate::address::VirtAddr;
 use crate::cpu::X86ExceptionContext;
 use crate::debug::gdbstub::svsm_gdbstub::handle_debug_exception;
+use crate::task::{is_task_fault, terminate};
+
 use core::arch::global_asm;
 
+use crate::syscall::*;
+use syscall::*;
+
 extern "C" {
+    pub fn default_return();
     fn asm_entry_de();
     fn asm_entry_db();
     fn asm_entry_nmi();
@@ -43,6 +49,7 @@ extern "C" {
     fn asm_entry_hv();
     fn asm_entry_vc();
     fn asm_entry_sx();
+    fn asm_entry_int80();
 }
 
 fn init_ist_vectors() {
@@ -76,6 +83,11 @@ pub fn early_idt_init() {
     idt.set_entry(HV_VECTOR, IdtEntry::entry(asm_entry_hv));
     idt.set_entry(VC_VECTOR, IdtEntry::entry(asm_entry_vc));
     idt.set_entry(SX_VECTOR, IdtEntry::entry(asm_entry_sx));
+
+    // Interrupts
+    idt.set_entry(0x80, IdtEntry::user_entry(asm_entry_int80));
+
+    // Load IDT
     idt.load();
 }
 
@@ -98,43 +110,75 @@ extern "C" fn ex_handler_breakpoint(ctx: &mut X86ExceptionContext) {
 
 // Doube-Fault handler
 #[no_mangle]
-extern "C" fn ex_handler_double_fault(ctx: &mut X86ExceptionContext) {
+extern "C" fn ex_handler_double_fault(ctxt: &mut X86ExceptionContext) {
     let cr2 = read_cr2();
-    let rip = ctx.frame.rip;
-    let rsp = ctx.frame.rsp;
-    panic!(
-        "Double-Fault at RIP {:#018x} RSP: {:#018x} CR2: {:#018x}",
-        rip, rsp, cr2
-    );
+    let rip = ctxt.frame.rip;
+    let rsp = ctxt.frame.rsp;
+
+    if user_mode(ctxt) {
log::error!( + "Double-Fault at RIP {:#018x} RSP: {:#018x} CR2: {:#018x} - Terminating task", + rip, + rsp, + cr2 + ); + terminate(); + } else { + panic!( + "Double-Fault at RIP {:#018x} RSP: {:#018x} CR2: {:#018x}", + rip, rsp, cr2 + ); + } } // General-Protection handler #[no_mangle] -extern "C" fn ex_handler_general_protection(ctx: &mut X86ExceptionContext) { - let rip = ctx.frame.rip; - let err = ctx.error_code; - - if !handle_exception_table(ctx) { +extern "C" fn ex_handler_general_protection(ctxt: &mut X86ExceptionContext) { + let rip = ctxt.frame.rip; + let err = ctxt.error_code; + let rsp = ctxt.frame.rsp; + + if user_mode(ctxt) { + log::error!( + "Unhandled General-Protection-Fault at RIP {:#018x} error code: {:#018x} rsp: {:#018x} - Terminating task", + rip, err, rsp); + terminate(); + } else if !handle_exception_table(ctxt) { panic!( - "Unhandled General-Protection-Fault at RIP {:#018x} error code: {:#018x}", - rip, err + "Unhandled General-Protection-Fault at RIP {:#018x} error code: {:#018x} rsp: {:#018x}", + rip, err, rsp ); } } // Page-Fault handler #[no_mangle] -extern "C" fn ex_handler_page_fault(ctx: &mut X86ExceptionContext) { +extern "C" fn ex_handler_page_fault(ctxt: &mut X86ExceptionContext) { let cr2 = read_cr2(); - let rip = ctx.frame.rip; - let err = ctx.error_code; - - if this_cpu() + let rip = ctxt.frame.rip; + let err = ctxt.error_code; + let vaddr = VirtAddr::from(cr2); + + if user_mode(ctxt) { + let kill_task: bool = if is_task_fault(vaddr) { + current_task() + .fault(vaddr, (err & PF_ERROR_WRITE) != 0) + .is_err() + } else { + true + }; + + if kill_task { + log::error!("Unexpected user-mode page-fault at RIP {:#018x} CR2: {:#018x} error code: {:#018x} - Terminating task", + rip, cr2, err); + terminate(); + } + } else if this_cpu() .handle_pf(VirtAddr::from(cr2), (err & PF_ERROR_WRITE) != 0) .is_err() - && !handle_exception_table(ctx) + && !handle_exception_table(ctxt) { - handle_debug_exception(ctx, ctx.vector); + handle_debug_exception(ctxt, ctxt.vector); panic!( "Unhandled Page-Fault at RIP {:#018x} CR2: {:#018x} error code: {:#018x}", rip, cr2, err @@ -144,7 +188,7 @@ extern "C" fn ex_handler_page_fault(ctx: &mut X86ExceptionContext) { // Hypervisor Injection handler #[no_mangle] -extern "C" fn ex_handler_hypervisor_injection(_ctx: &mut X86ExceptionContext) { +extern "C" fn ex_handler_hypervisor_injection(_ctxt: &mut X86ExceptionContext) { // #HV processing is not required in the SVSM. If a maskable // interrupt occurs, it will be processed prior to the next exit. 
     // There are no NMI sources, and #MC cannot be handled anyway
@@ -153,8 +197,41 @@
 }
 
 // VMM Communication handler
 #[no_mangle]
-extern "C" fn ex_handler_vmm_communication(ctx: &mut X86ExceptionContext) {
-    handle_vc_exception(ctx).expect("Failed to handle #VC");
+extern "C" fn ex_handler_vmm_communication(ctxt: &mut X86ExceptionContext) {
+    let rip = ctxt.frame.rip;
+    let code = ctxt.error_code;
+
+    if let Err(err) = handle_vc_exception(ctxt) {
+        log::error!("#VC handling error: {:?}", err);
+        if user_mode(ctxt) {
+            log::error!("Failed to handle #VC from user-mode at RIP {:#018x} code: {:#018x} - Terminating task", rip, code);
+            terminate();
+        } else {
+            panic!(
+                "Failed to handle #VC from kernel-mode at RIP {:#018x} code: {:#018x}",
+                rip, code
+            );
+        }
+    }
+}
+
+// System Call SoftIRQ handler
+#[no_mangle]
+extern "C" fn ex_handler_system_call(ctxt: &mut X86ExceptionContext) {
+    if !user_mode(ctxt) {
+        panic!("Syscall handler called from kernel mode!");
+    }
+
+    let Ok(input) = TryInto::<u64>::try_into(ctxt.regs.rax) else {
+        ctxt.regs.rax = !0;
+        return;
+    };
+
+    ctxt.regs.rax = match input {
+        SYS_HELLO => sys_hello(),
+        SYS_EXIT => sys_exit(),
+        _ => !0,
+    };
 }
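For context, user space reaches ex_handler_system_call through the ring-3 int 0x80 gate installed above. A minimal x86-64 sketch of a call stub — hypothetical, since the user binary is not part of this series; SYS_HELLO/SYS_EXIT would come from the new syscall crate:

    use core::arch::asm;

    /// Invoke a zero-argument SVSM syscall: the number goes in rax and the
    /// kernel's return value comes back in rax (see ex_handler_system_call).
    unsafe fn syscall0(nr: u64) -> u64 {
        let ret: u64;
        asm!("int 0x80", inlateout("rax") nr => ret);
        ret
    }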
 
 #[no_mangle]
@@ -162,9 +239,11 @@ pub extern "C" fn ex_handler_panic(ctx: &mut X86ExceptionContext) {
     let vec = ctx.vector;
     let rip = ctx.frame.rip;
     let err = ctx.error_code;
+    let rsp = ctx.frame.rsp;
+    let ss = ctx.frame.ss;
 
     panic!(
-        "Unhandled exception {} RIP {:#018x} error code: {:#018x}",
-        vec, rip, err
+        "Unhandled exception {} RIP {:#018x} error code: {:#018x} RSP: {:#018x} SS: {:#x}",
+        vec, rip, err, rsp, ss
     );
 }
diff --git a/kernel/src/cpu/percpu.rs b/kernel/src/cpu/percpu.rs
index 25bd07d1c..542b15ead 100644
--- a/kernel/src/cpu/percpu.rs
+++ b/kernel/src/cpu/percpu.rs
@@ -26,9 +26,7 @@ use crate::mm::{
 use crate::sev::ghcb::GHCB;
 use crate::sev::utils::RMPFlags;
 use crate::sev::vmsa::allocate_new_vmsa;
-use crate::task::{
-    schedule, schedule_task, RunQueue, Task, TaskPointer, WaitQueue, TASK_FLAG_SHARE_PT,
-};
+use crate::task::{schedule, schedule_task, RunQueue, Task, TaskPointer, WaitQueue};
 use crate::types::{PAGE_SHIFT, PAGE_SHIFT_2M, PAGE_SIZE, PAGE_SIZE_2M, SVSM_TR_FLAGS, SVSM_TSS};
 use crate::utils::MemoryRegion;
 use alloc::sync::Arc;
@@ -388,8 +386,7 @@ impl PerCpu {
     fn allocate_page_table(&mut self) -> Result<(), SvsmError> {
         self.vm_range.initialize()?;
 
-        let mut pgtable_ref = get_init_pgtable_locked().clone_shared()?;
-        self.vm_range.populate(&mut pgtable_ref);
+        let pgtable_ref = get_init_pgtable_locked().clone_shared()?;
         self.set_pgtable(pgtable_ref);
 
         Ok(())
@@ -484,6 +481,11 @@ impl PerCpu {
         Ok(())
     }
 
+    fn finish_page_table(&mut self) {
+        let mut pgtable = self.get_pgtable();
+        self.vm_range.populate(&mut pgtable);
+    }
+
     pub fn dump_vm_ranges(&self) {
         self.vm_range.dump_ranges();
     }
@@ -513,6 +515,8 @@ impl PerCpu {
         // Initialize allocator for temporary mappings
         self.virt_range_init();
 
+        self.finish_page_table();
+
         Ok(())
     }
 
@@ -522,7 +526,7 @@
     }
 
     pub fn setup_idle_task(&mut self, entry: extern "C" fn()) -> Result<(), SvsmError> {
-        let idle_task = Task::create(self, entry, TASK_FLAG_SHARE_PT)?;
+        let idle_task = Task::create(self, entry)?;
         self.runqueue.lock_read().set_idle_task(idle_task);
 
         Ok(())
@@ -704,6 +708,14 @@ impl PerCpu {
     pub fn runqueue(&self) -> &RWLock<RunQueue> {
         &self.runqueue
     }
+
+    pub fn current_task(&self) -> TaskPointer {
+        self.runqueue.lock_read().current_task()
+    }
+
+    pub fn set_tss_rsp0(&mut self, addr: VirtAddr) {
+        self.tss.stacks[0] = addr;
+    }
 }
 
 pub fn this_cpu_unsafe() -> *mut PerCpuUnsafe {
diff --git a/kernel/src/cpu/smp.rs b/kernel/src/cpu/smp.rs
index 268aad89e..0d98cf788 100644
--- a/kernel/src/cpu/smp.rs
+++ b/kernel/src/cpu/smp.rs
@@ -9,7 +9,7 @@ use crate::cpu::ghcb::current_ghcb;
 use crate::cpu::percpu::{this_cpu_mut, this_cpu_shared, PerCpu};
 use crate::cpu::vmsa::init_svsm_vmsa;
 use crate::requests::{request_loop, request_processing_main};
-use crate::task::{create_kernel_task, schedule_init, TASK_FLAG_SHARE_PT};
+use crate::task::{create_kernel_task, schedule_init};
 use crate::utils::immut_after_init::immut_after_init_set_multithreaded;
 
 fn start_cpu(apic_id: u32, vtom: u64) {
@@ -77,8 +77,7 @@ fn start_ap() {
 
 #[no_mangle]
 pub extern "C" fn ap_request_loop() {
-    create_kernel_task(request_processing_main, TASK_FLAG_SHARE_PT)
-        .expect("Failed to launch request processing task");
+    create_kernel_task(request_processing_main).expect("Failed to launch request processing task");
     request_loop();
     panic!("Returned from request_loop!");
 }
diff --git a/kernel/src/fs/api.rs b/kernel/src/fs/api.rs
index 2c4148d50..2005f0b83 100644
--- a/kernel/src/fs/api.rs
+++ b/kernel/src/fs/api.rs
@@ -113,7 +113,20 @@ pub trait File: Debug + Send + Sync {
     ///
     /// size of the file in bytes.
     fn size(&self) -> usize;
-    fn mapping(&self, offset: usize) -> Option<PageRef>;
+
+    /// Get reference to backing pages of the file
+    ///
+    /// # Arguments
+    ///
+    /// - `offset`: offset to the requested page in bytes
+    ///
+    /// # Returns
+    ///
+    /// [`Option<PageRef>`]: An [`Option`] with the requested page reference.
+    /// `None` if the offset is not backed by a page.
+    fn mapping(&self, _offset: usize) -> Option<PageRef> {
+        None
+    }
 }
 
 /// Represents directory operations
diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs
index 1ba859f51..2276766da 100644
--- a/kernel/src/lib.rs
+++ b/kernel/src/lib.rs
@@ -34,6 +34,7 @@ pub mod sev;
 pub mod string;
 pub mod svsm_console;
 pub mod svsm_paging;
+pub mod syscall;
 pub mod task;
 pub mod types;
 pub mod utils;
diff --git a/kernel/src/mm/address_space.rs b/kernel/src/mm/address_space.rs
index e1c600663..76a21c774 100644
--- a/kernel/src/mm/address_space.rs
+++ b/kernel/src/mm/address_space.rs
@@ -154,6 +154,16 @@ pub const SVSM_PERTASK_END: VirtAddr = SVSM_PERTASK_BASE.const_add(SIZE_LEVEL3);
 /// Kernel stack for a task
 pub const SVSM_PERTASK_STACK_BASE: VirtAddr = SVSM_PERTASK_BASE;
 
+//
+// User-space mapping constants
+//
+
+/// Start of user memory address range
+pub const USER_MEM_START: VirtAddr = VirtAddr::new(0);
+
+/// End of user memory address range
+pub const USER_MEM_END: VirtAddr = USER_MEM_START.const_add(256 * SIZE_LEVEL3);
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/kernel/src/mm/alloc.rs b/kernel/src/mm/alloc.rs
index faeb613a7..99e973162 100644
--- a/kernel/src/mm/alloc.rs
+++ b/kernel/src/mm/alloc.rs
@@ -917,6 +917,19 @@ impl PageRef {
     pub fn phys_addr(&self) -> PhysAddr {
         self.phys_addr
     }
+
+    pub fn try_copy_page(&self) -> Result<PageRef, SvsmError> {
+        let virt_addr = allocate_file_page()?;
+        unsafe {
+            let src = self.virt_addr.as_ptr::<[u8; PAGE_SIZE]>();
+            let dst = virt_addr.as_mut_ptr::<[u8; PAGE_SIZE]>();
+            ptr::copy_nonoverlapping(src, dst, 1);
+        }
+        Ok(PageRef {
+            virt_addr,
+            phys_addr: virt_to_phys(virt_addr),
+        })
+    }
 }
 
 impl AsRef<[u8; PAGE_SIZE]> for PageRef {
diff --git a/kernel/src/mm/mappings.rs b/kernel/src/mm/mappings.rs
new file mode 100644
index 000000000..52075bdc7
--- /dev/null
+++ b/kernel/src/mm/mappings.rs
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright (c) 2024 SUSE LLC
+//
+// Author: Joerg Roedel
+
+use crate::address::VirtAddr;
+use crate::error::SvsmError;
+use crate::fs::FileHandle;
+use crate::mm::vm::{Mapping, VMFileMapping, VMFileMappingFlags, VMalloc, VMR};
+use crate::task::current_task;
+
+use core::ops::Deref;
+
+extern crate alloc;
+use alloc::sync::Arc;
+
+#[derive(Debug)]
+pub struct VMMappingGuard<'a> {
+    vmr: &'a VMR,
+    start: VirtAddr,
+}
+
+impl<'a> VMMappingGuard<'a> {
+    pub fn new(vmr: &'a VMR, start: VirtAddr) -> Self {
+        VMMappingGuard { vmr, start }
+    }
+}
+
+impl Deref for VMMappingGuard<'_> {
+    type Target = VirtAddr;
+
+    fn deref(&self) -> &VirtAddr {
+        &self.start
+    }
+}
+
+impl Drop for VMMappingGuard<'_> {
+    fn drop(&mut self) {
+        self.vmr
+            .remove(self.start)
+            .expect("Fatal error: Failed to unmap region from MappingGuard");
+    }
+}
+
+pub fn create_file_mapping(
+    file: &FileHandle,
+    offset: usize,
+    size: usize,
+    flags: VMFileMappingFlags,
+) -> Result<Arc<Mapping>, SvsmError> {
+    let file_mapping = VMFileMapping::new(file, offset, size, flags)?;
+    Ok(Arc::new(Mapping::new(file_mapping)))
+}
+
+pub fn create_anon_mapping(
+    size: usize,
+    flags: VMFileMappingFlags,
+) -> Result<Arc<Mapping>, SvsmError> {
+    let alloc = VMalloc::new(size, flags)?;
+    Ok(Arc::new(Mapping::new(alloc)))
+}
+
+pub fn mmap_user(
+    addr: VirtAddr,
+    file: Option<&FileHandle>,
+    offset: usize,
+    size: usize,
+    flags: VMFileMappingFlags,
+) -> Result<VirtAddr, SvsmError> {
+    current_task().mmap_user(addr, file, offset, size, flags)
+}
+
+pub fn mmap_kernel(
+    addr: VirtAddr,
+    file: Option<&FileHandle>,
+    offset: usize,
+    size: usize,
+    flags: VMFileMappingFlags,
+) -> Result<VirtAddr, SvsmError> {
+    current_task().mmap_kernel(addr, file, offset, size, flags)
+}
+
+pub fn munmap_user(addr: VirtAddr) -> Result<(), SvsmError> {
+    current_task().munmap_user(addr)
+}
+
+pub fn munmap_kernel(addr: VirtAddr) -> Result<(), SvsmError> {
+    current_task().munmap_kernel(addr)
+}
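A short sketch of the intended RAII usage of VMMappingGuard (hypothetical helper, not part of the patch):

    use crate::address::VirtAddr;
    use crate::error::SvsmError;
    use crate::mm::vm::{Mapping, VMR};
    use crate::mm::VMMappingGuard;
    use alloc::sync::Arc;

    // Insert a mapping and let the guard unmap it again on scope exit.
    fn with_mapping(vmr: &VMR, mapping: Arc<Mapping>) -> Result<(), SvsmError> {
        let vaddr = vmr.insert(mapping)?;
        let guard = VMMappingGuard::new(vmr, vaddr);
        // Deref yields the mapped virtual address while the guard lives.
        let _va: VirtAddr = *guard;
        Ok(())
        // <- guard drops here and removes the mapping from the VMR
    }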
diff --git a/kernel/src/mm/mod.rs b/kernel/src/mm/mod.rs
index 9721c532d..159200980 100644
--- a/kernel/src/mm/mod.rs
+++ b/kernel/src/mm/mod.rs
@@ -7,6 +7,7 @@
 pub mod address_space;
 pub mod alloc;
 pub mod guestmem;
+pub mod mappings;
 pub mod memory;
 pub mod page_visibility;
 pub mod pagetable;
@@ -24,3 +25,5 @@ pub use ptguards::*;
 pub use pagetable::PageTablePart;
 
 pub use alloc::{allocate_file_page, allocate_file_page_ref, PageRef};
+
+pub use mappings::{mmap_kernel, mmap_user, munmap_kernel, munmap_user, VMMappingGuard};
diff --git a/kernel/src/mm/pagetable.rs b/kernel/src/mm/pagetable.rs
index 17f71e785..2aafbcaf3 100644
--- a/kernel/src/mm/pagetable.rs
+++ b/kernel/src/mm/pagetable.rs
@@ -137,7 +137,7 @@ fn strip_confidentiality_bits(paddr: PhysAddr) -> PhysAddr {
 }
 
 bitflags! {
-    #[derive(Copy, Clone, Debug)]
+    #[derive(Copy, Clone, Debug, Default)]
     pub struct PTEntryFlags: u64 {
         const PRESENT = 1 << 0;
         const WRITABLE = 1 << 1;
@@ -280,7 +280,7 @@ impl PageTable {
     }
 
     pub fn index<const L: usize>(vaddr: VirtAddr) -> usize {
-        vaddr.bits() >> (12 + L * 9) & 0x1ff
+        vaddr.to_pgtbl_idx::<L>()
     }
 
     fn entry_to_pagetable(entry: PTEntry) -> Option<&'static mut PTPage> {
@@ -692,16 +693,17 @@ impl PageTable {
     }
 
     pub fn populate_pgtbl_part(&mut self, part: &PageTablePart) {
-        let idx = part.index();
-        let paddr = part.address();
-        let flags = PTEntryFlags::PRESENT
-            | PTEntryFlags::WRITABLE
-            | PTEntryFlags::USER
-            | PTEntryFlags::ACCESSED;
-        let entry = &mut self.root[idx];
-        // The C bit is not required here because all page table fetches are
-        // made as C=1.
-        entry.set(paddr, flags);
+        if let Some(paddr) = part.address() {
+            let idx = part.index();
+            let flags = PTEntryFlags::PRESENT
+                | PTEntryFlags::WRITABLE
+                | PTEntryFlags::USER
+                | PTEntryFlags::ACCESSED;
+            let entry = &mut self.root[idx];
+            // The C bit is not required here because all page table fetches are
+            // made as C=1.
+            entry.set(paddr, flags);
+        }
     }
 }
@@ -952,7 +954,7 @@ impl Drop for RawPageTablePart {
 #[derive(Debug)]
 pub struct PageTablePart {
     /// The root of the page-table sub-tree
-    raw: Box<RawPageTablePart>,
+    raw: Option<Box<RawPageTablePart>>,
     /// The top-level index this PageTablePart is populated at
     idx: usize,
 }
@@ -969,11 +971,27 @@ impl PageTablePart {
     /// A new instance of PageTablePart
     pub fn new(start: VirtAddr) -> Self {
         PageTablePart {
-            raw: Box::<RawPageTablePart>::default(),
+            raw: None,
             idx: PageTable::index::<3>(start),
         }
     }
 
+    pub fn alloc(&mut self) {
+        self.get_or_init_mut();
+    }
+
+    fn get_or_init_mut(&mut self) -> &mut RawPageTablePart {
+        self.raw.get_or_insert_with(Box::default)
+    }
+
+    fn get_mut(&mut self) -> Option<&mut RawPageTablePart> {
+        self.raw.as_deref_mut()
+    }
+
+    fn get(&self) -> Option<&RawPageTablePart> {
+        self.raw.as_deref()
+    }
+
     /// Request PageTable index to populate this instance to
     ///
     /// # Returns
@@ -989,8 +1007,8 @@ impl PageTablePart {
     /// # Returns
     ///
     /// Physical base address of the page-table sub-tree
-    pub fn address(&self) -> PhysAddr {
-        self.raw.address()
+    pub fn address(&self) -> Option<PhysAddr> {
+        self.get().map(|p| p.address())
     }
 
     /// Map a 4KiB page in the page table sub-tree
@@ -1020,7 +1038,7 @@ impl PageTablePart {
     ) -> Result<(), SvsmError> {
         assert!(PageTable::index::<3>(vaddr) == self.idx);
 
-        self.raw.map_4k(vaddr, paddr, flags, shared)
+        self.get_or_init_mut().map_4k(vaddr, paddr, flags, shared)
     }
 
     /// Unmaps a 4KiB page from the page table sub-tree
@@ -1039,7 +1057,7 @@ impl PageTablePart {
     pub fn unmap_4k(&mut self, vaddr: VirtAddr) -> Option<PTEntry> {
         assert!(PageTable::index::<3>(vaddr) == self.idx);
 
-        self.raw.unmap_4k(vaddr)
+        self.get_mut().and_then(|r| r.unmap_4k(vaddr))
     }
 
     /// Map a 2MiB page in the page table sub-tree
@@ -1069,7 +1087,7 @@ impl PageTablePart {
     ) -> Result<(), SvsmError> {
         assert!(PageTable::index::<3>(vaddr) == self.idx);
 
-        self.raw.map_2m(vaddr, paddr, flags, shared)
+        self.get_or_init_mut().map_2m(vaddr, paddr, flags, shared)
     }
 
     /// Unmaps a 2MiB page from the page table sub-tree
@@ -1088,6 +1106,6 @@ impl PageTablePart {
     pub fn unmap_2m(&mut self, vaddr: VirtAddr) -> Option<PTEntry> {
         assert!(PageTable::index::<3>(vaddr) == self.idx);
 
-        self.raw.unmap_2m(vaddr)
+        self.get_mut().and_then(|r| r.unmap_2m(vaddr))
     }
 }
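The Option<Box<RawPageTablePart>> change defers the backing allocation until a sub-tree is first used. The same pattern in isolation, with a stand-in payload type:

    #[derive(Default)]
    struct RawPart {
        first_entry: u64,
    }

    struct LazyPart {
        raw: Option<Box<RawPart>>,
    }

    impl LazyPart {
        fn get_or_init_mut(&mut self) -> &mut RawPart {
            // Allocates on first use, mirroring PageTablePart::get_or_init_mut().
            self.raw.get_or_insert_with(Box::default)
        }
    }

    fn main() {
        let mut part = LazyPart { raw: None };
        assert!(part.raw.is_none()); // no allocation up front
        part.get_or_init_mut().first_entry = 1;
        assert!(part.raw.is_some()); // allocated by the first map operation
    }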
diff --git a/kernel/src/mm/vm/mapping/file_mapping.rs b/kernel/src/mm/vm/mapping/file_mapping.rs
index 411a2afa6..f6eeb760d 100644
--- a/kernel/src/mm/vm/mapping/file_mapping.rs
+++ b/kernel/src/mm/vm/mapping/file_mapping.rs
@@ -6,82 +6,47 @@
 
 extern crate alloc;
 
-use core::slice::from_raw_parts_mut;
-
-#[cfg(not(test))]
-use alloc::sync::Arc;
-
 use alloc::vec::Vec;
 
-#[cfg(not(test))]
-use super::{Mapping, VMPhysMem};
+use bitflags::bitflags;
 
-use super::{RawAllocMapping, VMPageFaultResolution, VirtualMapping};
-#[cfg(test)]
-use crate::address::Address;
+use super::{VMPageFaultResolution, VirtualMapping};
 use crate::address::PhysAddr;
 use crate::error::SvsmError;
 use crate::fs::FileHandle;
 use crate::mm::vm::VMR;
 use crate::mm::PageRef;
 use crate::mm::{pagetable::PTEntryFlags, PAGE_SIZE};
-use crate::types::{PageSize, PAGE_SHIFT};
+use crate::types::PAGE_SHIFT;
 use crate::utils::align_up;
 
-#[derive(Debug)]
-struct VMWriteFileMapping(RawAllocMapping);
-
-impl VMWriteFileMapping {
-    pub fn get_alloc(&self) -> &RawAllocMapping {
-        &self.0
-    }
-
-    pub fn get_alloc_mut(&mut self) -> &mut RawAllocMapping {
-        &mut self.0
+bitflags! {
+    #[derive(Debug, PartialEq, Copy, Clone)]
+    pub struct VMFileMappingFlags : u32 {
+        /// Read-only access to the file
+        const Read = 1 << 0;
+        // Read/Write access to a copy of the files pages
+        const Write = 1 << 1;
+        // Read-only access that allows execution
+        const Execute = 1 << 2;
+        // Map private copies of file pages
+        const Private = 1 << 3;
+        // Map at a fixed address
+        const Fixed = 1 << 4;
     }
 }
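For illustration, the combinations that exec_user later builds from ELF segment flags — a standalone model using the same bit positions:

    // Stand-alone model of the new flags (same bit values as above):
    const READ: u32 = 1 << 0;
    const WRITE: u32 = 1 << 1;
    const EXECUTE: u32 = 1 << 2;
    const PRIVATE: u32 = 1 << 3;
    const FIXED: u32 = 1 << 4;

    fn main() {
        // A writable ELF data segment maps as a fixed, private copy:
        let data_segment = FIXED | WRITE | PRIVATE;
        assert_eq!(data_segment, 0b11010);
        // An executable text segment stays shared and read-only:
        let text_segment = FIXED | EXECUTE;
        assert_eq!(text_segment, 0b10100);
    }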
 
-impl VirtualMapping for VMWriteFileMapping {
-    fn mapping_size(&self) -> usize {
-        self.0.mapping_size()
-    }
-
-    fn map(&self, offset: usize) -> Option<PhysAddr> {
-        self.0.map(offset)
-    }
-
-    fn pt_flags(&self, _offset: usize) -> PTEntryFlags {
-        PTEntryFlags::task_data()
-    }
-}
-
-#[derive(Debug, PartialEq, Copy, Clone)]
-pub enum VMFileMappingPermission {
-    /// Read-only access to the file
-    Read,
-    // Read/Write access to a copy of the files pages
-    Write,
-    // Read-only access that allows execution
-    Execute,
-}
-
 /// Map view of a ramfs file into virtual memory
 #[derive(Debug)]
 pub struct VMFileMapping {
-    /// The file that this mapping relates to
-    file: FileHandle,
-
     /// The size of the mapping in bytes
     size: usize,
 
-    /// The permission to apply to the virtual mapping
-    permission: VMFileMappingPermission,
+    /// The flags to apply to the virtual mapping
+    flags: VMFileMappingFlags,
 
     /// A vec containing references to mapped pages within the file
-    pages: Vec<Option<PageRef>>,
-
-    /// A copy of the file pages for mappings with Write permission
-    write_copy: Option<VMWriteFileMapping>,
+    pages: Vec<PageRef>,
 }
 
 impl VMFileMapping {
@@ -103,10 +68,10 @@ impl VMFileMapping {
     ///
     /// Initialized mapping on success, Err(SvsmError::Mem) on error
     pub fn new(
-        file: FileHandle,
+        file: &FileHandle,
         offset: usize,
         size: usize,
-        permission: VMFileMappingPermission,
+        flags: VMFileMappingFlags,
     ) -> Result<Self, SvsmError> {
         let page_size = align_up(size, PAGE_SIZE);
         let file_size = align_up(file.size(), PAGE_SIZE);
@@ -119,48 +84,26 @@ impl VMFileMapping {
 
         // Take references to the file pages
         let count = page_size >> PAGE_SHIFT;
-        let mut pages = Vec::<Option<PageRef>>::new();
+        let mut pages = Vec::<PageRef>::new();
         for page_index in 0..count {
-            pages.push(file.mapping(offset + page_index * PAGE_SIZE));
+            let page_ref = file
+                .mapping(offset + page_index * PAGE_SIZE)
+                .ok_or(SvsmError::Mem)?;
+            if flags.contains(VMFileMappingFlags::Private) {
+                pages.push(page_ref.try_copy_page()?);
+            } else {
+                pages.push(page_ref);
+            }
         }
 
-        // For ranges with write access we need to take a copy of the ram pages
-        // to allow them to be written to without modifying the contents of the
-        // file itself and also to prevent pointer aliasing with any other
-        // FileHandles that may be open on the same file.
-        let write_copy = if permission == VMFileMappingPermission::Write {
-            Some(VMWriteFileMapping(RawAllocMapping::new(size)))
-        } else {
-            None
-        };
-
         Ok(Self {
-            file,
             size: page_size,
-            permission,
+            flags,
             pages,
-            write_copy,
         })
     }
 }
 
 #[cfg(not(test))]
-fn copy_page(
-    vmr: &VMR,
-    file: &FileHandle,
-    offset: usize,
-    paddr_dst: PhysAddr,
-    page_size: PageSize,
-) -> Result<(), SvsmError> {
-    let page_size = usize::from(page_size);
-    let temp_map = VMPhysMem::new(paddr_dst, page_size, true);
-    let vaddr_new_page = vmr.insert(Arc::new(Mapping::new(temp_map)))?;
-    let slice = unsafe { from_raw_parts_mut(vaddr_new_page.as_mut_ptr::<u8>(), page_size) };
-    file.seek(offset);
-    file.read(slice)?;
-    vmr.remove(vaddr_new_page)?;
-    Ok(())
-}
-
 #[cfg(test)]
 fn copy_page(
     _vmr: &VMR,
@@ -189,61 +132,30 @@ impl VirtualMapping for VMFileMapping {
         if page_index >= self.pages.len() {
             return None;
         }
-        if let Some(write_copy) = &self.write_copy {
-            if let Some(write_addr) = write_copy.map(offset) {
-                return Some(write_addr);
-            };
-        }
-        self.pages[page_index].as_ref().map(|p| p.phys_addr())
+        Some(self.pages[page_index].phys_addr())
     }
 
-    fn pt_flags(&self, offset: usize) -> PTEntryFlags {
-        match self.permission {
-            VMFileMappingPermission::Read => PTEntryFlags::task_data_ro(),
-            VMFileMappingPermission::Write => {
-                if let Some(write_copy) = &self.write_copy {
-                    if write_copy.get_alloc().present(offset) {
-                        PTEntryFlags::task_data()
-                    } else {
-                        PTEntryFlags::task_data_ro()
-                    }
-                } else {
-                    PTEntryFlags::task_data_ro()
-                }
-            }
-            VMFileMappingPermission::Execute => PTEntryFlags::task_exec(),
+    fn pt_flags(&self, _offset: usize) -> PTEntryFlags {
+        let mut flags = PTEntryFlags::empty();
+
+        if self.flags.contains(VMFileMappingFlags::Write) {
+            flags |= PTEntryFlags::WRITABLE;
+        }
+
+        if !self.flags.contains(VMFileMappingFlags::Execute) {
+            flags |= PTEntryFlags::NX;
+        }
+
+        flags
     }
 
     fn handle_page_fault(
         &mut self,
-        vmr: &VMR,
-        offset: usize,
-        write: bool,
+        _vmr: &VMR,
+        _offset: usize,
+        _write: bool,
     ) -> Result<VMPageFaultResolution, SvsmError> {
-        let page_size = self.page_size();
-        let page_size_bytes = usize::from(page_size);
-
-        if !write {
-            return Err(SvsmError::Mem);
-        }
-
-        let Some(write_copy) = self.write_copy.as_mut() else {
-            return Err(SvsmError::Mem);
-        };
-
-        // This is a writeable region with copy-on-write access. The
-        // page fault will have occurred because the page has not yet
-        // been allocated. Allocate a page and copy the readonly source
-        // page into the new writeable page.
- let offset_aligned = offset & !(page_size_bytes - 1); - write_copy.get_alloc_mut().alloc_page(offset_aligned)?; - let paddr_new_page = write_copy.map(offset_aligned).ok_or(SvsmError::Mem)?; - copy_page(vmr, &self.file, offset_aligned, paddr_new_page, page_size)?; - Ok(VMPageFaultResolution { - paddr: paddr_new_page, - flags: PTEntryFlags::task_data(), - }) + Err(SvsmError::Mem) } } @@ -251,7 +163,6 @@ impl VirtualMapping for VMFileMapping { mod tests { use super::*; use crate::{ - address::VirtAddr, fs::{create, open, unlink, TestFileSystemGuard}, mm::alloc::{TestRootMem, DEFAULT_TEST_MEMORY_SIZE}, types::PAGE_SIZE, @@ -287,10 +198,10 @@ mod tests { let _test_fs = TestFileSystemGuard::setup(); let (fh, name) = create_512b_test_file(); - let vm = VMFileMapping::new(fh, 0, 512, VMFileMappingPermission::Read) + let vm = VMFileMapping::new(&fh, 0, 512, VMFileMappingFlags::Read) .expect("Failed to create new VMFileMapping"); assert_eq!(vm.mapping_size(), PAGE_SIZE); - assert_eq!(vm.permission, VMFileMappingPermission::Read); + assert!(vm.flags.contains(VMFileMappingFlags::Read)); assert_eq!(vm.pages.len(), 1); unlink(name).unwrap(); } @@ -305,12 +216,7 @@ mod tests { let (fh, name) = create_16k_test_file(); let fh2 = open(name).unwrap(); - let vm = VMFileMapping::new( - fh, - offset, - fh2.size() - offset, - VMFileMappingPermission::Read, - ); + let vm = VMFileMapping::new(&fh, offset, fh2.size() - offset, VMFileMappingFlags::Read); assert!(vm.is_err()); unlink(name).unwrap(); } @@ -322,7 +228,7 @@ mod tests { let (fh, name) = create_16k_test_file(); let fh2 = open(name).unwrap(); - let vm = VMFileMapping::new(fh, 0, fh2.size() + 1, VMFileMappingPermission::Read); + let vm = VMFileMapping::new(&fh, 0, fh2.size() + 1, VMFileMappingFlags::Read); assert!(vm.is_err()); unlink(name).unwrap(); } @@ -334,18 +240,18 @@ mod tests { let (fh, name) = create_16k_test_file(); let fh2 = open(name).unwrap(); - let vm = VMFileMapping::new(fh, PAGE_SIZE, fh2.size(), VMFileMappingPermission::Read); + let vm = VMFileMapping::new(&fh, PAGE_SIZE, fh2.size(), VMFileMappingFlags::Read); assert!(vm.is_err()); unlink(name).unwrap(); } - fn test_map_first_page(permission: VMFileMappingPermission) { + fn test_map_first_page(flags: VMFileMappingFlags) { let _test_mem = TestRootMem::setup(DEFAULT_TEST_MEMORY_SIZE); let _test_fs = TestFileSystemGuard::setup(); let (fh, name) = create_512b_test_file(); let vm = - VMFileMapping::new(fh, 0, 512, permission).expect("Failed to create new VMFileMapping"); + VMFileMapping::new(&fh, 0, 512, flags).expect("Failed to create new VMFileMapping"); let res = vm .map(0) @@ -361,13 +267,13 @@ mod tests { unlink(name).unwrap(); } - fn test_map_multiple_pages(permission: VMFileMappingPermission) { + fn test_map_multiple_pages(flags: VMFileMappingFlags) { let _test_mem = TestRootMem::setup(DEFAULT_TEST_MEMORY_SIZE); let _test_fs = TestFileSystemGuard::setup(); let (fh, name) = create_16k_test_file(); let fh2 = open(name).unwrap(); - let vm = VMFileMapping::new(fh, 0, fh2.size(), permission) + let vm = VMFileMapping::new(&fh, 0, fh2.size(), flags) .expect("Failed to create new VMFileMapping"); for i in 0..4 { @@ -385,13 +291,13 @@ mod tests { unlink(name).unwrap(); } - fn test_map_unaligned_file_size(permission: VMFileMappingPermission) { + fn test_map_unaligned_file_size(flags: VMFileMappingFlags) { let _test_mem = TestRootMem::setup(DEFAULT_TEST_MEMORY_SIZE); let _test_fs = TestFileSystemGuard::setup(); let (fh, name) = create_5000b_test_file(); let fh2 = open(name).unwrap(); - let vm = 
VMFileMapping::new(fh, 0, fh2.size(), permission) + let vm = VMFileMapping::new(&fh, 0, fh2.size(), flags) .expect("Failed to create new VMFileMapping"); assert_eq!(vm.mapping_size(), PAGE_SIZE * 2); @@ -412,13 +318,13 @@ mod tests { unlink(name).unwrap(); } - fn test_map_non_zero_offset(permission: VMFileMappingPermission) { + fn test_map_non_zero_offset(flags: VMFileMappingFlags) { let _test_mem = TestRootMem::setup(DEFAULT_TEST_MEMORY_SIZE); let _test_fs = TestFileSystemGuard::setup(); let (fh, name) = create_16k_test_file(); let fh2 = open(name).unwrap(); - let vm = VMFileMapping::new(fh, 2 * PAGE_SIZE, PAGE_SIZE, permission) + let vm = VMFileMapping::new(&fh, 2 * PAGE_SIZE, PAGE_SIZE, flags) .expect("Failed to create new VMFileMapping"); assert_eq!(vm.mapping_size(), PAGE_SIZE); @@ -439,126 +345,41 @@ mod tests { #[test] fn test_map_first_page_readonly() { - test_map_first_page(VMFileMappingPermission::Read) + test_map_first_page(VMFileMappingFlags::Read) } #[test] fn test_map_multiple_pages_readonly() { - test_map_multiple_pages(VMFileMappingPermission::Read) + test_map_multiple_pages(VMFileMappingFlags::Read) } #[test] fn test_map_unaligned_file_size_readonly() { - test_map_unaligned_file_size(VMFileMappingPermission::Read) + test_map_unaligned_file_size(VMFileMappingFlags::Read) } #[test] fn test_map_non_zero_offset_readonly() { - test_map_non_zero_offset(VMFileMappingPermission::Read) + test_map_non_zero_offset(VMFileMappingFlags::Read) } #[test] fn test_map_first_page_readwrite() { - test_map_first_page(VMFileMappingPermission::Write) + test_map_first_page(VMFileMappingFlags::Write) } #[test] fn test_map_multiple_pages_readwrite() { - test_map_multiple_pages(VMFileMappingPermission::Write) + test_map_multiple_pages(VMFileMappingFlags::Write) } #[test] fn test_map_unaligned_file_size_readwrite() { - test_map_unaligned_file_size(VMFileMappingPermission::Write) + test_map_unaligned_file_size(VMFileMappingFlags::Write) } #[test] fn test_map_non_zero_offset_readwrite() { - test_map_non_zero_offset(VMFileMappingPermission::Write) - } - - #[test] - #[cfg_attr(test_in_svsm, ignore = "FIXME")] - fn test_handle_page_fault() { - let _test_mem = TestRootMem::setup(DEFAULT_TEST_MEMORY_SIZE); - let _test_fs = TestFileSystemGuard::setup(); - - let (fh, name) = create_16k_test_file(); - let fh2 = open(name).unwrap(); - let mut vm = VMFileMapping::new(fh, 0, fh2.size(), VMFileMappingPermission::Write) - .expect("Failed to create new VMFileMapping"); - - let vmr = VMR::new( - VirtAddr::from(0usize), - VirtAddr::from(16usize * PAGE_SIZE), - PTEntryFlags::data(), - ); - let res = vm - .handle_page_fault(&vmr, PAGE_SIZE, true) - .expect("handle_page_fault() failed"); - assert!(vm.write_copy.is_some()); - assert_eq!( - vm.write_copy.as_ref().unwrap().0.mapping_size(), - vm.mapping_size() - ); - assert_eq!( - res.paddr, - vm.write_copy - .as_ref() - .unwrap() - .0 - .map(PAGE_SIZE) - .expect("Page not allocated") - ); - // create_16k_test_file() populates the first byte of each 4K page with - // the page number. We can use this to check if the copy from the file - // page to the writeable page worked correctly. 
- assert_eq!(unsafe { (res.paddr.bits() as *const u8).read() }, 1); - - assert_eq!( - vm.map(PAGE_SIZE).expect("Failed to map file page"), - res.paddr - ); - unlink(name).unwrap(); - } - - #[test] - #[cfg_attr(test_in_svsm, ignore = "FIXME")] - fn test_handle_page_fault_unaligned_addr() { - let _test_mem = TestRootMem::setup(DEFAULT_TEST_MEMORY_SIZE); - let _test_fs = TestFileSystemGuard::setup(); - - let (fh, name) = create_16k_test_file(); - let fh2 = open(name).unwrap(); - let mut vm = VMFileMapping::new(fh, 0, fh2.size(), VMFileMappingPermission::Write) - .expect("Failed to create new VMFileMapping"); - - let vmr = VMR::new( - VirtAddr::from(0usize), - VirtAddr::from(16usize * PAGE_SIZE), - PTEntryFlags::data(), - ); - let res = vm - .handle_page_fault(&vmr, PAGE_SIZE * 2 + 1, true) - .expect("handle_page_fault() failed"); - assert_eq!( - res.paddr, - vm.write_copy - .as_ref() - .unwrap() - .0 - .map(PAGE_SIZE * 2) - .expect("Page not allocated") - ); - // create_16k_test_file() populates the first byte of each 4K page with - // the page number. We can use this to check if the copy from the file - // page to the writeable page worked correctly. - assert_eq!(unsafe { (res.paddr.bits() as *const u8).read() }, 2); - - assert_eq!( - vm.map(PAGE_SIZE * 2).expect("Failed to map file page"), - res.paddr - ); - unlink(name).unwrap(); + test_map_non_zero_offset(VMFileMappingFlags::Write) } } diff --git a/kernel/src/mm/vm/mapping/mod.rs b/kernel/src/mm/vm/mapping/mod.rs index e6541e956..982f3e01e 100644 --- a/kernel/src/mm/vm/mapping/mod.rs +++ b/kernel/src/mm/vm/mapping/mod.rs @@ -13,7 +13,7 @@ pub mod reserved; pub mod vmalloc; pub use api::{Mapping, VMMAdapter, VMPageFaultResolution, VirtualMapping, VMM}; -pub use file_mapping::{VMFileMapping, VMFileMappingPermission}; +pub use file_mapping::{VMFileMapping, VMFileMappingFlags}; pub use kernel_stack::VMKernelStack; pub use phys_mem::VMPhysMem; pub use rawalloc::RawAllocMapping; diff --git a/kernel/src/mm/vm/mapping/vmalloc.rs b/kernel/src/mm/vm/mapping/vmalloc.rs index 41a3090fb..d4b03316c 100644 --- a/kernel/src/mm/vm/mapping/vmalloc.rs +++ b/kernel/src/mm/vm/mapping/vmalloc.rs @@ -9,7 +9,7 @@ use crate::error::SvsmError; use crate::mm::pagetable::PTEntryFlags; use super::rawalloc::RawAllocMapping; -use super::{Mapping, VirtualMapping}; +use super::{Mapping, VMFileMappingFlags, VirtualMapping}; /// Virtual mapping backed by allocated pages. This can be used for memory /// allocation if there is no need for the memory to be physically contiguous. 
@@ -19,6 +19,8 @@ use super::{Mapping, VMFileMappingFlags, VirtualMapping};
 pub struct VMalloc {
     /// [`RawAllocMapping`] used for memory allocation
     alloc: RawAllocMapping,
+    /// Page-table flags to map pages
+    flags: PTEntryFlags,
 }
 
 impl VMalloc {
@@ -31,10 +33,20 @@ impl VMalloc {
     /// # Returns
     ///
     /// New instance on success, Err(SvsmError::Mem) on error
-    pub fn new(size: usize) -> Result<Self, SvsmError> {
+    pub fn new(size: usize, flags: VMFileMappingFlags) -> Result<Self, SvsmError> {
         let mut vmalloc = VMalloc {
             alloc: RawAllocMapping::new(size),
+            flags: PTEntryFlags::ACCESSED | PTEntryFlags::DIRTY,
         };
+
+        if flags.contains(VMFileMappingFlags::Write) {
+            vmalloc.flags |= PTEntryFlags::WRITABLE;
+        }
+
+        if !flags.contains(VMFileMappingFlags::Execute) {
+            vmalloc.flags |= PTEntryFlags::NX;
+        }
+
         vmalloc.alloc_pages()?;
         Ok(vmalloc)
     }
@@ -48,8 +60,8 @@ impl VMalloc {
     /// # Returns
     ///
     /// New [`Mapping`] on success, Err(SvsmError::Mem) on error
-    pub fn new_mapping(size: usize) -> Result<Mapping, SvsmError> {
-        Ok(Mapping::new(Self::new(size)?))
+    pub fn new_mapping(size: usize, flags: VMFileMappingFlags) -> Result<Mapping, SvsmError> {
+        Ok(Mapping::new(Self::new(size, flags)?))
     }
 
     fn alloc_pages(&mut self) -> Result<(), SvsmError> {
@@ -71,6 +83,6 @@ impl VirtualMapping for VMalloc {
     }
 
     fn pt_flags(&self, _offset: usize) -> PTEntryFlags {
-        PTEntryFlags::WRITABLE | PTEntryFlags::NX | PTEntryFlags::ACCESSED | PTEntryFlags::DIRTY
+        self.flags
     }
 }
diff --git a/kernel/src/mm/vm/mod.rs b/kernel/src/mm/vm/mod.rs
index 1c0d8baf2..78815370a 100644
--- a/kernel/src/mm/vm/mod.rs
+++ b/kernel/src/mm/vm/mod.rs
@@ -8,7 +8,7 @@ mod mapping;
 mod range;
 
 pub use mapping::{
-    Mapping, RawAllocMapping, VMFileMapping, VMFileMappingPermission, VMKernelStack, VMMAdapter,
+    Mapping, RawAllocMapping, VMFileMapping, VMFileMappingFlags, VMKernelStack, VMMAdapter,
     VMPhysMem, VMReserved, VMalloc, VirtualMapping, VMM,
 };
 pub use range::{VMRMapping, VMR, VMR_GRANULE};
diff --git a/kernel/src/mm/vm/range.rs b/kernel/src/mm/vm/range.rs
index f898b6f8d..9b58d3d95 100644
--- a/kernel/src/mm/vm/range.rs
+++ b/kernel/src/mm/vm/range.rs
@@ -90,13 +90,18 @@ impl VMR {
     /// # Returns
     ///
     /// `Ok(())` on success, Err(SvsmError::Mem) on allocation error
-    fn alloc_page_tables(&self) -> Result<(), SvsmError> {
-        let count = ((self.end_pfn - self.start_pfn) << PAGE_SHIFT) / VMR_GRANULE;
+    fn alloc_page_tables(&self, lazy: bool) -> Result<(), SvsmError> {
         let start = VirtAddr::from(self.start_pfn << PAGE_SHIFT);
+        let end = VirtAddr::from(self.end_pfn << PAGE_SHIFT);
+        let count = end.to_pgtbl_idx::<3>() - start.to_pgtbl_idx::<3>();
         let mut vec = self.pgtbl_parts.lock_write();
 
         for idx in 0..count {
-            vec.push(PageTablePart::new(start + (idx * VMR_GRANULE)));
+            let mut part = PageTablePart::new(start + (idx * VMR_GRANULE));
+            if !lazy {
+                part.alloc();
+            }
+            vec.push(part);
         }
 
         Ok(())
@@ -115,18 +120,50 @@ impl VMR {
         }
     }
 
+    pub fn populate_addr(&self, pgtbl: &mut PageTableRef, vaddr: VirtAddr) {
+        let start = VirtAddr::from(self.start_pfn << PAGE_SHIFT);
+        let end = VirtAddr::from(self.end_pfn << PAGE_SHIFT);
+        assert!(vaddr >= start && vaddr < end);
+
+        let idx = vaddr.to_pgtbl_idx::<3>() - start.to_pgtbl_idx::<3>();
+        let parts = self.pgtbl_parts.lock_read();
+        pgtbl.populate_pgtbl_part(&parts[idx]);
+    }
+
     /// Initialize this [`VMR`] by checking the `start` and `end` values and
     /// allocating the [`PageTablePart`]s required for the mappings.
     ///
+    /// # Arguments
+    ///
+    /// * `lazy` - When `true`, use lazy allocation of [`PageTablePart`] pages.
+    ///
     /// # Returns
     ///
     /// `Ok(())` on success, Err(SvsmError::Mem) on allocation error
-    pub fn initialize(&mut self) -> Result<(), SvsmError> {
+    fn initialize_common(&mut self, lazy: bool) -> Result<(), SvsmError> {
         let start = VirtAddr::from(self.start_pfn << PAGE_SHIFT);
         let end = VirtAddr::from(self.end_pfn << PAGE_SHIFT);
         assert!(start < end && start.is_aligned(VMR_GRANULE) && end.is_aligned(VMR_GRANULE));
-        self.alloc_page_tables()
+        self.alloc_page_tables(lazy)
+    }
+
+    /// Initialize this [`VMR`] by calling `VMR::initialize_common` with `lazy = false`
+    ///
+    /// # Returns
+    ///
+    /// `Ok(())` on success, Err(SvsmError::Mem) on allocation error
+    pub fn initialize(&mut self) -> Result<(), SvsmError> {
+        self.initialize_common(false)
+    }
+
+    /// Initialize this [`VMR`] by calling `VMR::initialize_common` with `lazy = true`
+    ///
+    /// # Returns
+    ///
+    /// `Ok(())` on success, Err(SvsmError::Mem) on allocation error
+    pub fn initialize_lazy(&mut self) -> Result<(), SvsmError> {
+        self.initialize_common(true)
     }
 
     /// Returns the virtual start and end addresses for this region
@@ -283,6 +320,7 @@ impl VMR {
     /// Base address where the [`VMM`] was inserted on success or SvsmError::Mem on error
     pub fn insert_aligned(
         &self,
+        hint: VirtAddr,
         mapping: Arc<Mapping>,
         align: usize,
     ) -> Result<VirtAddr, SvsmError> {
@@ -296,15 +334,20 @@ impl VMR {
         >> PAGE_SHIFT;
         let align = align >> PAGE_SHIFT;
 
-        let mut start = align_up(self.start_pfn, align);
+        let start_pfn = max(self.start_pfn, hint.pfn());
+
+        let mut start = align_up(start_pfn, align);
         let mut end = start;
 
-        if size == 0 {
+        if size == 0 || start_pfn >= self.end_pfn {
             return Err(SvsmError::Mem);
         }
 
         let mut tree = self.tree.lock_write();
-        let mut cursor = tree.front_mut();
+        let mut cursor = tree.upper_bound_mut(Bound::Included(&start_pfn));
+        if cursor.is_null() {
+            cursor = tree.front_mut();
+        }
 
         while let Some(node) = cursor.get() {
             let (node_start, node_end) = node.range_pfn();
@@ -331,8 +374,29 @@
 
     /// Inserts [`VMM`] into the virtual memory region. This method takes the
     /// next power-of-two larger of the mapping size and uses that as the
-    /// alignment for the mappings base address. With that is calls
-    /// [`VMR::insert_aligned`].
+    /// alignment for the mapping's base address. The search for the base
+    /// address starts at `addr`. With that it calls [`VMR::insert_aligned`].
+    ///
+    /// # Arguments
+    ///
+    /// * `addr` - The virtual address at which the search for a mapping area
+    ///   starts
+    /// * `mapping` - `Arc` pointer to the VMM to insert
+    ///
+    /// # Returns
+    ///
+    /// Base address where the [`VMM`] was inserted on success or SvsmError::Mem on error
+    pub fn insert_hint(
+        &self,
+        addr: VirtAddr,
+        mapping: Arc<Mapping>,
+    ) -> Result<VirtAddr, SvsmError> {
+        let align = mapping.get().mapping_size().next_power_of_two();
+        self.insert_aligned(addr, mapping, align)
+    }
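The alignment rule used by insert_hint, checked standalone:

    fn main() {
        const PAGE_SIZE: usize = 4096;
        // A 5-page mapping gets an 8-page (next power of two) alignment:
        assert_eq!((5 * PAGE_SIZE).next_power_of_two(), 8 * PAGE_SIZE);
        // Power-of-two sizes keep their natural alignment:
        assert_eq!((4 * PAGE_SIZE).next_power_of_two(), 4 * PAGE_SIZE);
    }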
+
+    /// Inserts [`VMM`] into the virtual memory region. It searches from the
+    /// beginning of the [`VMR`] region for a suitable slot.
     ///
     /// # Arguments
     ///
     /// * `mapping` - `Arc` pointer to the VMM to insert
     ///
     /// # Returns
     ///
     /// Base address where the [`VMM`] was inserted on success or SvsmError::Mem on error
     pub fn insert(&self, mapping: Arc<Mapping>) -> Result<VirtAddr, SvsmError> {
-        let align = mapping.get().mapping_size().next_power_of_two();
-        self.insert_aligned(mapping, align)
+        self.insert_hint(VirtAddr::new(0), mapping)
     }
 
     /// Removes the mapping from a given base address from the RBTree
@@ -398,43 +461,19 @@
     /// '()' if the page fault was successfully handled.
     ///
     /// 'SvsmError::Mem' if the page fault should propogate to the next handler.
- pub fn handle_page_fault(&self, vaddr: VirtAddr, write: bool) -> Result<(), SvsmError> { - // Get the mapping that contains the faulting address. This needs to - // be done as a separate step, returning a reference to the mapping to - // avoid issues with the mapping page fault handler needing mutable access - // to `self.tree` via `insert()`. - let (pf_mapping, start) = { - let tree = self.tree.lock_read(); - let addr = vaddr.pfn(); - let cursor = tree.find(&addr); - let node = cursor.get().ok_or(SvsmError::Mem)?; - let (start, end) = node.range(); - if vaddr < start || vaddr >= end { - return Err(SvsmError::Mem); - } - (node.get_mapping_clone(), start) - }; - - let resolution = pf_mapping - .get_mut() - .handle_page_fault(self, vaddr - start, write)?; - // The handler has resolved the page fault by allocating a new page. - // Update the page table accordingly. - let vaddr = vaddr.page_align(); - let page_size = pf_mapping.get().page_size(); - let shared = pf_mapping.get().shared(); - let mut pgtbl_parts = self.pgtbl_parts.lock_write(); + pub fn handle_page_fault(&self, vaddr: VirtAddr, _write: bool) -> Result<(), SvsmError> { + // Get the mapping that contains the faulting address and check if the + // fault happened on a mapped part of the range. - let (rstart, _) = self.virt_range(); - let idx = PageTable::index::<3>(VirtAddr::from(vaddr - rstart)); - match page_size { - PageSize::Regular => { - pgtbl_parts[idx].map_4k(vaddr, resolution.paddr, resolution.flags, shared)? - } - PageSize::Huge => { - pgtbl_parts[idx].map_2m(vaddr, resolution.paddr, resolution.flags, shared)? - } + let tree = self.tree.lock_read(); + let pfn = vaddr.pfn(); + let cursor = tree.upper_bound(Bound::Included(&pfn)); + let node = cursor.get().ok_or(SvsmError::Mem)?; + let (start, end) = node.range(); + if vaddr < start || vaddr >= end { + return Err(SvsmError::Mem); } + Ok(()) } } diff --git a/kernel/src/svsm.rs b/kernel/src/svsm.rs index 5a508fe06..12a662db1 100755 --- a/kernel/src/svsm.rs +++ b/kernel/src/svsm.rs @@ -47,7 +47,8 @@ use svsm::sev::utils::{rmp_adjust, RMPFlags}; use svsm::sev::{init_hypervisor_ghcb_features, secrets_page, secrets_page_mut, sev_status_init}; use svsm::svsm_console::SVSMIOPort; use svsm::svsm_paging::{init_page_table, invalidate_early_boot_memory}; -use svsm::task::{create_kernel_task, schedule_init, TASK_FLAG_SHARE_PT}; +use svsm::task::exec_user; +use svsm::task::{create_kernel_task, schedule_init}; use svsm::types::{PageSize, GUEST_VMPL, PAGE_SIZE}; use svsm::utils::{halt, immut_after_init::ImmutAfterInitCell, zero_mem_region}; #[cfg(all(feature = "mstpm", not(test)))] @@ -450,12 +451,15 @@ pub extern "C" fn svsm_main() { } } - create_kernel_task(request_processing_main, TASK_FLAG_SHARE_PT) - .expect("Failed to launch request processing task"); + create_kernel_task(request_processing_main).expect("Failed to launch request processing task"); #[cfg(test)] crate::test_main(); + if exec_user("/init").is_err() { + log::info!("Failed to launch /init"); + } + request_loop(); panic!("Road ends here!"); diff --git a/kernel/src/syscall/handlers.rs b/kernel/src/syscall/handlers.rs new file mode 100644 index 000000000..a1031b907 --- /dev/null +++ b/kernel/src/syscall/handlers.rs @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: MIT +// +// Copyright (c) 2024 SUSE LLC +// +// Author: Joerg Roedel + +use crate::task::{current_task_terminated, schedule}; + +pub fn sys_hello() -> usize { + log::info!("Hello, world! System call invoked from user-space."); + 0 +} + +pub fn sys_exit() -> ! 
{
+    log::info!("Terminating current task");
+    unsafe {
+        current_task_terminated();
+    }
+    schedule();
+    panic!("schedule() returned in sys_exit()");
+}
diff --git a/kernel/src/syscall/mod.rs b/kernel/src/syscall/mod.rs
new file mode 100644
index 000000000..a95368dac
--- /dev/null
+++ b/kernel/src/syscall/mod.rs
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright (c) 2024 SUSE LLC
+//
+// Author: Joerg Roedel
+
+mod handlers;
+
+pub use handlers::*;
diff --git a/kernel/src/task/exec.rs b/kernel/src/task/exec.rs
new file mode 100644
index 000000000..f3b742f7f
--- /dev/null
+++ b/kernel/src/task/exec.rs
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright (c) 2024 SUSE LLC
+//
+// Author: Joerg Roedel
+
+use crate::address::{Address, VirtAddr};
+use crate::error::SvsmError;
+use crate::fs::open;
+use crate::mm::vm::VMFileMappingFlags;
+use crate::mm::USER_MEM_END;
+use crate::task::{create_user_task, current_task, schedule};
+use crate::types::PAGE_SIZE;
+use elf::{Elf64File, Elf64PhdrFlags};
+
+fn convert_elf_phdr_flags(flags: Elf64PhdrFlags) -> VMFileMappingFlags {
+    let mut vm_flags = VMFileMappingFlags::Fixed;
+
+    if flags.contains(Elf64PhdrFlags::WRITE) {
+        vm_flags |= VMFileMappingFlags::Write | VMFileMappingFlags::Private;
+    }
+
+    if flags.contains(Elf64PhdrFlags::EXECUTE) {
+        vm_flags |= VMFileMappingFlags::Execute;
+    }
+
+    vm_flags
+}
+
+pub fn exec_user(binary: &str) -> Result<(), SvsmError> {
+    let fh = open(binary)?;
+    let file_size = fh.size();
+
+    let task = current_task();
+    let vstart = task.mmap_kernel_guard(
+        VirtAddr::new(0),
+        Some(&fh),
+        0,
+        file_size,
+        VMFileMappingFlags::Read,
+    )?;
+    let buf = unsafe { vstart.to_slice::<u8>(file_size) };
+    let elf_bin = Elf64File::read(buf).map_err(|_| SvsmError::Mem)?;
+
+    let alloc_info = elf_bin.image_load_vaddr_alloc_info();
+    let virt_base = alloc_info.range.vaddr_begin;
+    let entry = elf_bin.get_entry(virt_base);
+
+    let task = create_user_task(entry.try_into().unwrap())?;
+
+    for seg in elf_bin.image_load_segment_iter(virt_base) {
+        let virt_start = VirtAddr::from(seg.vaddr_range.vaddr_begin);
+        let virt_end = VirtAddr::from(seg.vaddr_range.vaddr_end).align_up(PAGE_SIZE);
+        let file_offset = seg.file_range.offset_begin;
+        let len = virt_end - virt_start;
+        let flags = convert_elf_phdr_flags(seg.flags);
+
+        if !virt_start.is_aligned(PAGE_SIZE) {
+            return Err(SvsmError::Mem);
+        }
+
+        if file_offset > 0 {
+            task.mmap_user(virt_start, Some(&fh), file_offset, len, flags)?;
+        } else {
+            task.mmap_user(virt_start, None, 0, len, flags)?;
+        }
+    }
+
+    // Make sure the mapping is gone before calling schedule
+    drop(vstart);
+
+    // Setup 64k of task stack
+    let user_stack_size: usize = 64 * 1024;
+    let stack_flags: VMFileMappingFlags = VMFileMappingFlags::Fixed | VMFileMappingFlags::Write;
+    let stack_addr = USER_MEM_END - user_stack_size;
+    task.mmap_user(stack_addr, None, 0, user_stack_size, stack_flags)?;
+
+    schedule();
+
+    Ok(())
+}
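A standalone check of the user-stack placement arithmetic (SIZE_LEVEL3 assumed to be 512 GiB, i.e. 1 << 39, matching the 256 * SIZE_LEVEL3 definition of USER_MEM_END above):

    fn main() {
        const SIZE_LEVEL3: usize = 1 << 39; // assumed: one top-level entry's span
        const USER_MEM_END: usize = 256 * SIZE_LEVEL3;
        const USER_STACK_SIZE: usize = 64 * 1024;

        let stack_addr = USER_MEM_END - USER_STACK_SIZE;
        assert_eq!(stack_addr % 4096, 0); // stack base stays page aligned
        assert_eq!(USER_MEM_END - stack_addr, USER_STACK_SIZE);
    }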
diff --git a/kernel/src/task/mod.rs b/kernel/src/task/mod.rs
index 0ba7659a0..9e4345f3c 100644
--- a/kernel/src/task/mod.rs
+++ b/kernel/src/task/mod.rs
@@ -4,17 +4,20 @@
 //
 // Author: Roy Hopkins
 
+mod exec;
 mod schedule;
 mod tasks;
 mod waiting;
 
 pub use schedule::{
-    create_kernel_task, is_current_task, schedule, schedule_init, schedule_task, RunQueue, TASKLIST,
+    create_kernel_task, create_user_task, current_task, current_task_terminated, is_current_task,
+    schedule, schedule_init, schedule_task, terminate, RunQueue, TASKLIST,
 };
 pub use tasks::{
-    Task, TaskContext, TaskError, TaskListAdapter, TaskPointer, TaskRunListAdapter, TaskState,
-    INITIAL_TASK_ID, TASK_FLAG_SHARE_PT,
+    is_task_fault, Task, TaskContext, TaskError, TaskListAdapter, TaskPointer, TaskRunListAdapter,
+    TaskState, INITIAL_TASK_ID, TASK_FLAG_SHARE_PT,
 };
 
+pub use exec::exec_user;
 pub use waiting::WaitQueue;
diff --git a/kernel/src/task/schedule.rs b/kernel/src/task/schedule.rs
index 5f6ed7f72..9284451c9 100644
--- a/kernel/src/task/schedule.rs
+++ b/kernel/src/task/schedule.rs
@@ -223,9 +223,9 @@ impl TaskList {
 
 pub static TASKLIST: SpinLock<TaskList> = SpinLock::new(TaskList::new());
 
-pub fn create_kernel_task(entry: extern "C" fn(), flags: u16) -> Result<TaskPointer, SvsmError> {
+pub fn create_kernel_task(entry: extern "C" fn()) -> Result<TaskPointer, SvsmError> {
     let mut cpu = this_cpu_mut();
-    let task = Task::create(&mut cpu, entry, flags)?;
+    let task = Task::create(&mut cpu, entry)?;
     TASKLIST.lock().list().push_back(task.clone());
 
     // Put task on the runqueue of this CPU
@@ -237,6 +237,22 @@ pub fn create_kernel_task(entry: extern "C" fn(), flags: u16) -> Result<TaskPointer, SvsmError> {
     Ok(task)
 }
 
+pub fn create_user_task(user_entry: usize) -> Result<TaskPointer, SvsmError> {
+    let mut cpu = this_cpu_mut();
+    let task = Task::create_user(&mut cpu, user_entry)?;
+    TASKLIST.lock().list().push_back(task.clone());
+
+    // Put task on the runqueue of this CPU
+    cpu.runqueue().lock_write().handle_task(task.clone());
+    drop(cpu);
+
+    Ok(task)
+}
+
+pub fn current_task() -> TaskPointer {
+    this_cpu().current_task()
+}
+
 /// Check to see if the task scheduled on the current processor has the given id
 pub fn is_current_task(id: u32) -> bool {
     match &this_cpu().runqueue().lock_read().current_task {
@@ -247,9 +263,9 @@
 
 /// Terminates the current task.
 ///
-/// # Panics
+/// # Safety
 ///
-/// Panics if there is no current task.
+/// This function must only be called after scheduling is initialized, otherwise it will panic.
 pub unsafe fn current_task_terminated() {
     let cpu = this_cpu();
     let mut rq = cpu.runqueue().lock_write();
@@ -260,6 +276,14 @@ pub unsafe fn current_task_terminated() {
     TASKLIST.lock().terminate(task_node.clone());
 }
 
+pub fn terminate() {
+    // TODO: re-evaluate whether current_task_terminated() needs to be unsafe
+    unsafe {
+        current_task_terminated();
+    }
+    schedule();
+}
+
 // SAFETY: This function returns a raw pointer to a task. It is safe
 // because this function is only used in the task switch code, which also only
 // takes a single reference to the next and previous tasks. Also, this
diff --git a/kernel/src/task/tasks.rs b/kernel/src/task/tasks.rs
index adbd51c97..c3235e9af 100644
--- a/kernel/src/task/tasks.rs
+++ b/kernel/src/task/tasks.rs
@@ -12,14 +12,21 @@
 use core::mem::size_of;
 use core::sync::atomic::{AtomicU32, Ordering};
 
 use crate::address::{Address, VirtAddr};
+use crate::cpu::idt::svsm::default_return;
 use crate::cpu::msr::read_flags;
 use crate::cpu::percpu::PerCpu;
+use crate::cpu::X86ExceptionContext;
 use crate::cpu::X86GeneralRegs;
 use crate::error::SvsmError;
+use crate::fs::FileHandle;
 use crate::locking::{RWLock, SpinLock};
-use crate::mm::pagetable::{get_init_pgtable_locked, PTEntryFlags, PageTableRef};
-use crate::mm::vm::{Mapping, VMKernelStack, VMR};
-use crate::mm::{SVSM_PERTASK_BASE, SVSM_PERTASK_END, SVSM_PERTASK_STACK_BASE};
+use crate::mm::pagetable::{PTEntryFlags, PageTableRef};
+use crate::mm::vm::{Mapping, VMFileMappingFlags, VMKernelStack, VMR};
+use crate::mm::{
+    mappings::create_anon_mapping, mappings::create_file_mapping, VMMappingGuard,
+    SVSM_PERTASK_BASE, SVSM_PERTASK_END, SVSM_PERTASK_STACK_BASE, USER_MEM_END, USER_MEM_START,
+};
+use crate::types::{SVSM_USER_CS, SVSM_USER_DS};
 use crate::utils::MemoryRegion;
 use intrusive_collections::{intrusive_adapter, LinkedListAtomicLink};
@@ -109,7 +116,7 @@ impl TaskSchedState {
 pub struct Task {
     pub rsp: u64,
 
-    stack_bounds: MemoryRegion<VirtAddr>,
+    pub stack_bounds: MemoryRegion<VirtAddr>,
 
     /// Page table that is loaded when the task is scheduled
     pub page_table: SpinLock<PageTableRef>,
@@ -117,6 +124,9 @@ pub struct Task {
     /// Task virtual memory range for use at CPL 0
     vm_kernel_range: VMR,
 
+    /// Task virtual memory range for use at CPL 3 - None for kernel tasks
+    vm_user_range: Option<VMR>,
+
     /// State relevant for scheduler
     sched_state: RWLock<TaskSchedState>,
@@ -158,16 +168,8 @@ impl fmt::Debug for Task {
 }
 
 impl Task {
-    pub fn create(
-        cpu: &mut PerCpu,
-        entry: extern "C" fn(),
-        flags: u16,
-    ) -> Result<TaskPointer, SvsmError> {
-        let mut pgtable = if (flags & TASK_FLAG_SHARE_PT) != 0 {
-            cpu.get_pgtable().clone_shared()?
-        } else {
-            Self::allocate_page_table()?
-        };
+    pub fn create(cpu: &mut PerCpu, entry: extern "C" fn()) -> Result<TaskPointer, SvsmError> {
+        let mut pgtable = cpu.get_pgtable().clone_shared()?;
 
         cpu.populate_page_table(&mut pgtable);
 
@@ -175,7 +177,7 @@ impl Task {
             VMR::new(SVSM_PERTASK_BASE, SVSM_PERTASK_END, PTEntryFlags::empty());
         vm_kernel_range.initialize()?;
 
-        let (stack, raw_bounds, rsp_offset) = Self::allocate_stack(cpu, entry)?;
+        let (stack, raw_bounds, rsp_offset) = Self::allocate_ktask_stack(cpu, entry)?;
         vm_kernel_range.insert_at(SVSM_PERTASK_STACK_BASE, stack)?;
 
         vm_kernel_range.populate(&mut pgtable);
@@ -190,11 +192,56 @@ impl Task {
             rsp: bounds
                 .end()
                 .checked_sub(rsp_offset)
-                .expect("Invalid stack offset from task::allocate_stack()")
+                .expect("Invalid stack offset from task::allocate_ktask_stack()")
+                .bits() as u64,
+            stack_bounds: bounds,
+            page_table: SpinLock::new(pgtable),
+            vm_kernel_range,
+            vm_user_range: None,
+            sched_state: RWLock::new(TaskSchedState {
+                idle_task: false,
+                state: TaskState::RUNNING,
+                cpu: cpu.get_apic_id(),
+            }),
+            id: TASK_ID_ALLOCATOR.next_id(),
+            list_link: LinkedListAtomicLink::default(),
+            runlist_link: LinkedListAtomicLink::default(),
+        }))
+    }
+
+    pub fn create_user(cpu: &mut PerCpu, user_entry: usize) -> Result<TaskPointer, SvsmError> {
+        let mut pgtable = cpu.get_pgtable().clone_shared()?;
+
+        cpu.populate_page_table(&mut pgtable);
+
+        let mut vm_kernel_range =
+            VMR::new(SVSM_PERTASK_BASE, SVSM_PERTASK_END, PTEntryFlags::empty());
+        vm_kernel_range.initialize()?;
+
+        let (stack, raw_bounds, stack_offset) = Self::allocate_utask_stack(cpu, user_entry)?;
+        vm_kernel_range.insert_at(SVSM_PERTASK_STACK_BASE, stack)?;
+
+        vm_kernel_range.populate(&mut pgtable);
+
+        let mut vm_user_range = VMR::new(USER_MEM_START, USER_MEM_END, PTEntryFlags::USER);
+        vm_user_range.initialize_lazy()?;
+
+        // Remap at the per-task offset
+        let bounds = MemoryRegion::new(
+            SVSM_PERTASK_STACK_BASE + raw_bounds.start().into(),
+            raw_bounds.len(),
+        );
+
+        Ok(Arc::new(Task {
+            rsp: bounds
+                .end()
+                .checked_sub(stack_offset)
+                .expect("Invalid stack offset from task::allocate_utask_stack()")
                 .bits() as u64,
             stack_bounds: bounds,
             page_table: SpinLock::new(pgtable),
             vm_kernel_range,
+            vm_user_range: Some(vm_user_range),
             sched_state: RWLock::new(TaskSchedState {
                 idle_task: false,
                 state: TaskState::RUNNING,
@@ -259,14 +306,33 @@ impl Task {
         self.vm_kernel_range.handle_page_fault(vaddr, write)
     }
 
-    fn allocate_stack(
-        cpu: &mut PerCpu,
-        entry: extern "C" fn(),
-    ) -> Result<(Arc<Mapping>, MemoryRegion<VirtAddr>, usize), SvsmError> {
+    pub fn fault(&self, vaddr: VirtAddr, write: bool) -> Result<(), SvsmError> {
+        if vaddr >= USER_MEM_START && vaddr < USER_MEM_END && self.vm_user_range.is_some() {
+            let vmr = self.vm_user_range.as_ref().unwrap();
+            let mut pgtbl = self.page_table.lock();
+            vmr.populate_addr(&mut pgtbl, vaddr);
+            vmr.handle_page_fault(vaddr, write)?;
+            Ok(())
+        } else {
+            Err(SvsmError::Mem)
+        }
+    }
+
+    fn allocate_stack_common() -> Result<(Arc<Mapping>, MemoryRegion<VirtAddr>), SvsmError> {
         let stack = VMKernelStack::new()?;
         let bounds = stack.bounds(VirtAddr::from(0u64));
 
         let mapping = Arc::new(Mapping::new(stack));
+
+        Ok((mapping, bounds))
+    }
+
+    fn allocate_ktask_stack(
+        cpu: &mut PerCpu,
+        entry: extern "C" fn(),
+    ) -> Result<(Arc<Mapping>, MemoryRegion<VirtAddr>, usize), SvsmError> {
+        let (mapping, bounds) = Task::allocate_stack_common()?;
+
         let percpu_mapping = cpu.new_mapping(mapping.clone())?;
 
         // We need to setup a context on the stack that matches the stack layout
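Task::fault() above only resolves faults inside the user address range. The page-fault handler side is not part of this excerpt; a plausible routing, using the is_task_fault() helper added further down in this file, might look like the following sketch:

    use crate::task::{current_task, is_task_fault};

    // Hedged sketch: forward faults in per-task ranges to the current
    // task, treat everything else as a fatal memory error.
    fn handle_task_fault(vaddr: VirtAddr, write: bool) -> Result<(), SvsmError> {
        if is_task_fault(vaddr) {
            current_task().fault(vaddr, write)
        } else {
            Err(SvsmError::Mem)
        }
    }
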
@@ -286,12 +352,129 @@ impl Task {
         Ok((mapping, bounds, size_of::<TaskContext>() + size_of::<u64>()))
     }
 
-    fn allocate_page_table() -> Result<PageTableRef, SvsmError> {
-        // Base the new task page table on the initial SVSM kernel page table.
-        // When the pagetable is schedule to a CPU, the per CPU entry will also
-        // be added to the pagetable.
-        get_init_pgtable_locked().clone_shared()
-    }
+    fn allocate_utask_stack(
+        cpu: &mut PerCpu,
+        user_entry: usize,
+    ) -> Result<(Arc<Mapping>, MemoryRegion<VirtAddr>, usize), SvsmError> {
+        let (mapping, bounds) = Task::allocate_stack_common()?;
+
+        let percpu_mapping = cpu.new_mapping(mapping.clone())?;
+
+        // We need to setup a context on the stack that matches the stack layout
+        // defined in switch_context below.
+        let stack_ptr = (percpu_mapping.virt_addr() + bounds.end().bits()).as_mut_ptr::<u8>();
+
+        let mut stack_offset = size_of::<X86ExceptionContext>();
+
+        // 'Push' the task frame onto the stack
+        unsafe {
+            // Setup IRQ return frame
+            let mut iret_frame = X86ExceptionContext::default();
+            iret_frame.frame.rip = user_entry;
+            iret_frame.frame.cs = (SVSM_USER_CS | 3).into();
+            iret_frame.frame.flags = 0;
+            iret_frame.frame.rsp = (USER_MEM_END - 8).into();
+            iret_frame.frame.ss = (SVSM_USER_DS | 3).into();
+
+            // Copy IRET frame to stack
+            let stack_iret_frame = stack_ptr.sub(stack_offset).cast::<X86ExceptionContext>();
+            *stack_iret_frame = iret_frame;
+
+            stack_offset += size_of::<TaskContext>();
+
+            let task_context = TaskContext {
+                ret_addr: VirtAddr::from(default_return as *const ())
+                    .bits()
+                    .try_into()
+                    .unwrap(),
+                ..Default::default()
+            };
+            let stack_task_context = stack_ptr.sub(stack_offset).cast::<TaskContext>();
+            *stack_task_context = task_context;
+        }
+
+        Ok((mapping, bounds, stack_offset))
+    }
+
+    pub fn mmap_common(
+        vmr: &VMR,
+        addr: VirtAddr,
+        file: Option<&FileHandle>,
+        offset: usize,
+        size: usize,
+        flags: VMFileMappingFlags,
+    ) -> Result<VirtAddr, SvsmError> {
+        let mapping = if let Some(f) = file {
+            create_file_mapping(f, offset, size, flags)?
+        } else {
+            create_anon_mapping(size, flags)?
+        };
+
+        if flags.contains(VMFileMappingFlags::Fixed) {
+            Ok(vmr.insert_at(addr, mapping)?)
+        } else {
+            Ok(vmr.insert_hint(addr, mapping)?)
+        }
+    }
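mmap_common() picks the placement strategy from the Fixed flag: insert_at() honors addr exactly, while insert_hint() only uses it to seed the search for a free range. A usage sketch against mmap_user() (the fixed address is an assumption and must lie inside the user range):

    // Exact placement: fails if the range at `addr` is not available.
    let fixed = task.mmap_user(
        VirtAddr::from(0x8000_0000u64), // assumed-free user address
        None,
        0,
        PAGE_SIZE,
        VMFileMappingFlags::Fixed | VMFileMappingFlags::Write,
    )?;

    // Hint-based placement: the VMR chooses a suitable address.
    let hinted = task.mmap_user(VirtAddr::new(0), None, 0, PAGE_SIZE, VMFileMappingFlags::Write)?;
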
+
+    pub fn mmap_kernel(
+        &self,
+        addr: VirtAddr,
+        file: Option<&FileHandle>,
+        offset: usize,
+        size: usize,
+        flags: VMFileMappingFlags,
+    ) -> Result<VirtAddr, SvsmError> {
+        Self::mmap_common(&self.vm_kernel_range, addr, file, offset, size, flags)
+    }
+
+    pub fn mmap_kernel_guard<'a>(
+        &'a self,
+        addr: VirtAddr,
+        file: Option<&FileHandle>,
+        offset: usize,
+        size: usize,
+        flags: VMFileMappingFlags,
+    ) -> Result<VMMappingGuard<'a>, SvsmError> {
+        let vaddr = Self::mmap_common(&self.vm_kernel_range, addr, file, offset, size, flags)?;
+        Ok(VMMappingGuard::new(&self.vm_kernel_range, vaddr))
+    }
+
+    pub fn mmap_user(
+        &self,
+        addr: VirtAddr,
+        file: Option<&FileHandle>,
+        offset: usize,
+        size: usize,
+        flags: VMFileMappingFlags,
+    ) -> Result<VirtAddr, SvsmError> {
+        if self.vm_user_range.is_none() {
+            return Err(SvsmError::Mem);
+        }
+
+        let vmr = self.vm_user_range.as_ref().unwrap();
+
+        Self::mmap_common(vmr, addr, file, offset, size, flags)
+    }
+
+    pub fn munmap_kernel(&self, addr: VirtAddr) -> Result<(), SvsmError> {
+        self.vm_kernel_range.remove(addr)?;
+        Ok(())
+    }
+
+    pub fn munmap_user(&self, addr: VirtAddr) -> Result<(), SvsmError> {
+        if self.vm_user_range.is_none() {
+            return Err(SvsmError::Mem);
+        }
+
+        self.vm_user_range.as_ref().unwrap().remove(addr)?;
+        Ok(())
+    }
+}
+
+pub fn is_task_fault(vaddr: VirtAddr) -> bool {
+    (vaddr >= USER_MEM_START && vaddr < USER_MEM_END)
+        || (vaddr >= SVSM_PERTASK_BASE && vaddr < SVSM_PERTASK_END)
 }
 
 extern "C" fn task_exit() {
diff --git a/syscall/Cargo.toml b/syscall/Cargo.toml
new file mode 100644
index 000000000..555ddd2b2
--- /dev/null
+++ b/syscall/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "syscall"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+
+[lints]
+workspace = true
diff --git a/syscall/src/lib.rs b/syscall/src/lib.rs
new file mode 100644
index 000000000..d02cfdf2d
--- /dev/null
+++ b/syscall/src/lib.rs
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright (c) 2024 SUSE LLC
+//
+// Author: Joerg Roedel
+#![no_std]
+
+mod numbers;
+
+pub use numbers::*;
diff --git a/syscall/src/numbers.rs b/syscall/src/numbers.rs
new file mode 100644
index 000000000..64e43a4ea
--- /dev/null
+++ b/syscall/src/numbers.rs
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright (c) 2024 SUSE LLC
+//
+// Author: Joerg Roedel
+
+// SYSCALL numbers are not stable yet and are only used for CPL-3 bringup
+
+pub const SYS_HELLO: u64 = 0;
+pub const SYS_EXIT: u64 = 1;
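These constants are the shared contract between the kernel and user space. Schematically, a kernel-side dispatcher can match on them as below; the handler names and return convention are assumptions, since kernel/src/syscall/handlers.rs is not fully shown in this diff:

    use syscall::{SYS_EXIT, SYS_HELLO};

    // Hedged sketch of syscall dispatch during CPL-3 bringup.
    fn dispatch_syscall(nr: u64) -> u64 {
        match nr {
            SYS_HELLO => sys_hello(), // hypothetical handler
            SYS_EXIT => sys_exit(),   // terminates the task, does not return
            _ => !0,                  // unknown syscall number
        }
    }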