From 0c82f1a88f3eafee6ff24ede5125d4ad6ce30a7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20M=C3=BCller?=
Date: Wed, 16 Oct 2024 15:50:33 -0700
Subject: [PATCH] Test

---
 src/kernel/bpf/btf.rs       | 108 ++++++++++++++++++++++++++++++++++++
 src/kernel/bpf/mod.rs       |   3 +
 src/kernel/bpf/prog.rs      | 103 ++++++++++++++++++++++++++++++++++
 src/kernel/bpf/sys.rs       |  76 ++++++++++++++++++++++++-
 src/symbolize/symbolizer.rs |   9 ++-
 5 files changed, 296 insertions(+), 3 deletions(-)
 create mode 100644 src/kernel/bpf/btf.rs

diff --git a/src/kernel/bpf/btf.rs b/src/kernel/bpf/btf.rs
new file mode 100644
index 000000000..8c5da1dbe
--- /dev/null
+++ b/src/kernel/bpf/btf.rs
@@ -0,0 +1,108 @@
+use std::ffi::CStr;
+use std::ffi::OsStr;
+use std::mem::size_of;
+use std::os::fd::AsRawFd as _;
+use std::os::unix::ffi::OsStrExt as _;
+
+use crate::Error;
+use crate::ErrorExt as _;
+use crate::Result;
+
+use super::sys;
+
+
+/// A type encapsulating kernel provided BPF type information.
+///
+/// See <https://www.kernel.org/doc/html/latest/bpf/btf.html> for a
+/// description of the format.
+pub(crate) struct Btf {
+    /// The complete BTF data, including the "raw" header bytes.
+    data: Vec<u8>,
+    /// The extracted BTF header.
+    header: sys::btf_header,
+}
+
+impl Btf {
+    /// Load BTF information with the given ID from the kernel.
+    ///
+    /// Fails if one of the kernel queries fails, if the returned data
+    /// is too short to hold a BTF header, or if the header carries an
+    /// unsupported magic number or version.
+    pub fn load_from_id(btf_id: u32) -> Result<Self> {
+        let btf_fd = sys::bpf_btf_get_fd_from_id(btf_id)
+            .with_context(|| format!("failed to retrieve BTF file descriptor for ID {btf_id}"))?;
+
+        // Do a first call to retrieve the BTF size we need.
+        let mut btf_info = sys::bpf_btf_info::default();
+        let () = sys::bpf_btf_get_info_from_fd(btf_fd.as_raw_fd(), &mut btf_info)
+            .with_context(|| format!("failed to retrieve BTF information for ID {btf_id}"))?;
+
+        // Now call again to retrieve the actual data. The buffer is
+        // zero-initialized up front, because `Vec::set_len` must not be
+        // used to expose uninitialized memory.
+        let mut btf_data = vec![0u8; btf_info.btf_size as usize];
+
+        let mut btf_info = sys::bpf_btf_info {
+            btf: btf_data.as_mut_ptr() as _,
+            btf_size: btf_data.len() as _,
+            ..Default::default()
+        };
+        let () = sys::bpf_btf_get_info_from_fd(btf_fd.as_raw_fd(), &mut btf_info)
+            .with_context(|| format!("failed to retrieve BTF information for ID {btf_id}"))?;
+
+        if btf_data.len() < size_of::<sys::btf_header>() {
+            return Err(Error::with_unsupported(format!(
+                "BTF data for ID {btf_id} is too short to contain a header ({} bytes)",
+                btf_data.len()
+            )))
+        }
+
+        // SAFETY: `btf_data` contains at least `size_of::<btf_header>()`
+        //         initialized bytes (checked above), `btf_header` only
+        //         has integer members, and we read unaligned.
+        let header = unsafe {
+            btf_data
+                .as_ptr()
+                .cast::<sys::btf_header>()
+                .read_unaligned()
+        };
+
+        if header.magic != 0xeb9f {
+            return Err(Error::with_unsupported(format!(
+                "encountered unsupported BTF magic number ({:#x})",
+                header.magic
+            )))
+        }
+
+        if header.version != 1 {
+            return Err(Error::with_unsupported(format!(
+                "encountered unsupported BTF version ({})",
+                header.version
+            )))
+        }
+
+        let slf = Self {
+            data: btf_data,
+            header,
+        };
+        Ok(slf)
+    }
+
+    /// Retrieve a slice representing the BTF string data.
+    fn raw_strs(&self) -> &[u8] {
+        let start = self.header.hdr_len as usize + self.header.str_off as usize;
+        let end = start + self.header.str_len as usize;
+        // SANITY: Sub-slice calculation is based on data provided by the
+        //         kernel, which is trusted.
+        self.data.get(start..end).unwrap()
+    }
+
+    /// Retrieve the "name" at the given offset.
+    pub fn name(&self, offset: u32) -> Option<&OsStr> {
+        let name = self.raw_strs().get(offset as _..)?;
+        // SANITY: The strings are trusted and laid out by the kernel;
+        //         each entry has to be valid or it's a bug.
+        let name = CStr::from_bytes_until_nul(name).unwrap();
+        Some(OsStr::from_bytes(name.to_bytes()))
+    }
+}
diff --git a/src/kernel/bpf/mod.rs b/src/kernel/bpf/mod.rs
index ac00a1f41..f7be013d7 100644
--- a/src/kernel/bpf/mod.rs
+++ b/src/kernel/bpf/mod.rs
@@ -1,4 +1,7 @@
+mod btf;
 mod prog;
 mod sys;
 
+use btf::Btf;
+
 pub(super) use prog::BpfProg;
diff --git a/src/kernel/bpf/prog.rs b/src/kernel/bpf/prog.rs
index c7057aa02..ef9736aaf 100644
--- a/src/kernel/bpf/prog.rs
+++ b/src/kernel/bpf/prog.rs
@@ -1,7 +1,11 @@
 use std::borrow::Cow;
+use std::collections::HashMap;
 use std::fmt::Debug;
+use std::path::Path;
+use std::rc::Rc;
 
 use crate::inspect::SymInfo;
+use crate::once::OnceCell;
 use crate::symbolize::ResolvedSym;
 use crate::symbolize::SrcLang;
 use crate::Addr;
@@ -20,12 +24,21 @@ pub type BpfTag = u64;
 const _: () = assert!(size_of::<BpfTag>() == BPF_TAG_SIZE);
 
 
+#[derive(Debug)]
+struct LineInfoRecord {
+    file: Rc<Path>,
+    line: Option<u32>,
+    col: Option<u16>,
+}
+
+
 /// Information about a BPF program.
 #[derive(Debug)]
 pub struct BpfProg {
     addr: Addr,
     name: Box<str>,
     tag: BpfTag,
+    line_info: OnceCell<HashMap<Addr, LineInfoRecord>>,
 }
 
 impl BpfProg {
@@ -44,6 +57,7 @@ impl BpfProg {
             addr,
             name: Box::from(name),
             tag,
+            line_info: OnceCell::new(),
         };
         Some(prog)
     }
@@ -102,11 +116,25 @@ impl<'prog> TryFrom<&'prog BpfProg> for SymInfo<'prog> {
 
 #[cfg(test)]
 mod tests {
+    use crate::ErrorExt as _;
+
     use super::*;
 
+    use std::collections::hash_map::Entry;
+    use std::ffi::CStr;
+    use std::os::fd::AsFd as _;
+    use std::os::fd::AsRawFd as _;
+    use std::os::fd::BorrowedFd;
+    use std::path::PathBuf;
+
     use test_log::test;
     use test_tag::tag;
 
+    use crate::Result;
+
+    use super::super::sys;
+    use super::super::Btf;
+
 
     /// Test that we can parse a BPF program string as it may appear in
     /// `kallsyms` successfully.
@@ -126,4 +154,79 @@ mod tests {
         let name = "bpf_prog_get_curr_or_next";
         assert!(BpfProg::parse(name, addr).is_none());
     }
+
+    // https://www.kernel.org/doc/html/latest/bpf/btf.html#bpf-prog-load
+    fn query_line_info(
+        bpf_fd: BorrowedFd<'_>,
+        info: &sys::bpf_prog_info,
+    ) -> Result<HashMap<Addr, LineInfoRecord>> {
+        let name = CStr::from_bytes_until_nul(info.name.as_slice())
+            .unwrap()
+            .to_string_lossy();
+
+        assert_eq!(
+            info.line_info_rec_size,
+            size_of::<sys::bpf_line_info>() as _
+        );
+        let mut line_info = vec![sys::bpf_line_info::default(); info.nr_line_info as usize];
+
+        assert_eq!(info.jited_line_info_rec_size, size_of::<u64>() as _);
+        let mut jited_line_info = vec![0u64; info.nr_jited_line_info as usize];
+
+        let mut info = sys::bpf_prog_info {
+            nr_line_info: info.nr_line_info,
+            line_info_rec_size: info.line_info_rec_size,
+            line_info: line_info.as_mut_ptr() as _,
+            nr_jited_line_info: info.nr_jited_line_info,
+            jited_line_info_rec_size: info.jited_line_info_rec_size,
+            jited_line_info: jited_line_info.as_mut_ptr() as _,
+            ..Default::default()
+        };
+        let () = sys::bpf_prog_get_info_from_fd(bpf_fd.as_raw_fd(), &mut info).unwrap();
+
+        let mut file_cache = HashMap::new();
+        let btf = Btf::load_from_id(info.btf_id)
+            .with_context(|| format!("failed to load BTF information for program `{name}`"))?;
+        for (i, addr) in jited_line_info.into_iter().enumerate() {
+            let info = line_info.get(i).unwrap();
+            let file = btf.name(info.file_name_off).unwrap();
+
+            // Check if we already have the file cached (and do so if
+            // not), to not have dozens of duplicate allocations flying
+            // around.
+            let path = match file_cache.entry(file) {
+                Entry::Vacant(vacancy) => {
+                    let path = Rc::<Path>::from(PathBuf::from(file).into_boxed_path());
+                    vacancy.insert(path)
+                }
+                Entry::Occupied(occupancy) => occupancy.into_mut(),
+            };
+
+            let line = info.line();
+            let col = info.column();
+            println!("{addr:#x}\t: {path:?}:{line}:{col}");
+        }
+        Ok(HashMap::new())
+    }
+
+    /// Print the name and line information of every BPF program the kernel reports.
+    #[test]
+    fn foobar() {
+        let mut next_prog_id = 0;
+        while let Ok(prog_id) = sys::bpf_prog_get_next_id(next_prog_id) {
+            let fd = sys::bpf_prog_get_fd_from_id(prog_id).unwrap();
+
+            let mut info = sys::bpf_prog_info::default();
+            let () = sys::bpf_prog_get_info_from_fd(fd.as_raw_fd(), &mut info).unwrap();
+
+            println!(
+                "found BPF program: {}",
+                CStr::from_bytes_until_nul(info.name.as_slice())
+                    .unwrap()
+                    .to_string_lossy()
+            );
+            let _map = query_line_info(fd.as_fd(), &info).unwrap();
+            next_prog_id = prog_id;
+        }
+    }
 }
diff --git a/src/kernel/bpf/sys.rs b/src/kernel/bpf/sys.rs
index 7e43b7c92..79da9fdeb 100644
--- a/src/kernel/bpf/sys.rs
+++ b/src/kernel/bpf/sys.rs
@@ -13,6 +13,27 @@ type bpf_cmd = c_uint;
 const BPF_PROG_GET_NEXT_ID: bpf_cmd = 11;
 const BPF_PROG_GET_FD_BY_ID: bpf_cmd = 13;
 const BPF_OBJ_GET_INFO_BY_FD: bpf_cmd = 15;
+const BPF_BTF_GET_FD_BY_ID: bpf_cmd = 19;
+
+
+#[repr(C)]
+#[derive(Copy, Clone, Debug, Default)]
+pub struct bpf_line_info {
+    pub insn_off: u32,
+    pub file_name_off: u32,
+    pub line_off: u32,
+    pub line_col: u32,
+}
+
+impl bpf_line_info {
+    pub fn line(&self) -> u32 {
+        self.line_col >> 10
+    }
+
+    pub fn column(&self) -> u16 {
+        (self.line_col & 0x3ff) as _
+    }
+}
 
 
 #[repr(C)]
@@ -61,6 +82,32 @@ pub struct bpf_prog_info {
 }
 
 
+#[repr(C)]
+#[derive(Copy, Clone, Debug, Default)]
+pub struct bpf_btf_info {
+    pub btf: u64,
+    pub btf_size: u32,
+    pub id: u32,
+    pub name: u64,
+    pub name_len: u32,
+    pub kernel_btf: u32,
+}
+
+
+#[repr(C)]
+#[derive(Copy, Clone, Debug)]
+pub struct btf_header {
+    pub magic: u16,
+    pub version: u8,
+    pub flags: u8,
+    pub hdr_len: u32,
+    pub type_off: u32,
+    pub type_len: u32,
+    pub str_off: u32,
+    pub str_len: u32,
+}
+
+
 /// Defined in `include/uapi/linux/bpf.h`.
 #[repr(C)]
 #[derive(Copy, Clone)]
@@ -138,11 +185,11 @@ pub fn bpf_prog_get_fd_from_id(prog_id: u32) -> io::Result<OwnedFd> {
     Ok(fd)
 }
 
-pub fn bpf_prog_get_info_from_fd(bpf_fd: RawFd, info: &mut bpf_prog_info) -> io::Result<()> {
+fn bpf_obj_get_info_from_fd<I>(bpf_fd: RawFd, info: &mut I) -> io::Result<()> {
     let mut attr = bpf_attr {
         info: bpf_attr__bindgen_ty_9 {
             bpf_fd: bpf_fd as _,
-            info_len: size_of::<bpf_prog_info>() as _,
+            info_len: size_of::<I>() as _,
             // NB: Evidently `info` is not just used as output argument
             //     but also as input.
             info: info as *mut _ as usize as _,
@@ -155,6 +202,31 @@ pub fn bpf_prog_get_info_from_fd(bpf_fd: RawFd, info: &mut bpf_prog_info) -> io:
     Ok(())
 }
 
+pub fn bpf_prog_get_info_from_fd(bpf_fd: RawFd, info: &mut bpf_prog_info) -> io::Result<()> {
+    bpf_obj_get_info_from_fd::<bpf_prog_info>(bpf_fd, info)
+}
+
+pub fn bpf_btf_get_fd_from_id(btf_id: u32) -> io::Result<OwnedFd> {
+    let mut attr = bpf_attr {
+        __bindgen_anon_6: bpf_attr__bindgen_ty_8 {
+            __bindgen_anon_1: bpf_attr__bindgen_ty_8__bindgen_ty_1 { btf_id },
+            next_id: 0,
+            open_flags: 0,
+        },
+    };
+
+    let attr_size = unsafe { size_of_val(&attr.__bindgen_anon_6) };
+    let fd = sys_bpf(BPF_BTF_GET_FD_BY_ID, &mut attr, attr_size)?;
+    // SAFETY: The system call was checked for success and on success a
+    //         valid owned file descriptor is returned.
+    let fd = unsafe { OwnedFd::from_raw_fd(fd.try_into().unwrap()) };
+    Ok(fd)
+}
+
+pub fn bpf_btf_get_info_from_fd(btf_fd: RawFd, info: &mut bpf_btf_info) -> io::Result<()> {
+    bpf_obj_get_info_from_fd(btf_fd, info)
+}
+
 
 #[cfg(test)]
 mod tests {
diff --git a/src/symbolize/symbolizer.rs b/src/symbolize/symbolizer.rs
index 23364b7c0..4e1286722 100644
--- a/src/symbolize/symbolizer.rs
+++ b/src/symbolize/symbolizer.rs
@@ -1693,6 +1693,13 @@ mod tests {
     #[test]
     fn symbolize_kernel_bpf_program() {
         let addr = bpf_symbolization_target_addr();
-        println!("BPF address: {addr:#x}");
+        let src = symbolize::Source::Kernel(symbolize::Kernel::default());
+        let symbolizer = Symbolizer::new();
+        let result = symbolizer
+            .symbolize_single(&src, symbolize::Input::AbsAddr(addr))
+            .unwrap()
+            .into_sym()
+            .unwrap();
+        assert_eq!(result.name, "");
     }
 }