Skip to content

Commit

Permalink
Add support for symbolizing BPF program kernel addresses
Browse files Browse the repository at this point in the history
This change adds the remaining plumbing for symbolizing BPF program
kernel addresses. When a kernel address falls into a BPF program, we
query all the necessary information to see if the kernel is able to
provide us with source code information about said address and, if so,
build the corresponding CodeInfo object to include in the symbolization
result.

Closes: libbpf#826

Signed-off-by: Daniel Müller <[email protected]>
  • Loading branch information
d-e-s-o committed Oct 18, 2024
1 parent b227352 commit f780795
Show file tree
Hide file tree
Showing 6 changed files with 255 additions and 11 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ jobs:
rust: stable
profile: dev
args: "--lib --no-default-features --features=breakpad"
- runs-on: ubuntu-latest
rust: stable
profile: dev
args: "--lib --no-default-features --features=bpf"
- runs-on: ubuntu-latest
rust: stable
profile: dev
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
Unreleased
----------
- Added support for symbolizing BPF kernel program addresses including
source code information
- Added `bpf` (default disabled) feature
- Added support for iteration over DWARF symbols to `inspect::Inspector`
- Adjusted normalization logic to use "symbolic path" for reading build
IDs when normalizing with `NormalizeOpts::map_files` equal to `false`
Expand Down
11 changes: 11 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,17 @@ where
self.ok_or_error(io::ErrorKind::InvalidInput, f)
}

/// Unwrap `self` into an `Ok` or an [`Error`] of the
/// [`ErrorKind::NotFound`] kind.
///
/// The closure `f` supplies the value (anything implementing
/// [`ToString`]) that is handed to `ok_or_error` alongside
/// [`io::ErrorKind::NotFound`].
#[inline]
fn ok_or_not_found<C, F>(self, f: F) -> Result<T, Error>
where
C: ToString,
F: FnOnce() -> C,
{
self.ok_or_error(io::ErrorKind::NotFound, f)
}

/// Unwrap `self` into an `Ok` or an [`Error`] of the
/// [`ErrorKind::UnexpectedEof`] kind.
#[inline]
Expand Down
202 changes: 195 additions & 7 deletions src/kernel/bpf/prog.rs
Original file line number Diff line number Diff line change
@@ -1,27 +1,37 @@
use std::borrow::Cow;
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt::Debug;
use std::fmt::Display;
use std::fmt::Formatter;
use std::fmt::Result as FmtResult;
use std::iter;
use std::mem::size_of;
use std::os::fd::AsFd as _;
use std::os::fd::AsRawFd as _;
use std::os::fd::BorrowedFd;
use std::path::Path;
use std::path::PathBuf;
use std::rc::Rc;
use std::str::FromStr;

use crate::inspect::SymInfo;
use crate::log;
use crate::once::OnceCell;
use crate::symbolize::CodeInfo;
use crate::symbolize::FindSymOpts;
use crate::symbolize::ResolvedSym;
use crate::symbolize::SrcLang;
use crate::Addr;
use crate::Error;
use crate::ErrorExt as _;
use crate::IntoError as _;
use crate::Result;
use crate::SymType;

use super::sys;
use super::Btf;


/// BPF kernel programs show up with this prefix followed by a tag and
Expand Down Expand Up @@ -154,12 +164,103 @@ impl BpfInfoCache {
}


/// A source code location associated with a single address of a BPF
/// program.
#[derive(Debug)]
struct LineInfoRecord {
/// Path of the source file. Reference counted so that records
/// referring to the same file can share a single allocation.
path: Rc<Path>,
/// The line number within `path`.
line: u32,
/// The column number within `line`.
col: u16,
}


/// Query BPF program line information.
fn query_line_info(
bpf_fd: BorrowedFd<'_>,
info: &sys::bpf_prog_info,
) -> Result<Option<HashMap<Addr, LineInfoRecord>>> {
let prog_id = info.id;

assert_eq!(
info.line_info_rec_size,
size_of::<sys::bpf_line_info>() as _
);
let mut line_info = Vec::<sys::bpf_line_info>::with_capacity(info.nr_line_info as _);
// SAFETY: `bpf_line_info` is valid for any bit pattern, so we
// can adjust the vector's length to its capacity.
let () = unsafe { line_info.set_len(line_info.capacity()) };

assert_eq!(info.jited_line_info_rec_size, size_of::<u64>() as _);
let mut jited_line_info = Vec::<u64>::with_capacity(info.nr_jited_line_info as _);
// SAFETY: `u64` is valid for any bit pattern, so we can adjust
// the vector's length to its capacity.
let () = unsafe { jited_line_info.set_len(jited_line_info.capacity()) };

let mut info = sys::bpf_prog_info {
nr_line_info: info.nr_line_info,
line_info_rec_size: info.line_info_rec_size,
line_info: line_info.as_mut_ptr() as _,
nr_jited_line_info: info.nr_jited_line_info,
jited_line_info_rec_size: info.jited_line_info_rec_size,
jited_line_info: jited_line_info.as_mut_ptr() as _,
..Default::default()
};
let () = sys::bpf_prog_get_info_from_fd(bpf_fd.as_raw_fd(), &mut info).with_context(|| {
format!("failed to retrieve BPF program information for program {prog_id}")
})?;

let mut line_records = HashMap::with_capacity(info.nr_jited_line_info as _);
let mut file_cache = HashMap::new();

let btf = if let Some(btf) = Btf::load_from_id(info.btf_id)
.with_context(|| format!("failed to load BTF information for program {prog_id}"))?
{
btf
} else {
// We don't have BTF information available. There is nothing we
// can do. Bail out gracefully.
return Ok(None)
};

for (i, addr) in jited_line_info.into_iter().enumerate() {
let info = line_info.get(i).ok_or_invalid_data(|| {
format!("failed to get BPF program {prog_id} line record {i} for address {addr:#x}")
})?;
let file = btf.name(info.file_name_off).ok_or_invalid_data(|| {
format!(
"failed to retrieve BPF program {prog_id} file information for address {addr:#x}"
)
})?;

// Check if we already have the file cached (and do so if
// not), to not have dozens of duplicate allocations flying
// around.
let path = match file_cache.entry(file) {
Entry::Vacant(vacancy) => {
let path = Rc::<Path>::from(PathBuf::from(file).into_boxed_path());
vacancy.insert(path)
}
Entry::Occupied(occupancy) => occupancy.into_mut(),
};

let _prev = line_records.insert(
addr,
LineInfoRecord {
path: Rc::clone(path),
line: info.line(),
col: info.column(),
},
);
}
Ok(Some(line_records))
}


/// Information about a BPF program.
#[derive(Debug)]
pub struct BpfProg {
/// The address associated with the program.
addr: Addr,
/// The program's name.
name: Box<str>,
/// The program's tag; used as the key for looking up the program's
/// `bpf_prog_info` in a `BpfInfoCache`.
tag: BpfTag,
/// Lazily populated per-address line information. The inner `Option`
/// is `None` if line information could not be provided because the
/// program has no BTF information available.
line_info: OnceCell<Option<HashMap<Addr, LineInfoRecord>>>,
}

impl BpfProg {
Expand All @@ -174,19 +275,89 @@ impl BpfProg {
addr,
name: Box::from(name),
tag,
line_info: OnceCell::new(),
};
Some(prog)
}

pub fn resolve(&self, _addr: Addr, _opts: &FindSymOpts) -> Result<ResolvedSym<'_>> {
// TODO: Need to look up BPF specific information.
let BpfProg { name, addr, .. } = self;
fn retrieve_code_info(
&self,
addr: Addr,
info_cache: &BpfInfoCache,
) -> Result<Option<CodeInfo<'_>>> {
let line_info = self.line_info.get_or_try_init(|| {
let prog_info = info_cache.lookup(self.tag)?.ok_or_not_found(|| {
format!(
"failed to find information for BPF program with tag {}",
self.tag
)
})?;

let fd = sys::bpf_prog_get_fd_from_id(prog_info.id).with_context(|| {
format!(
"failed to retrieve BPF program file descriptor for program {}",
prog_info.id
)
})?;

let line_info = query_line_info(fd.as_fd(), &prog_info).with_context(|| {
format!(
"failed to query line information for BPF program {}",
prog_info.id
)
})?;
Result::<_, Error>::Ok(line_info)
})?;

let code_info = if let Some(line_info) = line_info {
if let Some(line_record) = line_info.get(&addr) {
let code_info = CodeInfo {
dir: None,
file: Cow::Borrowed(line_record.path.as_os_str()),
line: Some(line_record.line),
column: Some(line_record.col),
_non_exhaustive: (),
};
Some(code_info)
} else {
log::debug!(
"BPF code information does not contain information for address {addr:#x}"
);
None
}
} else {
log::debug!("BPF program for address {addr:#x} does not have code information present");
None
};

Ok(code_info)
}

pub fn resolve(
&self,
addr: Addr,
opts: &FindSymOpts,
info_cache: &BpfInfoCache,
) -> Result<ResolvedSym<'_>> {
let code_info = if opts.code_info() {
self.retrieve_code_info(addr, info_cache)?
} else {
None
};

let BpfProg {
name,
addr: prog_addr,
..
} = self;
let sym = ResolvedSym {
name,
addr: *addr,
addr: *prog_addr,
// TODO: May be able to use `bpf_prog_info::func_info` here.
// Unsure.
size: None,
lang: SrcLang::Unknown,
code_info: None,
code_info,
inlined: Box::new([]),
};
Ok(sym)
Expand Down Expand Up @@ -230,8 +401,6 @@ impl<'prog> TryFrom<&'prog BpfProg> for SymInfo<'prog> {
mod tests {
use super::*;

use std::os::fd::AsFd as _;

use test_log::test;
use test_tag::tag;

Expand Down Expand Up @@ -284,4 +453,23 @@ mod tests {

assert_eq!(BpfTag::from(info.tag), tag);
}

/// Make sure that line information can be queried for every BPF
/// program currently loaded.
///
/// This is mostly meant as a catch-all sanity check, as no programs
/// *may* be loaded.
#[test]
fn line_info_querying() {
    let mut cursor = 0;
    loop {
        // Stop once the kernel has no further program ID to report.
        let prog_id = match sys::bpf_prog_get_next_id(cursor) {
            Ok(prog_id) => prog_id,
            Err(..) => break,
        };
        let prog_fd = sys::bpf_prog_get_fd_from_id(prog_id).unwrap();

        let mut prog_info = sys::bpf_prog_info::default();
        let () = sys::bpf_prog_get_info_from_fd(prog_fd.as_raw_fd(), &mut prog_info).unwrap();

        let _line_info = query_line_info(prog_fd.as_fd(), &prog_info).unwrap();
        cursor = prog_id;
    }
}
}
20 changes: 17 additions & 3 deletions src/kernel/ksym.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,14 @@ use crate::Error;
use crate::Result;
use crate::SymType;

#[cfg(feature = "bpf")]
use super::bpf::BpfInfoCache;
#[cfg(feature = "bpf")]
use super::bpf::BpfProg;

#[cfg(not(feature = "bpf"))]
type BpfInfoCache = ();

pub const KALLSYMS: &str = "/proc/kallsyms";
const DFL_KSYM_CAP: usize = 200000;

Expand All @@ -56,11 +61,16 @@ impl Ksym {
})
}

fn resolve(&self, addr: Addr, opts: &FindSymOpts) -> Result<ResolvedSym<'_>> {
fn resolve(
&self,
addr: Addr,
opts: &FindSymOpts,
_bpf_info_cache: &BpfInfoCache,
) -> Result<ResolvedSym<'_>> {
match self {
Ksym::Kfunc(kfunc) => kfunc.resolve(addr, opts),
#[cfg(feature = "bpf")]
Ksym::BpfProg(bpf_prog) => bpf_prog.resolve(addr, opts),
Ksym::BpfProg(bpf_prog) => bpf_prog.resolve(addr, opts, _bpf_info_cache),
}
}

Expand Down Expand Up @@ -164,6 +174,7 @@ pub(crate) struct KSymResolver {
by_name_idx: OnceCell<Box<[usize]>>,
syms: Box<[Ksym]>,
file_name: PathBuf,
bpf_info_cache: BpfInfoCache,
}

impl KSymResolver {
Expand Down Expand Up @@ -214,6 +225,7 @@ impl KSymResolver {
syms: syms.into_boxed_slice(),
by_name_idx: OnceCell::new(),
file_name: path.to_path_buf(),
bpf_info_cache: BpfInfoCache::default(),
};
Ok(slf)
}
Expand All @@ -231,6 +243,7 @@ impl KSymResolver {
.into_boxed_slice(),
by_name_idx: OnceCell::new(),
file_name: PathBuf::new(),
bpf_info_cache: BpfInfoCache::default(),
}
}

Expand Down Expand Up @@ -272,7 +285,7 @@ impl Symbolize for KSymResolver {
fn find_sym(&self, addr: Addr, opts: &FindSymOpts) -> Result<Result<ResolvedSym<'_>, Reason>> {
match self.find_ksym(addr) {
Ok(ksym) => {
let sym = ksym.resolve(addr, opts)?;
let sym = ksym.resolve(addr, opts, &self.bpf_info_cache)?;
Ok(Ok(sym))
}
Err(reason) => Ok(Err(reason)),
Expand Down Expand Up @@ -355,6 +368,7 @@ mod tests {
syms: Box::default(),
by_name_idx: OnceCell::new(),
file_name: PathBuf::new(),
bpf_info_cache: BpfInfoCache::default(),
};
assert_ne!(format!("{resolver:?}"), "");

Expand Down
Loading

0 comments on commit f780795

Please sign in to comment.