From 0c4cdaf0b6fea5b6357910c2607529d8167e36f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=BA=91=E5=BE=AE?= <1067852565@qq.com>
Date: Mon, 6 Nov 2023 22:18:03 +0000
Subject: [PATCH] daemon: add process exec tracing (#70)

* Add exec

* Add tracing for exec and exit

* Add time config

* Add control of auto-attach programs

* Add blank lines

* Change log level to debug

---
 README.md                                  |  2 +-
 daemon/bpf_tracer_event.h                  | 13 +++-
 daemon/kernel/bpf_tracer.bpf.c             | 61 ++++++++++++++++++
 daemon/user/bpf_tracer.cpp                 | 77 +++++++++++++++++++++-
 daemon/user/daemon_config.hpp              | 12 +++-
 daemon/user/handle_bpf_event.cpp           | 16 +++++
 daemon/user/handle_bpf_event.hpp           |  2 +
 runtime/src/bpftime_shm.cpp                |  1 +
 runtime/src/handler/perf_event_handler.cpp | 10 ++-
 9 files changed, 186 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 723cfb2b..df7929b5 100644
--- a/README.md
+++ b/README.md
@@ -163,7 +163,7 @@ see [documents/build-and-test.md](https://github.com/eunomia-bpf/bpftime/tree/ma
 
 - [X] ring buffer output support.
 - [X] perf event output support.
-- [ ] Figure out how to run transparently with kernel probe
+- [X] Figure out how to run transparently with kernel probe
 - [ ] An AOT compiler for eBPF can be easily added based on the LLVM IR.
 - [ ] More examples and usecases:
   - [ ] Network on userspace eBPF
diff --git a/daemon/bpf_tracer_event.h b/daemon/bpf_tracer_event.h
index 3303add3..1f4de85b 100644
--- a/daemon/bpf_tracer_event.h
+++ b/daemon/bpf_tracer_event.h
@@ -5,10 +5,10 @@
 #define TASK_COMM_LEN 16
 #define NAME_MAX 255
 #define INVALID_UID ((uid_t)-1)
-
 #define MAX_INSN_SIZE 128
-
 #define BPF_OBJ_NAME_LEN 16U
+#define MAX_FILENAME_LEN 127
+
 
 enum event_type {
 	SYS_OPEN,
@@ -17,6 +17,7 @@ enum event_type {
 	SYS_IOCTL,
 	SYS_PERF_EVENT_OPEN,
 	BPF_PROG_LOAD_EVENT,
+	EXEC_EXIT,
 };
 
 enum bpf_fd_type {
@@ -87,6 +88,14 @@ struct event {
 
 			int bpf_prog_id;
 		} ioctl_data;
+
+		struct {
+			int exit_event;
+			int ppid;
+			unsigned exit_code;
+			unsigned long long time_ns;
+			char filename[MAX_FILENAME_LEN];
+		} exec_data;
 	};
 };
 
diff --git a/daemon/kernel/bpf_tracer.bpf.c b/daemon/kernel/bpf_tracer.bpf.c
index 4065b849..cb941809 100644
--- a/daemon/kernel/bpf_tracer.bpf.c
+++ b/daemon/kernel/bpf_tracer.bpf.c
@@ -601,4 +601,65 @@ int tracepoint__syscalls__sys_exit_ioctl(struct trace_event_raw_sys_exit *ctx)
 	return 0;
 }
 
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 8192);
+	__type(key, pid_t);
+	__type(value, struct event);
+} exec_start SEC(".maps");
+
+SEC("tp/sched/sched_process_exec")
+int handle_exec(struct trace_event_raw_sched_process_exec *ctx)
+{
+	struct task_struct *task;
+	unsigned fname_off;
+	struct event e = {0};
+	pid_t pid;
+
+	/* remember the time exec() was called for this PID */
+	pid = bpf_get_current_pid_tgid() >> 32;
+	e.exec_data.time_ns = bpf_ktime_get_ns();
+
+	/* fill out the sample with data */
+	task = (struct task_struct *)bpf_get_current_task();
+
+	e.type = EXEC_EXIT;
+	e.exec_data.exit_event = false;
+	e.pid = pid;
+	e.exec_data.ppid = BPF_CORE_READ(task, real_parent, tgid);
+	bpf_get_current_comm(&e.comm, sizeof(e.comm));
+
+	fname_off = ctx->__data_loc_filename & 0xFFFF;
+	bpf_probe_read_str(&e.exec_data.filename, sizeof(e.exec_data.filename),
+			   (void *)ctx + fname_off);
+
+	/* store the event in the map; user space polls it periodically */
+	bpf_map_update_elem(&exec_start, &pid, &e, BPF_ANY);
+	return 0;
+}
+
+SEC("tp/sched/sched_process_exit")
+int handle_exit(struct trace_event_raw_sched_process_template *ctx)
+{
+	struct event *e;
+	pid_t pid, tid;
+	u64 id;
+
+	/* get PID and TID of the exiting thread/process */
+	id = bpf_get_current_pid_tgid();
+	pid = id >> 32;
+	tid = (u32)id;
+
+	/* ignore thread exits; only report whole-process exits */
+	if (pid != tid)
+		return 0;
+
+	/* drop the entry; user space treats the missing PID as the exit */
+	e = bpf_map_lookup_elem(&exec_start, &pid);
+	if (!e)
+		return 0;
+	bpf_map_delete_elem(&exec_start, &pid);
+	return 0;
+}
+
 char LICENSE[] SEC("license") = "GPL";
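Note: tracepoints hand dynamic strings to BPF as a 32-bit __data_loc field,
with the payload's byte offset (relative to the ctx pointer) in the low 16
bits and its length in the high 16 bits, which is why handle_exec masks with
0xFFFF. A minimal sketch of the full decoding, as a helper of our own (not
part of this patch):

	/* __data_loc layout: (len << 16) | offset, offset relative to ctx */
	static inline const char *data_loc_str(void *ctx, unsigned int data_loc)
	{
		unsigned int off = data_loc & 0xFFFF; /* low 16 bits: offset */
		/* unsigned int len = data_loc >> 16;    high 16 bits: length */
		return (const char *)ctx + off;
	}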
diff --git a/daemon/user/bpf_tracer.cpp b/daemon/user/bpf_tracer.cpp
index 30a75575..580f9ff3 100644
--- a/daemon/user/bpf_tracer.cpp
+++ b/daemon/user/bpf_tracer.cpp
@@ -17,6 +17,7 @@
 #include "daemon.hpp"
 #include 
 #include 
+#include <map>
 #include 
 
 #define NSEC_PER_SEC 1000000000ULL
@@ -50,6 +51,70 @@ static int handle_event_rb(void *ctx, void *data, size_t data_sz)
 	return handler->handle_event(e);
 }
 
+static int process_exec_maps(bpf_event_handler *handler, bpf_tracer_bpf *obj,
+			     daemon_config &env)
+{
+	if (!obj || obj->maps.exec_start == NULL) {
+		return 0;
+	}
+	event e;
+	int pid = 0, next_pid = 0;
+
+	static std::map<int, event> pid_map;
+
+	std::map<int, event> new_pid_map = {};
+	std::map<int, event> remain_pid_map = pid_map;
+
+	if (bpf_map__get_next_key(obj->maps.exec_start, NULL, &next_pid,
+				  sizeof(pid)) != 0) {
+		return 0;
+	}
+	do {
+		pid = next_pid;
+		int res = bpf_map__lookup_elem(obj->maps.exec_start, &pid,
+					       sizeof(pid), &e, sizeof(e), 0);
+		if (res != 0) {
+			continue;
+		}
+		struct timespec ts;
+		long long current_nanoseconds;
+		long start_time_ms;
+		// CLOCK_MONOTONIC matches bpf_ktime_get_ns() and will not
+		// jump backwards on NTP adjustments; CLOCK_REALTIME would
+		// give wall-clock time instead
+		if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
+			// total nanoseconds since boot
+			current_nanoseconds =
+				ts.tv_sec * 1000000000LL + ts.tv_nsec;
+		} else {
+			return 0;
+		}
+		start_time_ms =
+			(current_nanoseconds - e.exec_data.time_ns) / 1000000;
+		if (start_time_ms < env.duration_ms) {
+			// skip processes younger than duration_ms
+			continue;
+		}
+		// track processes that exec'd and detect exited ones
+		new_pid_map[pid] = e;
+		if (remain_pid_map.find(pid) == remain_pid_map.end()) {
+			// new pid
+			handle_event_rb(handler, &e, sizeof(e));
+		} else {
+			remain_pid_map.erase(pid);
+		}
+	} while (bpf_map__get_next_key(obj->maps.exec_start, &pid, &next_pid,
+				       sizeof(pid)) == 0);
+	// PIDs left here no longer appear in the kernel map: they exited
+	for (auto &item : remain_pid_map) {
+		e = item.second;
+		e.exec_data.exit_event = 1;
+		handle_event_rb(handler, &e, sizeof(e));
+	}
+	pid_map = new_pid_map;
+	return 0;
+}
+
 int bpftime::start_daemon(struct daemon_config env)
 {
 	LIBBPF_OPTS(bpf_object_open_opts, open_opts);
@@ -101,6 +166,15 @@ int bpftime::start_daemon(struct daemon_config env)
 			false);
 	}
 
+	if (!env.enable_auto_attach) {
+		bpf_program__set_autoload(
+			obj->progs.handle_exit,
+			false);
+		bpf_program__set_autoload(
+			obj->progs.handle_exec,
+			false);
+	}
+
 	bpftime_driver driver(env, obj);
 	// update handler config
 	bpf_event_handler handler = bpf_event_handler(env, driver);
@@ -129,12 +203,13 @@ int bpftime::start_daemon(struct daemon_config env)
 
 	/* main: poll */
 	while (!exiting) {
-		err = ring_buffer__poll(rb, 100 /* timeout, ms */);
+		err = ring_buffer__poll(rb, 300 /* timeout, ms */);
 		if (err < 0 && err != -EINTR) {
 			spdlog::error("error polling perf buffer: {}",
 				      strerror(-err));
 			// goto cleanup;
 		}
+		process_exec_maps(&handler, obj, env);
 		/* reset err to return 0 if exiting */
 		err = 0;
 	}
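Note: the daemon polls exec_start once per ring-buffer poll cycle instead of
streaming exec/exit events. PIDs that newly appear (and have outlived
duration_ms) are reported as EXEC; PIDs present in the previous scan but gone
now are reported as EXIT. The scan itself is the standard libbpf key-walking
idiom; a minimal standalone sketch, where map and value are placeholders
rather than names from this patch:

	// walk every key of a libbpf hash map with int keys
	int key = 0, next_key = 0;
	struct event value;
	if (bpf_map__get_next_key(map, NULL, &next_key, sizeof(next_key)) == 0) {
		do {
			key = next_key;
			if (bpf_map__lookup_elem(map, &key, sizeof(key),
						 &value, sizeof(value), 0) == 0) {
				// ... inspect value ...
			}
		} while (bpf_map__get_next_key(map, &key, &next_key,
					       sizeof(next_key)) == 0);
	}

One caveat of this pattern: entries inserted or deleted concurrently can be
skipped or visited twice, which is acceptable here because a missed exit is
simply picked up on the next poll cycle.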
diff --git a/daemon/user/daemon_config.hpp b/daemon/user/daemon_config.hpp
index d72dd2b4..77ee2e3a 100644
--- a/daemon/user/daemon_config.hpp
+++ b/daemon/user/daemon_config.hpp
@@ -19,8 +19,10 @@ struct daemon_config {
 	bool show_open = false;
 	// enable replace prog to support bypass kernel verifier
 	bool enable_replace_prog = false;
-	// enable replace uprobe to make kernel uprobe not break user space uprobe
+	// enable uprobe replacement so kernel uprobes do not break
+	// user space uprobes
 	bool enable_replace_uprobe = true;
+	// replace the uprobe path in the original syscall with this new path
 	char new_uprobe_path[PATH_LENTH] = "\0";
 	// bpftime cli path for bpftime daemon to create prog and link, maps
 	std::string bpftime_cli_path = "~/.bpftime/bpftime";
@@ -28,8 +30,14 @@ struct daemon_config {
 	std::string bpftime_tool_path = "~/.bpftime/bpftimetool";
 	// should bpftime be involve
 	bool is_driving_bpftime = true;
-
+	// trace and submit detailed bpf-related events
 	bool submit_bpf_events = true;
+	// make uprobes behave like kernel uprobes and automatically
+	// attach to the target process
+	bool enable_auto_attach = false;
+	// minimum lifetime before a process is traced by uprobe;
+	// skip short-lived processes to reduce overhead
+	int duration_ms = 1000;
 };
 
 #endif // BPFTIME_DAEMON_CONFIG_HPP
diff --git a/daemon/user/handle_bpf_event.cpp b/daemon/user/handle_bpf_event.cpp
index a0ca2635..bf2ce03c 100644
--- a/daemon/user/handle_bpf_event.cpp
+++ b/daemon/user/handle_bpf_event.cpp
@@ -79,6 +79,19 @@ int bpf_event_handler::handle_open_events(const struct event *e)
 	return 0;
 }
 
+int bpf_event_handler::handle_exec_and_exit(const struct event *e)
+{
+
+	if (e->exec_data.exit_event == false) {
+		spdlog::info("EXEC {:<6} {:<16} {}", e->pid, e->comm,
+			     e->exec_data.filename);
+	} else {
+		spdlog::info("EXIT {:<6} {:<16} {}", e->pid, e->comm,
+			     e->exec_data.exit_code);
+	}
+	return 0;
+}
+
 static const char *bpf_cmd_strings[] = {
 	"BPF_MAP_CREATE",	  "BPF_MAP_LOOKUP_ELEM",
 	"BPF_MAP_UPDATE_ELEM",	  "BPF_MAP_DELETE_ELEM",
@@ -395,6 +408,9 @@ int bpf_event_handler::handle_event(const struct event *e)
 	case SYS_IOCTL:
 		return handle_ioctl(e);
 		break;
+	case EXEC_EXIT:
+		return handle_exec_and_exit(e);
+		break;
 	}
 	return 0;
 }
diff --git a/daemon/user/handle_bpf_event.hpp b/daemon/user/handle_bpf_event.hpp
index cc67e6b5..f16d7dcc 100644
--- a/daemon/user/handle_bpf_event.hpp
+++ b/daemon/user/handle_bpf_event.hpp
@@ -23,6 +23,8 @@ class bpf_event_handler {
 	int handle_perf_event_open(const struct event *e);
 	int handle_load_bpf_prog_event(const struct event *e);
 	int handle_ioctl(const struct event *e);
+
+	int handle_exec_and_exit(const struct event *e);
       public:
 	// callback function for bpf events in ring buffer
 	int handle_event(const struct event *e);
diff --git a/runtime/src/bpftime_shm.cpp b/runtime/src/bpftime_shm.cpp
index 92f1b388..c90d5b49 100644
--- a/runtime/src/bpftime_shm.cpp
+++ b/runtime/src/bpftime_shm.cpp
@@ -112,6 +112,7 @@ int bpftime_attach_perf_to_bpf(int perf_fd, int bpf_fd)
 	return shm_holder.global_shared_memory.attach_perf_to_bpf(perf_fd,
 								  bpf_fd);
 }
+
 int bpftime_add_ringbuf_fd_to_epoll(int ringbuf_fd, int epoll_fd,
 				    epoll_data_t extra_data)
 {
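Note: the two new knobs interact as follows: enable_auto_attach gates whether
the handle_exec/handle_exit tracepoints are loaded at all (via the
bpf_program__set_autoload calls above), while duration_ms filters which
processes the exec scan reports. A hypothetical caller-side sketch; the CLI
flag parsing that would populate these fields is not part of this patch:

	daemon_config cfg;
	cfg.enable_auto_attach = true; // load the sched exec/exit tracepoints
	cfg.duration_ms = 2000;	       // ignore processes younger than 2 s
	bpftime::start_daemon(cfg);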
diff --git a/runtime/src/handler/perf_event_handler.cpp b/runtime/src/handler/perf_event_handler.cpp
index e33beaf5..c228c96b 100644
--- a/runtime/src/handler/perf_event_handler.cpp
+++ b/runtime/src/handler/perf_event_handler.cpp
@@ -12,7 +12,6 @@
 
 #pragma GCC diagnostic ignored "-Wstrict-aliasing"
 
-
 #define READ_ONCE_U64(x) (*(volatile uint64_t *)&x)
 #define WRITE_ONCE_U64(x, v) (*(volatile uint64_t *)&x) = (v)
 
@@ -85,16 +84,19 @@ bpf_perf_event_handler::bpf_perf_event_handler(
 {
 }
+
 bool software_perf_event_data::has_data() const
 {
 	auto &ref = get_header_ref_const();
 	return ref.data_tail != ref.data_head;
 }
+
 const perf_event_mmap_page &
 software_perf_event_data::get_header_ref_const() const
 {
 	return *(perf_event_mmap_page *)(uintptr_t)(mmap_buffer.data());
 }
+
 int software_perf_event_data::output_data(const void *buf, size_t size)
 {
 	spdlog::debug("Handling perf event output data with size {}", size);
@@ -117,7 +119,7 @@ int software_perf_event_data::output_data(const void *buf, size_t size)
 	// the data. In this way, we'll never make data_head equals to
 	// data_tail, at situation other than an empty buffer
 	if (available_size <= head.header.size) {
-		spdlog::warn(
+		spdlog::debug(
 			"Dropping data with size {}, available_size {}, required size {}",
 			size, available_size, head.header.size);
 		return 0;
@@ -146,6 +148,7 @@ int software_perf_event_data::output_data(const void *buf, size_t size)
 	return 0;
 }
+
 perf_event_mmap_page &software_perf_event_data::get_header_ref()
 {
 	return *(perf_event_mmap_page *)(uintptr_t)(mmap_buffer.data());
@@ -187,10 +190,12 @@ void *software_perf_event_data::ensure_mmap_buffer(size_t buffer_size)
 	}
 	return mmap_buffer.data();
 }
+
 size_t software_perf_event_data::mmap_size() const
 {
 	return mmap_buffer.size() - pagesize;
 }
+
 std::optional
 bpf_perf_event_handler::try_get_software_perf_data_weak_ptr() const
 {
@@ -211,4 +216,5 @@ bpf_perf_event_handler::try_get_software_perf_data_raw_buffer(
 		return {};
 	}
 }
+
 } // namespace bpftime
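Note: the spdlog::warn to spdlog::debug change reflects that dropping a
record is expected behavior when the consumer is slow, not an anomaly:
output_data refuses to write whenever the free span is not strictly larger
than the record, so data_head can only equal data_tail when the ring is
empty. A sketch of the underlying head/tail arithmetic with illustrative
names (the real code reads the counters through READ_ONCE_U64):

	// free bytes in a perf mmap data ring of size data_size;
	// unsigned subtraction handles wrap-around of the monotonic counters
	static size_t ring_free_bytes(uint64_t data_head, uint64_t data_tail,
				      size_t data_size)
	{
		size_t used = (size_t)(data_head - data_tail);
		return data_size - used;
	}

Writing only while ring_free_bytes() > record size keeps "full" and "empty"
distinguishable without a separate element counter.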