diff --git a/eBPF_Supermarket/Memory_Subsystem/mem_watcher/Makefile b/eBPF_Supermarket/Memory_Subsystem/mem_watcher/Makefile index bf1b63842..ed7ee3eb9 100644 --- a/eBPF_Supermarket/Memory_Subsystem/mem_watcher/Makefile +++ b/eBPF_Supermarket/Memory_Subsystem/mem_watcher/Makefile @@ -24,7 +24,7 @@ INCLUDES := -I$(OUTPUT) -I../../libbpf/include/uapi -I$(dir $(VMLINUX)) -I$(LIBB CFLAGS := -g -Wall ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS) -APPS = paf pr procstat sysstat memleak fraginfo vmasnap drsnoop oomkiller numafraginfo +APPS = paf pr procstat sysstat memleak fraginfo vmasnap drsnoop oomkiller numafraginfo slabrate TARGETS= mem_watcher CARGO ?= $(shell which cargo) diff --git a/eBPF_Supermarket/Memory_Subsystem/mem_watcher/README.md b/eBPF_Supermarket/Memory_Subsystem/mem_watcher/README.md index 473eaf650..ee00e798f 100644 --- a/eBPF_Supermarket/Memory_Subsystem/mem_watcher/README.md +++ b/eBPF_Supermarket/Memory_Subsystem/mem_watcher/README.md @@ -9,6 +9,10 @@ **eBPF 提供了一种高效的机制来监控和追踪系统级别的事件,包括内存的分配和释放。通过 eBPF,可以跟踪内存分配和释放的请求,并收集每次分配的调用堆栈。然后,分析这些信息,找出执行了内存分配但未执行释放操作的调用堆栈,这有助于程序员找出导致内存泄漏的源头。** --- +## TODO list + +- [x] 监控SLAB分配器的内存使用情况 +- [ ] 跟踪共享内存的用量信息 ## 背景意义 @@ -58,7 +62,7 @@ make ``` make后没有编译生成任何的二进制文件,只打印了logo,效果如下: - ![alt text](/docs/image/15.png) + ![alt text](../docs/image/15.png) 打开makefile,检查makefile逻辑,代码如下: ``` @@ -82,7 +86,7 @@ else BZS_APPS := ``` 再次执行make,发现报错为 "vmlinux.h file not find",如下: - ![alt text](/docs/image/16.png) + ![alt text](../docs/image/16.png) 执行以下命令,生成vmlinux.h文件 ``` @@ -125,7 +129,7 @@ registry = "git://crates.rustcc.cn/crates.io-index" ``` 重新安装还是会报错: - ![alt text](/docs/image/17.png) + ![alt text](../docs/image/17.png) 在 `~/.cargo/config` 文件中添加以下内容,即可解决: ``` @@ -133,7 +137,7 @@ registry = "git://crates.rustcc.cn/crates.io-index" git-fetch-with-cli = true ``` 再次make编译完成,生成二进制文件 mem_watcher,并能正常运行。 - ![alt text](/docs/image/18.png) + ![alt text](../docs/image/18.png) # 工具的使用方法说明 diff --git 
a/eBPF_Supermarket/Memory_Subsystem/mem_watcher/bpf/slabrate.bpf.c b/eBPF_Supermarket/Memory_Subsystem/mem_watcher/bpf/slabrate.bpf.c
new file mode 100644
index 000000000..0d275f973
--- /dev/null
+++ b/eBPF_Supermarket/Memory_Subsystem/mem_watcher/bpf/slabrate.bpf.c
@@ -0,0 +1,66 @@
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include "mem_watcher.h"
+
+#define MAX_ENTRIES 10240
+
+/* Process ID to trace; 0 traces every process. */
+const volatile pid_t target_pid = 0;
+
+/* All-zero template used to create the entry of a newly seen cache. */
+static struct slabrate_info slab_zero_value = {};
+
+/* Per-cache statistics, keyed by the address of the cache's name string. */
+struct {
+    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(max_entries, MAX_ENTRIES);
+    __type(key, char *);
+    __type(value, struct slabrate_info);
+} slab_entries SEC(".maps");
+
+/*
+ * Account one allocation from cachep: add 1 to the cache's allocation count
+ * and the cache's object size to its byte total. Always returns 0.
+ */
+static int probe_entry(struct kmem_cache *cachep)
+{
+    __u64 pid_tgid = bpf_get_current_pid_tgid();
+    __u32 pid = pid_tgid >> 32;
+    struct slabrate_info *valuep;
+    const char *name;
+    __u64 obj_size;
+
+    if (target_pid && target_pid != pid)
+        return 0;
+
+    name = BPF_CORE_READ(cachep, name);
+    valuep = bpf_map_lookup_elem(&slab_entries, &name);
+    if (!valuep) {
+        /* BPF_NOEXIST: never overwrite counters another CPU just created. */
+        bpf_map_update_elem(&slab_entries, &name, &slab_zero_value, BPF_NOEXIST);
+        valuep = bpf_map_lookup_elem(&slab_entries, &name);
+        if (!valuep)
+            return 0;
+        /* The _str variant stops at the NUL and always terminates the copy. */
+        bpf_probe_read_kernel_str(&valuep->name, sizeof(valuep->name), name);
+    }
+
+    obj_size = BPF_CORE_READ(cachep, size);
+
+    /* The map is shared by all CPUs, so update the counters atomically. */
+    __sync_fetch_and_add(&valuep->count, 1);
+    __sync_fetch_and_add(&valuep->size, obj_size);
+
+    return 0;
+}
+
+/* kprobe on kmem_cache_alloc(): account the allocation against its cache. */
+SEC("kprobe/kmem_cache_alloc")
+int BPF_KPROBE(kmem_cache_alloc, struct kmem_cache *cachep)
+{
+    return probe_entry(cachep);
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/eBPF_Supermarket/Memory_Subsystem/mem_watcher/include/mem_watcher.h b/eBPF_Supermarket/Memory_Subsystem/mem_watcher/include/mem_watcher.h
index b05ef1200..8febccaab 100644
--- a/eBPF_Supermarket/Memory_Subsystem/mem_watcher/include/mem_watcher.h
+++ b/eBPF_Supermarket/Memory_Subsystem/mem_watcher/include/mem_watcher.h
@@ -200,4 +200,14 @@ struct event {
     char comm[TASK_COMM_LEN]; // 被杀死进程的命令名
 };
 
+/* slabrate.h */
+#define CACHE_NAME_SIZE 32
+
+/* Per-cache allocation statistics shared by slabrate.bpf.c and mem_watcher.c. */
+struct slabrate_info {
+    char name[CACHE_NAME_SIZE]; /* cache name, at most CACHE_NAME_SIZE bytes */
+    __u64 count;                /* number of allocations recorded */
+    __u64 size;                 /* cache object size summed over those allocations */
+};
+
 #endif /* __MEM_WATCHER_H */
diff --git a/eBPF_Supermarket/Memory_Subsystem/mem_watcher/mem_watcher.c b/eBPF_Supermarket/Memory_Subsystem/mem_watcher/mem_watcher.c
index 8d1fab075..562d38d5e 100644
--- a/eBPF_Supermarket/Memory_Subsystem/mem_watcher/mem_watcher.c
+++ b/eBPF_Supermarket/Memory_Subsystem/mem_watcher/mem_watcher.c
@@ -37,6 +37,7 @@
 #include "memleak.skel.h"
 #include "vmasnap.skel.h"
 #include "drsnoop.skel.h"
+#include "slabrate.skel.h"
 #include "mem_watcher.h"
 #include "fraginfo.h"
 
@@ -122,6 +123,13 @@ struct order_entry
     struct ctg_info oinfo;
 };
 
+#define OUTPUT_ROWS_LIMIT 10240 /* capacity of print_stat()'s row buffer */
+
+static pid_t target_pid = 0;     /* PID passed to the slabrate BPF program; 0 = all */
+static bool clear_screen = true; /* run "clear" before each slabrate refresh */
+static int output_rows = 20;     /* maximum rows printed per slabrate refresh */
+static int count = 99999999;     /* slabrate refreshes left before exiting */
+
 int compare_entries(const void *a, const void *b)
 {
     struct order_entry *entryA = (struct order_entry *)a;
@@ -204,7 +212,6 @@ static volatile bool exiting = false;
         } \
     }
 
-// 为 oomkiller 使用的宏,指定 map_name
 #define LOAD_AND_ATTACH_SKELETON_WITH_MAP(skel, event, map_name) \
     do \
     { \
@@ -230,7 +237,6 @@ static volatile bool exiting = false;
         } \
     } while (0)
 
-// 保留原有逻辑的宏
 #define LOAD_AND_ATTACH_SKELETON(skel, event) \
     do \
     { \
@@ -273,7 +279,8 @@ static struct env
     int interval; // 打印间隔,单位为秒
     int duration; // 运行时长,单位为秒
     bool part2; // 是否启用系统内存状态报告的扩展部分
-    bool oomkiller; // 是否启用oomkiller事件处理
+    bool oomkiller; // whether oomkiller event handling is enabled
+    bool slabrate;  // whether slabrate (per-cache allocation rate) reporting is enabled
 
     long choose_pid; // 选择的进程号
     bool rss; // 是否打印进程页面信息
@@ -292,8 +299,9 @@ static struct env
     .print_time = false, // 默认不打印地址申请时间
     .rss = false, // 默认不打印进程页面信息
     .part2 = false, // 默认关闭系统内存状态报告的扩展部分
-    .oomkiller = false, // 默认关闭oomkiller事件处理
-    .choose_pid = 0, // 默认不选择特定进程
+    .oomkiller = false, // oomkiller event handling is off by default
+    .slabrate = false,  // slabrate reporting is off by default
+    .choose_pid = 0,    // no specific process is selected by default
     .interval = 1, // 默认打印间隔为1秒
     .duration = 10, // 默认持续运行10秒
 };
@@ -343,7 +351,9 @@ static const struct argp_option opts[] = {
     {"oomkiller", 'o', 0, 0, "print oomkiller (内存不足时被杀死的进程信息)"},
 
     {0, 0, 0, 0, 
"numafraginfo:", 16}, {"numafraginfo", 'N', 0, 0, "print numafraginfo"}, - + + {0, 0, 0, 0, "slabrate:", 17}, + {"slabrate", 'e', 0, 0, "print slabrate"}, {NULL, 'h', NULL, OPTION_HIDDEN, "show the full help"}, {0}, @@ -409,6 +419,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'N': env.numafraginfo = true; break; + case 'e': + env.slabrate = true; + break; default: return ARGP_ERR_UNKNOWN; } @@ -449,6 +462,7 @@ static int process_numafraginfo(struct numafraginfo_bpf *skel_numafraginfo); static int process_vmasnap(struct vmasnap_bpf *skel_vmasnap); static int process_drsnoop(struct drsnoop_bpf *skel_drsnoop); static int process_oomkiller(struct oomkiller_bpf *skel_oomkiller); // 新增的oomkiller处理函数原型 +static int process_slabrate(struct slabrate_bpf *skel_slabrate); static int handle_event_oomkiller(void *ctx, void *data, size_t data_sz); // 新增的oomkiller事件处理函数 static __u64 adjust_time_to_program_start_time(__u64 first_query_time); static int update_addr_times(struct memleak_bpf *skel_memleak); @@ -469,7 +483,8 @@ int main(int argc, char **argv) struct numafraginfo_bpf *skel_numafraginfo; struct vmasnap_bpf *skel_vmasnap; struct oomkiller_bpf *skel_oomkiller; - struct drsnoop_bpf *skel_drsnoop; + struct drsnoop_bpf *skel_drsnoop; + struct slabrate_bpf *skel_slabrate; err = argp_parse(&argp, argc, argv, 0, NULL, NULL); if (err) @@ -538,6 +553,51 @@ int main(int argc, char **argv) { PROCESS_SKEL(skel_drsnoop, drsnoop); } + else if (env.slabrate) + { + PROCESS_SKEL(skel_slabrate, slabrate); + } + else + { + fprintf(stderr, "No valid option was given\n"); + return 1; + } + + if (env.paf) + { + err = process_paf(skel_paf); + } + else if (env.pr) + { + err = process_pr(skel_pr); + } + else if (env.procstat) + { + err = process_procstat(skel_procstat); + } + else if (env.fraginfo) + { + err = process_fraginfo(skel_fraginfo); + } + else if(env.numafraginfo){ + err = process_numafraginfo(skel_numafraginfo); + } + else if (env.vmasnap) + { + err = 
process_vmasnap(skel_vmasnap);
+    }
+    else if (env.sysstat)
+    {
+        err = process_sysstat(skel_sysstat);
+    }
+    else if (env.memleak)
+    {
+        err = process_memleak(skel_memleak, env);
+    }
+    else if (env.oomkiller) // handle oomkiller
+    {
+        err = process_oomkiller(skel_oomkiller); // run the oomkiller handler
+    }
 
     return 0;
 }
@@ -1706,3 +1766,134 @@ static int process_drsnoop(struct drsnoop_bpf *skel_drsnoop) {
 
     return err < 0 ? -err : 0;
 }
+
+/*
+ * qsort() comparator for struct slabrate_info: orders entries by total
+ * allocated bytes, largest first.
+ */
+static int sort_column(const void *obj1, const void *obj2)
+{
+    const struct slabrate_info *s1 = obj1;
+    const struct slabrate_info *s2 = obj2;
+
+    /*
+     * Compare instead of subtracting: the sizes are 64-bit, so a difference
+     * truncated to int can report the wrong sign.
+     */
+    if (s1->size != s2->size)
+        return s1->size < s2->size ? 1 : -1;
+    return 0;
+}
+
+/*
+ * Print the busiest caches recorded in the slab_entries map, then empty the
+ * map so that the next call reports a fresh interval.
+ *
+ * Returns 0 on success, or the non-zero result of the failing map operation.
+ */
+static int print_stat(struct slabrate_bpf *obj)
+{
+    /* static: OUTPUT_ROWS_LIMIT entries are too large for the stack */
+    static struct slabrate_info values[OUTPUT_ROWS_LIMIT];
+    char *key, **prev_key = NULL;
+    int i, err, rows = 0;
+    int fd = bpf_map__fd(obj->maps.slab_entries);
+
+    printf("%-32s %6s %10s\n", "CACHE", "ALLOCS", "BYTES");
+
+    /* Snapshot the map without ever writing past the end of values[]. */
+    while (rows < OUTPUT_ROWS_LIMIT) {
+        err = bpf_map_get_next_key(fd, prev_key, &key);
+        if (err) {
+            if (errno == ENOENT) /* no more keys */
+                break;
+            return err;
+        }
+        err = bpf_map_lookup_elem(fd, &key, &values[rows]);
+        if (err)
+            return err;
+        rows++;
+        prev_key = &key;
+    }
+
+    qsort(values, rows, sizeof(values[0]), sort_column);
+    if (rows > output_rows)
+        rows = output_rows;
+    for (i = 0; i < rows; i++) {
+        /* the precision bounds the read if a name is not NUL-terminated */
+        printf("%-32.*s %6llu %10llu\n",
+               (int)sizeof(values[i].name), values[i].name,
+               (unsigned long long)values[i].count,
+               (unsigned long long)values[i].size);
+    }
+
+    printf("\n");
+
+    /* Reset the statistics: delete the first key until the map is empty. */
+    while (1) {
+        err = bpf_map_get_next_key(fd, NULL, &key);
+        if (err) {
+            if (errno == ENOENT) /* map is empty */
+                break;
+            return err;
+        }
+        err = bpf_map_delete_elem(fd, &key);
+        if (err)
+            return err;
+    }
+
+    return 0;
+}
+
+/*
+ * Load and attach the slabrate BPF program, then print per-cache allocation
+ * statistics once a second until `count` refreshes have been printed, the
+ * `exiting` flag is set, or a step fails. The skeleton is destroyed before
+ * returning, on every path.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int process_slabrate(struct slabrate_bpf *skel_slabrate)
+{
+    int err;
+
+    bpf_program__set_autoload(skel_slabrate->progs.kmem_cache_alloc, true);
+
+    /* Read-only data can only be set before the skeleton is loaded. */
+    skel_slabrate->rodata->target_pid = target_pid;
+
+    err = slabrate_bpf__load(skel_slabrate);
+    if (err) {
+        fprintf(stderr, "Failed to load and verify BPF skeleton\n");
+        goto slabrate_cleanup; /* do not leak the skeleton */
+    }
+
+    err = slabrate_bpf__attach(skel_slabrate);
+    if (err) {
+        fprintf(stderr, "Failed to attach BPF skeleton\n");
+        goto slabrate_cleanup;
+    }
+
+    while (1) {
+        sleep(1);
+
+        if (clear_screen) {
+            err = system("clear");
+            if (err)
+                goto slabrate_cleanup;
+        }
+
+        err = print_stat(skel_slabrate);
+        if (err)
+            goto slabrate_cleanup;
+
+        count--;
+        if (exiting || !count)
+            goto slabrate_cleanup;
+    }
+
+slabrate_cleanup:
+    slabrate_bpf__destroy(skel_slabrate);
+
+    return err != 0;
+}
diff --git a/eBPF_Supermarket/Memory_Subsystem/mem_watcher/test/test_mem.c b/eBPF_Supermarket/Memory_Subsystem/mem_watcher/test/test_mem.c
index e4182bb29..5a3a483bf 100644
--- a/eBPF_Supermarket/Memory_Subsystem/mem_watcher/test/test_mem.c
+++ b/eBPF_Supermarket/Memory_Subsystem/mem_watcher/test/test_mem.c
@@ -19,11 +19,13 @@ static struct env {
     bool mem_leak;
     bool mem_unleak;
     bool mem_stress_test;
+    bool simulate_leak;
 } env = {
     .overall_leak_test = false,
     .mem_leak = false,
     .mem_unleak = false,
-    .mem_stress_test = false
+    .mem_stress_test = false,
+    .simulate_leak = false
 };
 
 static volatile bool running = true; // 控制程序是否继续运行
@@ -36,6 +38,7 @@ static const struct argp_option opts[] = {
     { "detect-leak", 
'l', NULL, 0, "Detect memory leaks", 3 },
     { "no-leak", 'n', NULL, 0, "No memory leaks expected", 3 },
     { "stress-test", 's', NULL, 0, "Perform memory stress test", 4 },
+    { "simulate-leak", 'm', NULL, 0, "Simulate memory leak with complex objects", 5 },
     { NULL, 'h', NULL, OPTION_HIDDEN, "show the full help", 0 },
     { NULL, 0, NULL, 0, NULL, 0 }
 };
@@ -56,6 +59,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
     case 's':
         env.mem_stress_test = true;
         break;
+    case 'm':
+        env.simulate_leak = true;
+        break;
     case 'h':
         argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
         break;
@@ -65,6 +71,61 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
     return 0;
 }
 
+enum { COMPLEX_OBJECT_LEN = 1000 }; /* ints in each object's data array */
+
+/* Heap object that owns a second heap allocation, its data array. */
+typedef struct {
+    int *data;
+} ComplexObject;
+
+/*
+ * Allocate a ComplexObject and fill its data array with pseudo-random values
+ * in [0, 99]. Returns NULL if either allocation fails; otherwise the caller
+ * owns the object and releases it with destroyComplexObject().
+ */
+ComplexObject *createComplexObject(void)
+{
+    ComplexObject *obj = malloc(sizeof *obj);
+
+    if (obj == NULL)
+        return NULL;
+
+    obj->data = malloc(COMPLEX_OBJECT_LEN * sizeof *obj->data);
+    if (obj->data == NULL) {
+        free(obj);
+        return NULL;
+    }
+
+    for (int i = 0; i < COMPLEX_OBJECT_LEN; ++i)
+        obj->data[i] = rand() % 100;
+
+    return obj;
+}
+
+/* Free an object returned by createComplexObject(); NULL is ignored. */
+void destroyComplexObject(ComplexObject *obj)
+{
+    if (obj == NULL)
+        return;
+
+    free(obj->data); /* free(NULL) is a no-op, so no check is needed */
+    free(obj);
+}
+
+/*
+ * Leak memory on purpose: create 1000 objects and return without freeing any
+ * of them. This backs the --simulate-leak test case.
+ */
+void simulateMemoryLeak(void)
+{
+    ComplexObject *objects[1000];
+
+    for (int i = 0; i < 1000; ++i)
+        objects[i] = createComplexObject();
+
+    (void)objects; /* written but intentionally never read or freed */
+}
+
 // 模拟一些处理,通过写入分配的内存
 static void process_data(void *ptr, int size) {
     memset(ptr, 0, size);
@@ -203,6 +264,14 @@ int main(int argc, char **argv) {
         }
     }
 
+    if (env.simulate_leak) {
+        // Leak complex objects on purpose.
+        // NOTE(review): 1000 calls x 1000 objects x ~4 KB each is about 4 GB - confirm intended.
+        for (int i = 0; i < 1000; ++i) {
+            simulateMemoryLeak();
+        }
+    }
+
     if (env.mem_stress_test) {
         // 打印当前进程的进程号(PID)
         pid_t pid = getpid();