diff --git a/.github/workflows/kvm_watcher.yml b/.github/workflows/kvm_watcher.yml index c21216604..982860008 100644 --- a/.github/workflows/kvm_watcher.yml +++ b/.github/workflows/kvm_watcher.yml @@ -45,5 +45,6 @@ jobs: sudo ./kvm_watcher -n -t 10 sudo ./kvm_watcher -d -t 10 sudo ./kvm_watcher -f -m -t 10 + sudo ./kvm_watcher -i -t 10 make clean diff --git a/eBPF_Supermarket/kvm_watcher/README.md b/eBPF_Supermarket/kvm_watcher/README.md index 5991795be..7eb30deed 100755 --- a/eBPF_Supermarket/kvm_watcher/README.md +++ b/eBPF_Supermarket/kvm_watcher/README.md @@ -18,6 +18,10 @@ - **vCPU相关指标分析:** - 记录有关vCPU的性能指标,包括唤醒时的时间戳,halt持续时间,vCPU id等相关信息。 - 实时监控vCPU的halt-polling时间的变化信息,包括vCPU的线程tid,变化类型,变化前后的halt-polling时间等信息。 +- **kvm中中断注入时相关信息:** + - PIC:实时记录PIC芯片类型,中断引脚编号,中断触发方式,是否可屏蔽,处理延时,是否发生合并等信息。 + - IOAPIC: + - MSI: ## 三、使用方法 @@ -52,6 +56,7 @@ BPF program used for monitoring KVM event -d, --mark_page_dirty Monitor virtual machine dirty page information. -e, --vm_exit Monitoring the event of vm exit. -f, --kvmmmu_page_fault Monitoring the data of kvmmmu page fault. + -i, --kvm_pic Monitor the interrupt information in KVM VM. -m, --mmio Monitoring the data of mmio page fault..(The -f option must be specified.) -n, --halt_poll_ns Monitoring the variation in vCPU halt-polling time. -p, --vm_pid=PID Specify the virtual machine pid to monitor. 
@@ -69,6 +74,8 @@ BPF program used for monitoring KVM event `-f`:记录kvmmmu缺页信息 +`-i`:记录kvm中断设置相关信息 + `-m`:记录mmio缺页信息(需要和`-f`一同使用) `-d`:记录kvm脏页信息 @@ -77,7 +84,7 @@ BPF program used for monitoring KVM event `-w`:记录vcpu唤醒时的相关信息 -`-p`:指定kvm虚拟机进程pid(必须为虚拟机进程,否则会报错) +`-p`:指定kvm虚拟机进程pid `-t`:监控时间 @@ -87,6 +94,7 @@ BPF program used for monitoring KVM event ├── include │ ├── kvm_exits.h //vm exit事件相关的内核bpf程序 │ ├── kvm_mmu.h //kvmmmu相关的内核bpf程序 +│ ├── kvm_pic.h //中断注入相关内核bpf程序 │ ├── kvm_vcpu.h //vcpu相关内核bpf程序 │ └── kvm_watcher.h //项目公用头文件 ├── Makefile //编译脚本 @@ -131,10 +139,7 @@ BPF program used for monitoring KVM event ``` make - sudo ./kvm_watcher -w -t 10 - sudo ./kvm_watcher -e -t 10 -s - sudo ./kvm_watcher -n -t 10 - sudo ./kvm_watcher -d -t 10 + sudo ./kvm_watcher [options] make clean ``` diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h b/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h index 8b98a4e0c..21221c2ab 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h @@ -80,7 +80,7 @@ static int trace_kvm_exit(struct exit *ctx, pid_t vm_pid) { return 0; } -static int trace_kvm_entry(void *rb) { +static int trace_kvm_entry(void *rb, struct common_event *e) { struct reason_info *reas; pid_t pid, tid; u64 id, ts, *start_ts, duration_ns = 0; @@ -90,25 +90,22 @@ static int trace_kvm_entry(void *rb) { reas = bpf_map_lookup_elem(&times, &tid); if (reas) { u32 reason; - struct exit_event *e; int count = 0; duration_ns = bpf_ktime_get_ns() - reas->time; bpf_map_delete_elem(&times, &tid); reason = reas->reason; count = reas->count; RESERVE_RINGBUF_ENTRY(rb, e); - e->reason_number = reason; + e->exit_data.reason_number = reason; e->process.pid = pid; - e->duration_ns = duration_ns; - bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); e->process.tid = tid; - e->total = ++total; - e->count = count; + e->exit_data.duration_ns = duration_ns; + bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); 
+ e->exit_data.total = ++total; + e->exit_data.count = count; e->time = reas->time; bpf_ringbuf_submit(e, 0); - return 0; - } else { - return 0; } + return 0; } #endif /* __KVM_EXITS_H */ diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_mmu.h b/eBPF_Supermarket/kvm_watcher/include/kvm_mmu.h index 5a7edc5d2..3a55ea1db 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_mmu.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_mmu.h @@ -50,7 +50,8 @@ static int trace_page_fault(struct trace_event_raw_kvm_page_fault *ctx, } static int trace_direct_page_fault(struct kvm_vcpu *vcpu, - struct kvm_page_fault *fault, void *rb) { + struct kvm_page_fault *fault, void *rb, + struct common_event *e) { u64 addr; bpf_probe_read_kernel(&addr, sizeof(u64), &fault->addr); u64 *ts; @@ -66,23 +67,22 @@ static int trace_direct_page_fault(struct kvm_vcpu *vcpu, short memslot_id = BPF_CORE_READ(fault, slot, id); u64 delay = bpf_ktime_get_ns() - *ts; bpf_map_delete_elem(&pf_delay, &addr); - struct page_fault_event *e; RESERVE_RINGBUF_ENTRY(rb, e); count = bpf_map_lookup_elem(&pf_count, &addr); if (count) { (*count)++; - e->count = *count; + e->page_fault_data.count = *count; bpf_map_update_elem(&pf_count, &addr, count, BPF_ANY); } else { - e->count = 1; + e->page_fault_data.count = 1; bpf_map_update_elem(&pf_count, &addr, &new_count, BPF_ANY); } - e->delay = delay; - e->addr = addr; - e->error_code = error_code; - e->hva = hva; - e->pfn = pfn; - e->memslot_id = memslot_id; + e->page_fault_data.delay = delay; + e->page_fault_data.addr = addr; + e->page_fault_data.error_code = error_code; + e->page_fault_data.hva = hva; + e->page_fault_data.pfn = pfn; + e->page_fault_data.memslot_id = memslot_id; e->process.pid = bpf_get_current_pid_tgid() >> 32; bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); e->time = *ts; @@ -104,7 +104,8 @@ static int trace_kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, } static int trace_handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 
addr, - bool direct, void *rb) { + bool direct, void *rb, + struct common_event *e) { u64 *ts; ts = bpf_map_lookup_elem(&pf_delay, &addr); if (ts) { @@ -112,24 +113,24 @@ static int trace_handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, u32 new_count = 1; u64 delay = bpf_ktime_get_ns() - *ts; bpf_map_delete_elem(&pf_delay, &addr); - struct page_fault_event *e; RESERVE_RINGBUF_ENTRY(rb, e); count = bpf_map_lookup_elem(&pf_count, &addr); if (count) { (*count)++; - e->count = *count; + e->page_fault_data.count = *count; bpf_map_update_elem(&pf_count, &addr, count, BPF_ANY); } else { - e->count = 1; + e->page_fault_data.count = 1; bpf_map_update_elem(&pf_count, &addr, &new_count, BPF_ANY); } - e->delay = delay; - e->addr = addr; - e->error_code = PFERR_RSVD_MASK; + e->page_fault_data.delay = delay; + e->page_fault_data.addr = addr; + e->page_fault_data.error_code = PFERR_RSVD_MASK; e->process.pid = bpf_get_current_pid_tgid() >> 32; bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); e->time = *ts; bpf_ringbuf_submit(e, 0); + return 0; } return 0; } diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_pic.h b/eBPF_Supermarket/kvm_watcher/include/kvm_pic.h new file mode 100644 index 000000000..b2671263f --- /dev/null +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_pic.h @@ -0,0 +1,77 @@ +// Copyright 2023 The LMP Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// author: nanshuaibo811@163.com +// +// Kernel space BPF program used for monitoring data for KVM PIC. +#ifndef __KVM_PIC_H +#define __KVM_PIC_H + +#include "kvm_watcher.h" +#include "vmlinux.h" +#include +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 8192); + __type(key, u32); + __type(value, u64); +} irq_delay SEC(".maps"); + +static int trace_in_kvm_pic_set_irq(struct kvm_pic *s, int irq, + int irq_source_id, int level, + pid_t vm_pid) { + CHECK_PID(vm_pid) { + if (irq < 0 || irq >= PIC_NUM_PINS) { + return 0; + } + u64 ts = bpf_ktime_get_ns(); + u32 irq_type = irq >> 3; + bpf_map_update_elem(&irq_delay, &irq_type, &ts, BPF_ANY); + } + return 0; +} + +static int trace_out_kvm_pic_set_irq(struct kvm_pic *s, int irq, + int irq_source_id, int level, int retval, + void *rb, struct common_event *e) { + u64 *ts; + u32 irq_type = irq >> 3; + ts = bpf_map_lookup_elem(&irq_delay, &irq_type); + if (!ts) { + return 0; + } + u64 start = *ts; /* copy now: *ts must not be read after the delete below */ + u64 delay = bpf_ktime_get_ns() - start; + bpf_map_delete_elem(&irq_delay, &irq_type); + RESERVE_RINGBUF_ENTRY(rb, e); + e->pic_data.ret = retval; + e->time = start; + e->pic_data.delay = delay; + e->process.pid = bpf_get_current_pid_tgid() >> 32; + e->pic_data.chip = irq_type; + e->pic_data.pin = irq & 7; + bpf_probe_read_kernel(&e->pic_data.elcr, sizeof(u8), + &s->pics[irq_type].elcr); + bpf_probe_read_kernel(&e->pic_data.imr, sizeof(u8), &s->pics[irq_type].imr); + bpf_probe_read_kernel(&e->pic_data.irq_source_id, sizeof(int), + &irq_source_id); + bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); + bpf_ringbuf_submit(e, 0); + return 0; +} + +#endif /* __KVM_PIC_H */ \ No newline at end of file diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h b/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h index 6216e8481..3b0587694 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h @@ -29,7 +29,7 @@ struct 
vcpu_wakeup { u64 pad; __u64 ns; bool waited; - bool vaild; + bool valid; }; struct halt_poll_ns { @@ -47,18 +47,40 @@ struct { __type(value, u32); } count_dirty_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 8192); + __type(key, u32); + __type(value, u32); +} vcpu_tid SEC(".maps"); + +static int trace_kvm_vcpu_halt(struct kvm_vcpu *vcpu, pid_t vm_pid) { + CHECK_PID(vm_pid) { + u32 tid = bpf_get_current_pid_tgid(); + u32 vcpu_id; + bpf_probe_read_kernel(&vcpu_id, sizeof(vcpu->vcpu_id), &vcpu->vcpu_id); + bpf_map_update_elem(&vcpu_tid, &tid, &vcpu_id, BPF_ANY); + } + return 0; +} + static int trace_kvm_vcpu_wakeup(struct vcpu_wakeup *ctx, void *rb, - pid_t vm_pid) { + struct common_event *e, pid_t vm_pid) { CHECK_PID(vm_pid) { u32 tid = bpf_get_current_pid_tgid(); - struct vcpu_wakeup_event *e; + u32 *vcpu_id = bpf_map_lookup_elem(&vcpu_tid, &tid); + if (!vcpu_id) { + return 0; + } RESERVE_RINGBUF_ENTRY(rb, e); - u64 hlt_time = bpf_ktime_get_ns(); - e->waited = ctx->waited; + u64 time = bpf_ktime_get_ns(); + e->vcpu_wakeup_data.waited = ctx->waited; e->process.pid = pid; e->process.tid = tid; - e->dur_hlt_ns = ctx->ns; - e->hlt_time = hlt_time; + e->vcpu_wakeup_data.dur_hlt_ns = ctx->ns; + e->vcpu_wakeup_data.vcpu_id = *vcpu_id; + e->time = time; + e->vcpu_wakeup_data.valid = ctx->valid; bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); bpf_ringbuf_submit(e, 0); } @@ -66,19 +88,18 @@ static int trace_kvm_vcpu_wakeup(struct vcpu_wakeup *ctx, void *rb, } static int trace_kvm_halt_poll_ns(struct halt_poll_ns *ctx, void *rb, - pid_t vm_pid) { + struct common_event *e, pid_t vm_pid) { CHECK_PID(vm_pid) { u32 tid = bpf_get_current_pid_tgid(); - struct halt_poll_ns_event *e; RESERVE_RINGBUF_ENTRY(rb, e); u64 time = bpf_ktime_get_ns(); e->process.pid = pid; e->process.tid = tid; e->time = time; - e->grow = ctx->grow; - e->old = ctx->old; - e->new = ctx->new; - e->vcpu_id = ctx->vcpu_id; + e->halt_poll_data.grow = 
ctx->grow; + e->halt_poll_data.old = ctx->old; + e->halt_poll_data.new = ctx->new; + e->halt_poll_data.vcpu_id = ctx->vcpu_id; bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); bpf_ringbuf_submit(e, 0); } @@ -87,7 +108,8 @@ static int trace_kvm_halt_poll_ns(struct halt_poll_ns *ctx, void *rb, static int trace_mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, - gfn_t gfn, void *rb, pid_t vm_pid) { + gfn_t gfn, void *rb, + struct common_event *e, pid_t vm_pid) { CHECK_PID(vm_pid) { u32 flags; struct kvm_memory_slot *slot; @@ -107,23 +129,22 @@ static int trace_mark_page_dirty_in_slot(struct kvm *kvm, } u32 tid = bpf_get_current_pid_tgid(); unsigned long base_gfn; - struct mark_page_dirty_in_slot_event *e; RESERVE_RINGBUF_ENTRY(rb, e); u64 time = bpf_ktime_get_ns(); e->process.pid = pid; e->process.tid = tid; e->time = time; - e->gfn = gfn; + e->mark_page_dirty_data.gfn = gfn; bpf_probe_read_kernel(&base_gfn, sizeof(memslot->base_gfn), &memslot->base_gfn); - e->rel_gfn = gfn - base_gfn; - bpf_probe_read_kernel(&e->npages, sizeof(memslot->npages), - &memslot->npages); - bpf_probe_read_kernel(&e->userspace_addr, + e->mark_page_dirty_data.rel_gfn = gfn - base_gfn; + bpf_probe_read_kernel(&e->mark_page_dirty_data.npages, + sizeof(memslot->npages), &memslot->npages); + bpf_probe_read_kernel(&e->mark_page_dirty_data.userspace_addr, sizeof(memslot->userspace_addr), &memslot->userspace_addr); - bpf_probe_read_kernel(&e->slot_id, sizeof(memslot->id), - &memslot->id); + bpf_probe_read_kernel(&e->mark_page_dirty_data.slot_id, + sizeof(memslot->id), &memslot->id); bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); bpf_ringbuf_submit(e, 0); } diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h index 4c7c32694..69cf2e2b8 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h @@ -21,6 +21,10 @@ 
#define TASK_COMM_LEN 16 #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) +#define NS_TO_US(ns) ((ns) / 1000ULL) +#define OUTPUT_INTERVAL(seconds) sleep(seconds) + +#define OPTIONS_LIST "-w, -p, -d, -f, -i, or -e" #define PFERR_PRESENT_BIT 0 #define PFERR_WRITE_BIT 1 @@ -30,12 +34,20 @@ #define PFERR_PK_BIT 5 #define PFERR_SGX_BIT 15 +#define KVM_IRQCHIP_PIC_MASTER 0 +#define KVM_IRQCHIP_PIC_SLAVE 1 +#define KVM_IRQCHIP_IOAPIC 2 +#define KVM_NR_IRQCHIPS 3 + +#define PIC_NUM_PINS 16 + #define PFERR_RSVD_MASK (1UL << 3) // mmio -#define PRINT_USAGE_ERR() \ - do { \ - fprintf(stderr, "Use either the -w, -p, -d,-f or -e option.\n"); \ - argp_usage(state); \ +#define PRINT_USAGE_ERR() \ + do { \ + fprintf(stderr, "Please specify exactly one option from %s.\n", \ + OPTIONS_LIST); \ + argp_usage(state); \ } while (0) #define SET_OPTION_AND_CHECK_USAGE(option, value) \ @@ -48,6 +60,11 @@ } \ } while (0) +// 定义清屏宏 +#define CLEAR_SCREEN() printf("\033[2J\033[H") + +#define RING_BUFFER_TIMEOUT_MS 100 + #define RESERVE_RINGBUF_ENTRY(rb, e) \ do { \ typeof(e) _tmp = bpf_ringbuf_reserve(rb, sizeof(*e), 0); \ @@ -60,27 +77,6 @@ unsigned pid = bpf_get_current_pid_tgid() >> 32; \ if ((vm_pid) < 0 || pid == (vm_pid)) -struct process { - unsigned pid; - unsigned tid; - char comm[TASK_COMM_LEN]; -}; -struct vcpu_wakeup_event { - struct process process; - unsigned long long dur_hlt_ns; - bool waited; - unsigned long long hlt_time; -}; - -struct exit_event { - struct process process; - unsigned reason_number; - unsigned long long duration_ns; - int count; - int total; - unsigned long long time; -}; - struct ExitReason { int number; const char *name; @@ -92,35 +88,83 @@ struct reason_info { int count; }; -struct halt_poll_ns_event { - struct process process; - bool grow; - unsigned int new; - unsigned int old; - unsigned long long time; - unsigned vcpu_id; +struct process { + unsigned pid; + unsigned tid; + char comm[TASK_COMM_LEN]; }; -struct mark_page_dirty_in_slot_event { +enum EventType { 
+ NONE_TYPE, + VCPU_WAKEUP, + EXIT, + HALT_POLL, + MARK_PAGE_DIRTY, + PAGE_FAULT, + PIC +}; + +struct common_event { struct process process; unsigned long long time; - unsigned long npages; - unsigned long userspace_addr; - unsigned long long rel_gfn; - unsigned long long gfn; - short slot_id; -}; -struct page_fault_event { - struct process process; - unsigned long long time; - unsigned long long delay; - unsigned long long error_code; - unsigned long long addr; - unsigned long long pfn; - unsigned long long hva; - unsigned count; - short memslot_id; + // Payload specific to each event type + union { + struct { + unsigned long long dur_hlt_ns; + bool waited; + unsigned vcpu_id; + bool valid; + // VCPU_WAKEUP-specific members + } vcpu_wakeup_data; + + struct { + unsigned reason_number; + unsigned long long duration_ns; + int count; + int total; + // EXIT-specific members + } exit_data; + + struct { + bool grow; + unsigned int new; + unsigned int old; + unsigned vcpu_id; + // HALT_POLL-specific members + } halt_poll_data; + + struct { + unsigned long npages; + unsigned long userspace_addr; + unsigned long long rel_gfn; + unsigned long long gfn; + short slot_id; + // MARK_PAGE_DIRTY-specific members + } mark_page_dirty_data; + + struct { + unsigned long long delay; + unsigned long long error_code; + unsigned long long addr; + unsigned long long pfn; + unsigned long long hva; + unsigned count; + short memslot_id; + // PAGE_FAULT-specific members + } page_fault_data; + + struct { + unsigned long long delay; + int ret; + unsigned char chip; + unsigned char pin; + unsigned char elcr; + unsigned char imr; + int irq_source_id; + // PIC-specific members + } pic_data; + }; }; #endif /* __KVM_WATCHER_H */ \ No newline at end of file diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c index be5294823..c78da17c7 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c @@ -23,25 +23,32 @@ #include "../include/kvm_exits.h" #include 
"../include/kvm_vcpu.h" #include "../include/kvm_mmu.h" +#include "../include/kvm_pic.h" #include "../include/kvm_watcher.h" char LICENSE[] SEC("license") = "Dual BSD/GPL"; const volatile pid_t vm_pid = -1; +static struct common_event *e; struct { __uint(type, BPF_MAP_TYPE_RINGBUF); __uint(max_entries, 256 * 1024); } rb SEC(".maps"); +SEC("fentry/kvm_vcpu_halt") +int BPF_PROG(fentry_kvm_vcpu_halt, struct kvm_vcpu *vcpu) { + return trace_kvm_vcpu_halt(vcpu, vm_pid); +} + SEC("tp/kvm/kvm_vcpu_wakeup") int tp_vcpu_wakeup(struct vcpu_wakeup *ctx) { - return trace_kvm_vcpu_wakeup(ctx, &rb, vm_pid); + return trace_kvm_vcpu_wakeup(ctx, &rb, e, vm_pid); } SEC("tp/kvm/kvm_halt_poll_ns") int tp_kvm_halt_poll_ns(struct halt_poll_ns *ctx) { - return trace_kvm_halt_poll_ns(ctx, &rb, vm_pid); + return trace_kvm_halt_poll_ns(ctx, &rb, e, vm_pid); } SEC("tp/kvm/kvm_exit") @@ -51,13 +58,13 @@ int tp_exit(struct exit *ctx) { SEC("tp/kvm/kvm_entry") int tp_entry(struct exit *ctx) { - return trace_kvm_entry(&rb); + return trace_kvm_entry(&rb, e); } SEC("kprobe/mark_page_dirty_in_slot") int BPF_KPROBE(kp_mark_page_dirty_in_slot, struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn) { - return trace_mark_page_dirty_in_slot(kvm, memslot, gfn, &rb, vm_pid); + return trace_mark_page_dirty_in_slot(kvm, memslot, gfn, &rb, e, vm_pid); } SEC("tp/kvm/kvm_page_fault") @@ -68,7 +75,7 @@ int tp_page_fault(struct trace_event_raw_kvm_page_fault *ctx) { SEC("fexit/direct_page_fault") int BPF_PROG(fexit_direct_page_fault, struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { - return trace_direct_page_fault(vcpu, fault, &rb); + return trace_direct_page_fault(vcpu, fault, &rb, e); } SEC("fentry/kvm_mmu_page_fault") @@ -80,5 +87,18 @@ int BPF_PROG(fentry_kvm_mmu_page_fault, struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, SEC("fexit/handle_mmio_page_fault") int BPF_PROG(fexit_handle_mmio_page_fault, struct kvm_vcpu *vcpu, u64 addr, bool direct) { - return trace_handle_mmio_page_fault(vcpu, addr, 
direct, &rb); + return trace_handle_mmio_page_fault(vcpu, addr, direct, &rb, e); +} + +SEC("fentry/kvm_pic_set_irq") +int BPF_PROG(fentry_kvm_pic_set_irq, struct kvm_pic *s, int irq, + int irq_source_id, int level) { + return trace_in_kvm_pic_set_irq(s, irq, irq_source_id, level, vm_pid); +} + +SEC("fexit/kvm_pic_set_irq") +int BPF_PROG(fexit_kvm_pic_set_irq, struct kvm_pic *s, int irq, + int irq_source_id, int level, int retval) { + return trace_out_kvm_pic_set_irq(s, irq, irq_source_id, level, retval, &rb, + e); } \ No newline at end of file diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index af118a208..62dfeaa1c 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -172,8 +172,9 @@ void freeExitInfoList(Node *head) { void printExitInfo(Node *head) { Node *current = head; + CLEAR_SCREEN(); printf( - "\n-----------------------------------------------------------------" + "-----------------------------------------------------------------" "----------\n"); printf("%-21s %-18s %-8s %-8s %-13s \n", "EXIT_REASON", "COMM", "PID", "COUNT", "AVG_DURATION(ns)"); @@ -196,7 +197,7 @@ int doesVmProcessExist(pid_t pid) { if (proc_name[size - 1] == '\n') { proc_name[size - 1] = '\0'; // Remove newline character } - if (strstr(proc_name, "qemu-system-x86_64") != NULL) { + if (strstr(proc_name, "qemu-system") != NULL) { fclose(file); return 1; // VmProcess name contains the target string } else { @@ -297,8 +298,10 @@ static struct env { bool execute_mark_page_dirty; bool execute_page_fault; bool mmio_page_fault; + bool execute_pic; int monitoring_time; pid_t vm_pid; + enum EventType event_type; } env = { .execute_vcpu_wakeup = false, .execute_exit = false, @@ -306,9 +309,11 @@ static struct env { .execute_halt_poll_ns = false, .execute_mark_page_dirty = false, .execute_page_fault = false, + .execute_pic = false, .mmio_page_fault = false, .monitoring_time = 
0, .vm_pid = -1, + .event_type = NONE_TYPE, }; const char *argp_program_version = "kvm_watcher 1.0"; @@ -319,19 +324,20 @@ int option_selected = 0; // 功能标志变量,确保激活子功能 static const struct argp_option opts[] = { {"vcpu_wakeup", 'w', NULL, 0, "Monitoring the wakeup of vcpu."}, {"vm_exit", 'e', NULL, 0, "Monitoring the event of vm exit."}, - {"vcpu_halt_poll_ns", 'n', NULL, 0, - "Monitoring the variation in vCPU polling time."}, + {"halt_poll_ns", 'n', NULL, 0, + "Monitoring the variation in vCPU halt-polling time."}, {"mark_page_dirty", 'd', NULL, 0, "Monitor virtual machine dirty page information."}, {"kvmmmu_page_fault", 'f', NULL, 0, "Monitoring the data of kvmmmu page fault."}, + {"kvm_pic", 'i', NULL, 0, "Monitor the interrupt information in KVM VM."}, {"stat", 's', NULL, 0, "Display statistical data.(The -e option must be specified.)"}, {"mmio", 'm', NULL, 0, "Monitoring the data of mmio page fault..(The -f option must be " "specified.)"}, {"vm_pid", 'p', "PID", 0, "Specify the virtual machine pid to monitor."}, - {"monitoring_time", 't', "SEC", 0, "Time for monitoring event."}, + {"monitoring_time", 't', "SEC", 0, "Time for monitoring."}, {}, }; @@ -355,6 +361,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) { case 'f': SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_page_fault); break; + case 'i': + SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_pic); + break; case 's': if (env.execute_exit) { env.ShowStats = true; @@ -416,71 +425,219 @@ static void sig_handler(int sig) { exiting = true; } +// 根据 env 设置 EventType +static int determineEventType(struct env *env) { + if (!env) { + return 1; + } + if (env->execute_vcpu_wakeup) { + env->event_type = VCPU_WAKEUP; + } else if (env->execute_exit) { + env->event_type = EXIT; + } else if (env->execute_halt_poll_ns) { + env->event_type = HALT_POLL; + } else if (env->execute_mark_page_dirty) { + env->event_type = MARK_PAGE_DIRTY; + } else if (env->execute_page_fault) { + 
env->event_type = PAGE_FAULT; + } else if (env->execute_pic) { + env->event_type = PIC; + } else { + env->event_type = NONE_TYPE; // 或者根据需要设置一个默认的事件类型 + } + return 0; +} + +const char *get_irqchip(unsigned char chip) { + if (chip >= KVM_NR_IRQCHIPS) { + return "Invalid"; + } else if (chip == KVM_IRQCHIP_PIC_MASTER) { + return "master"; + } else if (chip == KVM_IRQCHIP_PIC_SLAVE) { + return "slave"; + } else if (chip == KVM_IRQCHIP_IOAPIC) { + return "ioapic"; + } else { + return "Unknown"; + } +} + static int handle_event(void *ctx, void *data, size_t data_sz) { - if (env.execute_vcpu_wakeup) { - const struct vcpu_wakeup_event *e = data; - printf("%-18llu %-20llu %-15s %-6d/%-8d %-10s\n", e->hlt_time, - e->dur_hlt_ns, e->process.comm, e->process.pid, e->process.tid, - e->waited ? "wait" : "poll"); - } else if (env.execute_exit) { - char info_buffer[256]; - const struct exit_event *e = data; - printf("%-18llu %-2d/%-18s %-18s %-6u/%-8u %-8d %-13llu \n", e->time, - e->reason_number, getExitReasonName(e->reason_number), - e->process.comm, e->process.pid, e->process.tid, e->count, - e->duration_ns); - if (env.ShowStats) { - snprintf(info_buffer, sizeof(info_buffer), "%-18s %-8u %-8d", - e->process.comm, e->process.pid, e->count); - addExitInfo(&exitInfoBuffer, e->reason_number, info_buffer, - e->duration_ns, e->count); - } - } else if (env.execute_halt_poll_ns) { - const struct halt_poll_ns_event *e = data; - printf("%-18llu %-15s %-6d/%-8d %-10s %-7d %-7d --> %d \n", e->time, - e->process.comm, e->process.pid, e->process.tid, - e->grow ? 
"grow" : "shrink", e->vcpu_id, e->old, e->new); - } else if (env.execute_mark_page_dirty) { - const struct mark_page_dirty_in_slot_event *e = data; - printf("%-18llu %-15s %-6d/%-8d %-10llx %-10llx %-10lu %-15lx %d \n", - e->time, e->process.comm, e->process.pid, e->process.tid, e->gfn, - e->rel_gfn, e->npages, e->userspace_addr, e->slot_id); - } else if (env.execute_page_fault) { - const struct page_fault_event *e = data; - printf("%-18llu %-15s %-10u %-12llx %-6u %-10llu ", e->time, - e->process.comm, e->process.pid, e->addr, e->count, e->delay); - if (e->error_code & (1ULL << PFERR_RSVD_BIT)) { - printf("%-20s %-17s %-10s", "-", "-", "-"); - } else { - printf("%-20llx %-17llx %-10d", e->hva, e->pfn, e->memslot_id); - } - if (e->error_code & (1ULL << PFERR_PRESENT_BIT)) { - printf(" Present"); - } - if (e->error_code & (1ULL << PFERR_WRITE_BIT)) { - printf(" Write"); + struct common_event *e = data; + switch (env.event_type) { + case VCPU_WAKEUP: { + // 使用 e->vcpu_wakeup_data 访问 VCPU_WAKEUP 特有成员 + printf("%-18llu %-20llu %-15s %-6d/%-8d %-10d %-10s %-10s\n", + e->time, NS_TO_US(e->vcpu_wakeup_data.dur_hlt_ns), + e->process.comm, e->process.pid, e->process.tid, + e->vcpu_wakeup_data.vcpu_id, + e->vcpu_wakeup_data.waited ? "wait" : "poll", + e->vcpu_wakeup_data.valid ? 
"valid" : "invalid"); + break; } - if (e->error_code & (1ULL << PFERR_USER_BIT)) { - printf(" User"); + case EXIT: { + char info_buffer[256]; + // 使用 e->exit_data 访问 EXIT 特有成员 + printf("%-18llu %-2d/%-18s %-18s %-6u/%-8u %-8d %-13llu \n", + e->time, e->exit_data.reason_number, + getExitReasonName(e->exit_data.reason_number), + e->process.comm, e->process.pid, e->process.tid, + e->exit_data.count, e->exit_data.duration_ns); + + if (env.ShowStats) { + snprintf(info_buffer, sizeof(info_buffer), "%-18s %-8u %-8d", + e->process.comm, e->process.pid, e->exit_data.count); + addExitInfo(&exitInfoBuffer, e->exit_data.reason_number, + info_buffer, e->exit_data.duration_ns, + e->exit_data.count); + } + break; } - if (e->error_code & (1ULL << PFERR_RSVD_BIT)) { - printf(" Reserved(MMIO)"); - /*IOAPIC 的mmio基址 #define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000*/ + case HALT_POLL: { + // 使用 e->halt_poll_data 访问 HALT_POLL 特有成员 + printf("%-18llu %-15s %-6d/%-8d %-10s %-7d %-7d --> %d \n", e->time, + e->process.comm, e->process.pid, e->process.tid, + e->halt_poll_data.grow ? 
"grow" : "shrink", + e->halt_poll_data.vcpu_id, e->halt_poll_data.old, + e->halt_poll_data.new); + break; } - if (e->error_code & (1ULL << PFERR_FETCH_BIT)) { - printf(" Exec"); + case MARK_PAGE_DIRTY: { + // 使用 e->mark_page_dirty_data 访问 MARK_PAGE_DIRTY 特有成员 + printf( + "%-18llu %-15s %-6d/%-8d %-10llx %-10llx %-10lu %-15lx %d \n", + e->time, e->process.comm, e->process.pid, e->process.tid, + e->mark_page_dirty_data.gfn, e->mark_page_dirty_data.rel_gfn, + e->mark_page_dirty_data.npages, + e->mark_page_dirty_data.userspace_addr, + e->mark_page_dirty_data.slot_id); + break; } - if (e->error_code & (1ULL << PFERR_PK_BIT)) { - printf(" Protection-Key"); + case PAGE_FAULT: { + // 使用 e->page_fault_data 访问 PAGE_FAULT 特有成员 + printf("%-18llu %-15s %-10u %-12llx %-6u %-10llu ", e->time, + e->process.comm, e->process.pid, e->page_fault_data.addr, + e->page_fault_data.count, e->page_fault_data.delay); + if (e->page_fault_data.error_code & (1ULL << PFERR_RSVD_BIT)) { + printf("%-20s %-17s %-10s", "-", "-", "-"); + } else { + printf("%-20llx %-17llx %-10d", e->page_fault_data.hva, + e->page_fault_data.pfn, e->page_fault_data.memslot_id); + } + if (e->page_fault_data.error_code & (1ULL << PFERR_PRESENT_BIT)) { + printf(" Present"); + } + if (e->page_fault_data.error_code & (1ULL << PFERR_WRITE_BIT)) { + printf(" Write"); + } + if (e->page_fault_data.error_code & (1ULL << PFERR_USER_BIT)) { + printf(" User"); + } + if (e->page_fault_data.error_code & (1ULL << PFERR_RSVD_BIT)) { + printf(" Reserved(MMIO)"); + /*IOAPIC 的mmio基址 #define IOAPIC_DEFAULT_BASE_ADDRESS + * 0xfec00000*/ + } + if (e->page_fault_data.error_code & (1ULL << PFERR_FETCH_BIT)) { + printf(" Exec"); + } + if (e->page_fault_data.error_code & (1ULL << PFERR_PK_BIT)) { + printf(" Protection-Key"); + } + if (e->page_fault_data.error_code & (1ULL << PFERR_SGX_BIT)) { + printf(" SGX"); + } + printf("\n"); + break; } - if (e->error_code & (1ULL << PFERR_SGX_BIT)) { - printf(" SGX"); + case PIC: { + // 使用 e->pic_data 访问 
PIC 特有成员 + printf( + "%-18llu %-15s %-10d %-10llu %-10s %-10u %-10s %-10s %-10d " + "%-10s\n", + e->time, e->process.comm, e->process.pid, e->pic_data.delay, + get_irqchip(e->pic_data.chip), e->pic_data.pin, + (e->pic_data.elcr & (1 << e->pic_data.pin)) ? "level" : "edge", + (e->pic_data.imr & (1 << e->pic_data.pin)) ? "masked" : "-", + e->pic_data.irq_source_id, e->pic_data.ret == 0 ? "coalesced" : "-"); + break; } - printf("\n"); + default: + // Unknown event type: nothing to print + break; + } + + return 0; +} + +static int print_event_head(struct env *env) { + if (!env->event_type) { + // No event type selected (NONE_TYPE): report failure to the caller + return 1; + } + switch (env->event_type) { + case VCPU_WAKEUP: + printf("%-18s %-20s %-15s %-15s %-10s %-10s %-10s\n", "TIME(ns)", + "DUR_HALT(us)", "COMM", "PID/TID", "VCPU_ID", "WAIT/POLL", + "VALID?"); + break; + case EXIT: + printf("%-18s %-21s %-18s %-15s %-8s %-13s \n", "TIME(ns)", + "EXIT_REASON", "COMM", "PID/TID", "COUNT", "DURATION(ns)"); + break; + case HALT_POLL: + printf("%-18s %-15s %-15s %-10s %-7s %-11s %-10s\n", "TIME(ns)", + "COMM", "PID/TID", "TYPE", "VCPU_ID", "OLD(ns)", "NEW(ns)"); + break; + case MARK_PAGE_DIRTY: + printf("%-18s %-15s %-15s %-10s %-10s %-10s %-10s %-10s\n", + "TIME(ns)", "COMM", "PID/TID", "GFN", "REL_GFN", "NPAGES", + "USERSPACE_ADDR", "SLOT_ID"); + break; + case PAGE_FAULT: + printf("%-18s %-15s %-10s %-12s %-6s %-10s %-20s %-17s %-10s %s\n", + "TIMESTAMP", "COMM", "PID", "ADDRESS", "COUNT", "DELAY", + "HVA", "PFN", "MEM_SLOTID", "ERROR_TYPE"); + break; + case PIC: + printf("%-18s %-15s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", + "TIMESTAMP", "COMM", "PID", "DELAY", "CHIP", "PIN", "TRIG_MODE", + "MASK", "SOURCE_ID", "COALESCE"); + break; + default: + // Handle default case or display an error message + break; } return 0; } +static void set_disable_load(struct kvm_watcher_bpf *skel) { + bpf_program__set_autoload(skel->progs.tp_vcpu_wakeup, + env.execute_vcpu_wakeup ? 
true : false); + bpf_program__set_autoload(skel->progs.fentry_kvm_vcpu_halt, + env.execute_vcpu_wakeup ? true : false); + bpf_program__set_autoload(skel->progs.tp_exit, + env.execute_exit ? true : false); + bpf_program__set_autoload(skel->progs.tp_entry, + env.execute_exit ? true : false); + bpf_program__set_autoload(skel->progs.tp_kvm_halt_poll_ns, + env.execute_halt_poll_ns ? true : false); + bpf_program__set_autoload(skel->progs.kp_mark_page_dirty_in_slot, + env.execute_mark_page_dirty ? true : false); + bpf_program__set_autoload(skel->progs.tp_page_fault, + env.execute_page_fault ? true : false); + bpf_program__set_autoload(skel->progs.fexit_direct_page_fault, + env.execute_page_fault ? true : false); + bpf_program__set_autoload(skel->progs.fentry_kvm_mmu_page_fault, + env.mmio_page_fault ? true : false); + bpf_program__set_autoload(skel->progs.fexit_handle_mmio_page_fault, + env.mmio_page_fault ? true : false); + bpf_program__set_autoload(skel->progs.fentry_kvm_pic_set_irq, + env.execute_pic ? true : false); + bpf_program__set_autoload(skel->progs.fexit_kvm_pic_set_irq, + env.execute_pic ? true : false); +} + int main(int argc, char **argv) { struct ring_buffer *rb = NULL; struct kvm_watcher_bpf *skel; @@ -509,24 +666,8 @@ int main(int argc, char **argv) { skel->rodata->vm_pid = env.vm_pid; /* Disable or load kernel hook functions */ - bpf_program__set_autoload(skel->progs.tp_vcpu_wakeup, - env.execute_vcpu_wakeup ? true : false); - bpf_program__set_autoload(skel->progs.tp_exit, - env.execute_exit ? true : false); - bpf_program__set_autoload(skel->progs.tp_entry, - env.execute_exit ? true : false); - bpf_program__set_autoload(skel->progs.tp_kvm_halt_poll_ns, - env.execute_halt_poll_ns ? true : false); - bpf_program__set_autoload(skel->progs.kp_mark_page_dirty_in_slot, - env.execute_mark_page_dirty ? true : false); - bpf_program__set_autoload(skel->progs.tp_page_fault, - env.execute_page_fault ? 
true : false); - bpf_program__set_autoload(skel->progs.fexit_direct_page_fault, - env.execute_page_fault ? true : false); - bpf_program__set_autoload(skel->progs.fentry_kvm_mmu_page_fault, - env.mmio_page_fault ? true : false); - bpf_program__set_autoload(skel->progs.fexit_handle_mmio_page_fault, - env.mmio_page_fault ? true : false); + set_disable_load(skel); + /* Load & verify BPF programs */ err = kvm_watcher_bpf__load(skel); if (err) { @@ -548,27 +689,27 @@ int main(int argc, char **argv) { fprintf(stderr, "Failed to create ring buffer\n"); goto cleanup; } - /* Process events */ - if (env.execute_vcpu_wakeup) { - printf("%-18s %-20s %-15s %-15s %-10s\n", "HLT_TIME(ns)", - "DURATIONS_TIME(ns)", "COMM", "PID/TID", "WAIT/POLL"); - } else if (env.execute_exit) { - printf("%-18s %-21s %-18s %-15s %-8s %-13s \n", "TIME", "EXIT_REASON", - "COMM", "PID/TID", "COUNT", "DURATION(ns)"); - } else if (env.execute_halt_poll_ns) { - printf("%-18s %-15s %-15s %-10s %-7s %-11s %-10s\n", "TIME(ns)", "COMM", - "PID/TID", "TYPE", "VCPU_ID", "OLD(ns)", "NEW(ns)"); - } else if (env.execute_mark_page_dirty) { - printf("%-18s %-15s %-15s %-10s %-11s %-10s %-10s %-10s\n", "TIME(ns)", - "COMM", "PID/TID", "GFN", "REL_GFN", "NPAGES", "USERSPACE_ADDR", - "SLOT_ID"); - } else if (env.execute_page_fault) { - printf("%-18s %-15s %-10s %-12s %-6s %-10s %-20s %-17s %-10s %s\n", - "TIMESTAMP", "COMM", "PID", "ADDRESS", "COUNT", "DELAY", "HVA", - "PFN", "MEM_SLOTID", "ERROR_TYPE"); + + // 根据 env 设置 EventType + err = determineEventType(&env); + if (err) { + fprintf(stderr, "Invalid env parm\n"); + goto cleanup; + } + + // 清屏 + if (option_selected) { + CLEAR_SCREEN(); + } + + /*打印信息头*/ + err = print_event_head(&env); + if (err) { + fprintf(stderr, "Please specify an option using %s.\n", OPTIONS_LIST); + goto cleanup; } while (!exiting) { - err = ring_buffer__poll(rb, 10 /* timeout, ms */); + err = ring_buffer__poll(rb, RING_BUFFER_TIMEOUT_MS /* timeout, ms */); /* Ctrl-C will cause -EINTR */ if 
(err == -EINTR) { err = 0;