From 07b24882c2e45f199166640de505af36645d900c Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 2 Feb 2024 13:10:20 +0800 Subject: [PATCH 01/15] add kvmexit watcher --- .../kvm_watcher/kvm_exit_bcc/kvmexit.py | 378 ++++++++++++++++++ .../kvm_exit_bcc/kvmexit_example.txt | 250 ++++++++++++ 2 files changed, 628 insertions(+) create mode 100644 eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit.py create mode 100644 eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit_example.txt diff --git a/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit.py b/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit.py new file mode 100644 index 000000000..2f82530f0 --- /dev/null +++ b/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python +# +# kvmexit.py +# +# Display the exit_reason and its statistics of each vm exit +# for all vcpus of all virtual machines. For example: +# $./kvmexit.py +# PID TID KVM_EXIT_REASON COUNT +# 1273551 1273568 EXIT_REASON_MSR_WRITE 6 +# 1274253 1274261 EXIT_REASON_EXTERNAL_INTERRUPT 1 +# 1274253 1274261 EXIT_REASON_HLT 12 +# ... +# +# Besides, we also allow users to specify one pid, tid(s), or one +# pid and its vcpu. See kvmexit_example.txt for more examples. +# +# @PID: each virtual machine's pid in the user space. +# @TID: the user space's thread of each vcpu of that virtual machine. +# @KVM_EXIT_REASON: the reason why the vm exits. +# @COUNT: the counts of the @KVM_EXIT_REASONS. +# +# REQUIRES: Linux 6.2 (BPF_PROG_TYPE_TRACEPOINT support) +# +# Copyright (c) 2024 YYS. All rights reserved. +# Original code © 2024 ByteDance Inc. All rights reserved. 
+# Author(s): +# YYS +# 以下代码段是根据Fei Li的实现进行的修改 +# 原始代码链接:https://github.com/iovisor/bcc/blob/master/tools/kvmexit.py + + +from __future__ import print_function +from time import sleep +from bcc import BPF +import argparse +import multiprocessing +import os +import subprocess + +# +# Process Arguments +# +def valid_args_list(args): + args_list = args.split(",") + for arg in args_list: + try: + int(arg) + except: + raise argparse.ArgumentTypeError("must be valid integer") + return args_list + +# arguments +examples = """examples: + ./kvmexit # Display kvm_exit_reason and its statistics in real-time until Ctrl-C + ./kvmexit 5 # Display in real-time after sleeping 5s + ./kvmexit -p 3195281 # Collpase all tids for pid 3195281 with exit reasons sorted in descending order + ./kvmexit -p 3195281 20 # Collpase all tids for pid 3195281 with exit reasons sorted in descending order, and display after sleeping 20s + ./kvmexit -p 3195281 -v 0 # Display only vcpu0 for pid 3195281, descending sort by default + ./kvmexit -p 3195281 -a # Display all tids for pid 3195281 + ./kvmexit -t 395490 # Display only for tid 395490 with exit reasons sorted in descending order + ./kvmexit -t 395490 20 # Display only for tid 395490 with exit reasons sorted in descending order after sleeping 20s + ./kvmexit -T '395490,395491' # Display for a union like {395490, 395491} +""" +parser = argparse.ArgumentParser( + description="Display kvm_exit_reason and its statistics at a timed interval", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("duration", nargs="?", default=99999999, type=int, help="show delta for next several seconds") +parser.add_argument("-p", "--pid", type=int, help="trace this PID only") +exgroup = parser.add_mutually_exclusive_group() +exgroup.add_argument("-t", "--tid", type=int, help="trace this TID only") +exgroup.add_argument("-T", "--tids", type=valid_args_list, help="trace a comma separated series of tids with no space in 
between") +exgroup.add_argument("-v", "--vcpu", type=int, help="trace this vcpu only") +exgroup.add_argument("-a", "--alltids", action="store_true", help="trace all tids for this pid") +args = parser.parse_args() +duration = int(args.duration) + +# +# Setup BPF +# + +# load BPF program +bpf_text = """ +#include <linux/delay.h> + +#define REASON_NUM 76 +#define TGID_NUM 1024 + +struct exit_count { + u64 exit_ct[REASON_NUM]; +}; +BPF_PERCPU_ARRAY(init_value, struct exit_count, 1); +BPF_TABLE("percpu_hash", u64, struct exit_count, pcpu_kvm_stat, TGID_NUM); + +struct cache_info { + u64 cache_pid_tgid; + struct exit_count cache_exit_ct; +}; +BPF_PERCPU_ARRAY(pcpu_cache, struct cache_info, 1); + +TRACEPOINT_PROBE(kvm, kvm_exit) { + int cache_miss = 0; + int zero = 0; + u32 er = args->exit_reason; + if (er >= REASON_NUM) { + return 0; + } + + u64 cur_pid_tgid = bpf_get_current_pid_tgid(); + u32 tgid = cur_pid_tgid >> 32; + u32 pid = cur_pid_tgid; + + if (THREAD_FILTER) + return 0; + + struct exit_count *tmp_info = NULL, *initial = NULL; + struct cache_info *cache_p; + cache_p = pcpu_cache.lookup(&zero); + if (cache_p == NULL) { + return 0; + } + + if (cache_p->cache_pid_tgid == cur_pid_tgid) { + //a. If the cur_pid_tgid hit this physical cpu consecutively, save it to pcpu_cache + tmp_info = &cache_p->cache_exit_ct; + } else { + //b. If another pid_tgid matches this pcpu for the last hit, OR it is the first time to hit this physical cpu. + cache_miss = 1; + + // b.a Try to load the last cache struct if exists. 
+ tmp_info = pcpu_kvm_stat.lookup(&cur_pid_tgid); + + // b.b If it is the first time for the cur_pid_tgid to hit this pcpu, employ a + // per_cpu array to initialize pcpu_kvm_stat's exit_count with each exit reason's count is zero + if (tmp_info == NULL) { + initial = init_value.lookup(&zero); + if (initial == NULL) { + return 0; + } + + pcpu_kvm_stat.update(&cur_pid_tgid, initial); + tmp_info = pcpu_kvm_stat.lookup(&cur_pid_tgid); + // To pass the verifier + if (tmp_info == NULL) { + return 0; + } + } + } + + if (er < REASON_NUM) { + tmp_info->exit_ct[er]++; + if (cache_miss == 1) { + if (cache_p->cache_pid_tgid != 0) { + // b.*.a Let's save the last hit cache_info into kvm_stat. + pcpu_kvm_stat.update(&cache_p->cache_pid_tgid, &cache_p->cache_exit_ct); + } + // b.* As the cur_pid_tgid meets current pcpu_cache_array for the first time, save it. + cache_p->cache_pid_tgid = cur_pid_tgid; + bpf_probe_read(&cache_p->cache_exit_ct, sizeof(*tmp_info), tmp_info); + } + return 0; + } + + return 0; +} +""" + +# format output +exit_reasons = ( + "EXCEPTION_NMI", + "EXTERNAL_INTERRUPT", + "TRIPLE_FAULT", + "INIT_SIGNAL", + "SIPI_SIGNAL ", + "N/A", + "N/A", + "INTERRUPT_WINDOW", + "NMI_WINDOW", + "TASK_SWITCH", + "CPUID", + "N/A", + "HLT", + "INVD", + "INVLPG", + "RDPMC", + "RDTSC", + "N/A", + "VMCALL", + "VMCLEAR", + "VMLAUNCH", + "VMPTRLD", + "VMPTRST", + "VMREAD", + "VMRESUME", + "VMWRITE", + "VMOFF", + "VMON", + "CR_ACCESS", + "DR_ACCESS", + "IO_INSTRUCTION", + "MSR_READ", + "MSR_WRITE", + "INVALID_STATE", + "MSR_LOAD_FAIL", + "N/A", + "MWAIT_INSTRUCTION", + "MONITOR_TRAP_FLAG", + "N/A", + "MONITOR_INSTRUCTION", + "PAUSE_INSTRUCTION", + "MCE_DURING_VMENTRY", + "N/A", + "TPR_BELOW_THRESHOLD", + "APIC_ACCESS", + "EOI_INDUCED", + "GDTR_IDTR", + "LDTR_TR", + "EPT_VIOLATION", + "EPT_MISCONFIG", + "INVEPT", + "RDTSCP", + "PREEMPTION_TIMER", + "INVVPID", + "WBINVD", + "XSETBV", + "APIC_WRITE", + "RDRAND", + "INVPCID", + "VMFUNC", + "ENCLS", + "RDSEED", + "PML_FULL", + "XSAVES", 
+ "XRSTORS", + "N/A", + "N/A", + "UMWAIT", + "TPAUSE", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "BUS_LOCK", + "NOTIFY " +) + +# +# Do some checks +# +try: + # Currently, only adapte on intel architecture + cmd = "cat /proc/cpuinfo | grep vendor_id | head -n 1" + arch_info = subprocess.check_output(cmd, shell=True).strip() + if b"Intel" in arch_info: + pass + else: + raise Exception("Currently we only support Intel architecture, please do expansion if needs more.") + + # Check if kvm module is loaded + if os.access("/dev/kvm", os.R_OK | os.W_OK): + pass + else: + raise Exception("Please insmod kvm module to use kvmexit tool.") +except Exception as e: + raise Exception("Failed to do precondition check, due to: %s." % e) + +def find_tid(tgt_dir, tgt_vcpu): + for tid in os.listdir(tgt_dir): + path = tgt_dir + "/" + tid + "/comm" + fp = open(path, "r") + comm = fp.read() + if (comm.find(tgt_vcpu) != -1): + return tid + return -1 + +# set process/thread filter +thread_context = "" +header_format = "" +need_collapse = not args.alltids +if args.tid is not None: + thread_context = "TID %s" % args.tid + thread_filter = 'pid != %s' % args.tid +elif args.tids is not None: + thread_context = "TIDS %s" % args.tids + thread_filter = "pid != " + " && pid != ".join(args.tids) + header_format = "TIDS " +elif args.pid is not None: + thread_context = "PID %s" % args.pid + thread_filter = 'tgid != %s' % args.pid + if args.vcpu is not None: + thread_context = "PID %s VCPU %s" % (args.pid, args.vcpu) + # transfer vcpu to tid + tgt_dir = '/proc/' + str(args.pid) + '/task' + tgt_vcpu = "CPU " + str(args.vcpu) + args.tid = find_tid(tgt_dir, tgt_vcpu) + if args.tid == -1: + raise Exception("There's no v%s for PID %d." 
% (tgt_vcpu, args.pid)) + thread_filter = 'pid != %s' % args.tid + elif args.alltids: + thread_context = "PID %s and its all threads" % args.pid + header_format = "TID " +else: + thread_context = "all threads" + thread_filter = '0' + header_format = "PID TID " +bpf_text = bpf_text.replace('THREAD_FILTER', thread_filter) +b = BPF(text=bpf_text) + + +# header +print("Display kvm exit reasons and statistics for %s" % thread_context, end="") +if duration < 99999999: + print(" after sleeping %d secs." % duration) +else: + print("... Hit Ctrl-C to end.") + +try: + sleep(duration) +except KeyboardInterrupt: + print() + + +# Currently, sort multiple tids in descending order is not supported. +if (args.pid or args.tid): + ct_reason = [] + if args.pid: + tgid_exit = [0 for i in range(len(exit_reasons))] + +# output +print("%s%-35s %s" % (header_format, "KVM_EXIT_REASON", "COUNT")) + +pcpu_kvm_stat = b["pcpu_kvm_stat"] +pcpu_cache = b["pcpu_cache"] +for k, v in pcpu_kvm_stat.items(): + tgid = k.value >> 32 + pid = k.value & 0xffffffff + for i in range(0, len(exit_reasons)): + sum1 = 0 + for inner_cpu in range(0, multiprocessing.cpu_count()): + cachePIDTGID = pcpu_cache[0][inner_cpu].cache_pid_tgid + # Take priority to check if it is in cache + if cachePIDTGID == k.value: + sum1 += pcpu_cache[0][inner_cpu].cache_exit_ct.exit_ct[i] + # If not in cache, find from kvm_stat + else: + sum1 += v[inner_cpu].exit_ct[i] + if sum1 == 0: + continue + + if (args.pid and args.pid == tgid and need_collapse): + tgid_exit[i] += sum1 + elif (args.tid and args.tid == pid): + ct_reason.append((sum1, i)) + elif not need_collapse or args.tids: + print("%-8u %-35s %-8u" % (pid, exit_reasons[i], sum1)) + else: + print("%-8u %-8u %-35s %-8u" % (tgid, pid, exit_reasons[i], sum1)) + + # Display only for the target tid in descending sort + if (args.tid and args.tid == pid): + ct_reason.sort(reverse=True) + for i in range(0, len(ct_reason)): + if ct_reason[i][0] == 0: + continue + print("%-35s %-8u" % 
(exit_reasons[ct_reason[i][1]], ct_reason[i][0])) + break + + +# Aggregate all tids' counts for this args.pid in descending sort +if args.pid and need_collapse: + for i in range(0, len(exit_reasons)): + ct_reason.append((tgid_exit[i], i)) + ct_reason.sort(reverse=True) + for i in range(0, len(ct_reason)): + if ct_reason[i][0] == 0: + continue + print("%-35s %-8u" % (exit_reasons[ct_reason[i][1]], ct_reason[i][0])) \ No newline at end of file diff --git a/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit_example.txt b/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit_example.txt new file mode 100644 index 000000000..3ee773bbe --- /dev/null +++ b/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit_example.txt @@ -0,0 +1,250 @@ +Demonstrations of kvm exit reasons, the Linux eBPF/bcc version. + + +Considering virtual machines' frequent exits can cause performance problems, +this tool aims to locate the frequent exited reasons and then find solutions +to reduce or even avoid the exit, by displaying the detail exit reasons and +the counts of each vm exit for all vms running on one physical machine. + + +Features of this tool +===================== + +- Although there is a patch: [KVM: x86: add full vm-exit reason debug entries] + (https://patchwork.kernel.org/project/kvm/patch/1555939499-30854-1-git-send-email-pizhenwei@bytedance.com/) + trying to fill more vm-exit reason debug entries, just as the comments said, + the code allocates lots of memory that may never be consumed, misses some + arch-specific kvm causes, and can not do kernel aggregation. Instead bcc, as + a user space tool, can implement all these functions more easily and flexibly. +- The bcc python logic could provide nice kernel aggregation and custom output, + like collpasing all tids for one pid (e.i. one vm's qemu process id) with exit + reasons sorted in descending order. For more information, see the following + #USAGE message. 
+- The bpf in-kernel percpu_array and percpu_cache further improves performance. + For more information, see the following #Help to understand. + + +Limited +======= + +In view of the hardware-assisted virtualization technology of +different architectures, currently we only adapt on vmx in intel. +And the amd feature is on the road.. + + +Example output: +=============== + +# ./kvmexit.py +Display kvm exit reasons and statistics for all threads... Hit Ctrl-C to end. +PID TID KVM_EXIT_REASON COUNT +^C1273551 1273568 EXIT_REASON_HLT 12 +1273551 1273568 EXIT_REASON_MSR_WRITE 6 +1274253 1274261 EXIT_REASON_EXTERNAL_INTERRUPT 1 +1274253 1274261 EXIT_REASON_HLT 12 +1274253 1274261 EXIT_REASON_MSR_WRITE 4 + +# ./kvmexit.py 6 +Display kvm exit reasons and statistics for all threads after sleeping 6 secs. +PID TID KVM_EXIT_REASON COUNT +1273903 1273922 EXIT_REASON_EXTERNAL_INTERRUPT 175 +1273903 1273922 EXIT_REASON_CPUID 10 +1273903 1273922 EXIT_REASON_HLT 6043 +1273903 1273922 EXIT_REASON_IO_INSTRUCTION 24 +1273903 1273922 EXIT_REASON_MSR_WRITE 15025 +1273903 1273922 EXIT_REASON_PAUSE_INSTRUCTION 11 +1273903 1273922 EXIT_REASON_EOI_INDUCED 12 +1273903 1273922 EXIT_REASON_EPT_VIOLATION 6 +1273903 1273922 EXIT_REASON_EPT_MISCONFIG 380 +1273903 1273922 EXIT_REASON_PREEMPTION_TIMER 194 +1273551 1273568 EXIT_REASON_EXTERNAL_INTERRUPT 18 +1273551 1273568 EXIT_REASON_HLT 989 +1273551 1273568 EXIT_REASON_IO_INSTRUCTION 10 +1273551 1273568 EXIT_REASON_MSR_WRITE 2205 +1273551 1273568 EXIT_REASON_PAUSE_INSTRUCTION 1 +1273551 1273568 EXIT_REASON_EOI_INDUCED 5 +1273551 1273568 EXIT_REASON_EPT_MISCONFIG 61 +1273551 1273568 EXIT_REASON_PREEMPTION_TIMER 14 + +# ./kvmexit.py -p 1273795 5 +Display kvm exit reasons and statistics for PID 1273795 after sleeping 5 secs. 
+KVM_EXIT_REASON COUNT +MSR_WRITE 13467 +HLT 5060 +PREEMPTION_TIMER 345 +EPT_MISCONFIG 264 +EXTERNAL_INTERRUPT 169 +EPT_VIOLATION 18 +PAUSE_INSTRUCTION 6 +IO_INSTRUCTION 4 +EOI_INDUCED 2 + +# ./kvmexit.py -p 1273795 5 -a +Display kvm exit reasons and statistics for PID 1273795 and its all threads after sleeping 5 secs. +TID KVM_EXIT_REASON COUNT +1273819 EXTERNAL_INTERRUPT 64 +1273819 HLT 2802 +1273819 IO_INSTRUCTION 4 +1273819 MSR_WRITE 7196 +1273819 PAUSE_INSTRUCTION 2 +1273819 EOI_INDUCED 2 +1273819 EPT_VIOLATION 6 +1273819 EPT_MISCONFIG 162 +1273819 PREEMPTION_TIMER 194 +1273820 EXTERNAL_INTERRUPT 78 +1273820 HLT 2054 +1273820 MSR_WRITE 5199 +1273820 EPT_VIOLATION 2 +1273820 EPT_MISCONFIG 77 +1273820 PREEMPTION_TIMER 102 + +# ./kvmexit.py -p 1273795 -v 0 +Display kvm exit reasons and statistics for PID 1273795 VCPU 0... Hit Ctrl-C to end. +KVM_EXIT_REASON COUNT +^CMSR_WRITE 2076 +HLT 795 +PREEMPTION_TIMER 86 +EXTERNAL_INTERRUPT 20 +EPT_MISCONFIG 10 +PAUSE_INSTRUCTION 2 +IO_INSTRUCTION 2 +EPT_VIOLATION 1 +EOI_INDUCED 1 + +# ./kvmexit.py -p 1273795 -v 0 4 +Display kvm exit reasons and statistics for PID 1273795 VCPU 0 after sleeping 4 secs. +KVM_EXIT_REASON COUNT +MSR_WRITE 4726 +HLT 1827 +PREEMPTION_TIMER 78 +EPT_MISCONFIG 67 +EXTERNAL_INTERRUPT 28 +IO_INSTRUCTION 4 +EOI_INDUCED 2 +PAUSE_INSTRUCTION 2 + +# ./kvmexit.py -p 1273795 -v 4 4 +Traceback (most recent call last): + File "tools/kvmexit.py", line 306, in <module> + raise Exception("There's no v%s for PID %d." % (tgt_vcpu, args.pid)) + Exception: There's no vCPU 4 for PID 1273795. + +# ./kvmexit.py -t 1273819 10 +Display kvm exit reasons and statistics for TID 1273819 after sleeping 10 secs. +KVM_EXIT_REASON COUNT +MSR_WRITE 13318 +HLT 5274 +EPT_MISCONFIG 263 +PREEMPTION_TIMER 171 +EXTERNAL_INTERRUPT 109 +IO_INSTRUCTION 8 +PAUSE_INSTRUCTION 5 +EOI_INDUCED 4 +EPT_VIOLATION 2 + +# ./kvmexit.py -T '1273820,1273819' +Display kvm exit reasons and statistics for TIDS ['1273820', '1273819']... Hit Ctrl-C to end. 
+TIDS KVM_EXIT_REASON COUNT +^C1273819 EXTERNAL_INTERRUPT 300 +1273819 HLT 13718 +1273819 IO_INSTRUCTION 26 +1273819 MSR_WRITE 37457 +1273819 PAUSE_INSTRUCTION 13 +1273819 EOI_INDUCED 13 +1273819 EPT_VIOLATION 53 +1273819 EPT_MISCONFIG 654 +1273819 PREEMPTION_TIMER 958 +1273820 EXTERNAL_INTERRUPT 212 +1273820 HLT 9002 +1273820 MSR_WRITE 25495 +1273820 PAUSE_INSTRUCTION 2 +1273820 EPT_VIOLATION 64 +1273820 EPT_MISCONFIG 396 +1273820 PREEMPTION_TIMER 268 + + +Help to understand +================== + +We use a PERCPU_ARRAY: pcpuArrayA and a percpu_hash: hashA to collaboratively +store each kvm exit reason and its count. The reason is there exists a rule when +one vcpu exits and re-enters, it tends to continue to run on the same physical +cpu (pcpu as follows) as the last cycle, which is also called 'cache hit'. Thus +we turn to use a PERCPU_ARRAY to record the 'cache hit' situation to speed +things up; and for other cases, then use a percpu_hash. + +BTW, we originally use a common hash to do this, with a u64(exit_reason) +key and a struct exit_info {tgid_pid, exit_reason} value. But due to +the big lock in bpf_hash, each updating is quite performance consuming. + +Now imagine here is a pid_tgidA (vcpu A) exits and is going to run on +pcpuArrayA, the BPF code flow is as follows: + + pid_tgidA keeps running on the same pcpu + // \\ + // \\ + // Y N \\ + // \\ + a. cache_hit b. cache_miss +(cacheA's pid_tgid matches pid_tgidA) || + | || + | || + "increase percpu exit_ct and return" || + [*Note*] || + pid_tgidA ever been exited on pcpuArrayA? + // \\ + // \\ + // \\ + // Y N \\ + // \\ + b.a load_last_hashA b.b initialize_hashA_with_zero + \ / + \ / + \ / + "increase percpu exit_ct" + || + || + is another pid_tgid been running on pcpuArrayA? 
+ // \\ + // Y N \\ + // \\ + b.*.a save_theLastHit_hashB do_nothing + \\ // + \\ // + \\ // + b.* save_to_pcpuArrayA + + +[*Note*] we do not update the table in "a." above, since the vcpu may hit the same +pcpu again on its next exit; instead we only update once this pcpu is no longer +hit by the same pid_tgid (vcpu), which is handled in "b.*.a" and "b.*". + + +USAGE message: +============== + +# ./kvmexit.py -h +usage: kvmexit.py [-h] [-p PID [-v VCPU | -a] ] [-t TID | -T 'TID1,TID2'] [duration] + +Display kvm_exit_reason and its statistics at a timed interval + +optional arguments: + -h, --help show this help message and exit + -p PID, --pid PID display process with this PID only, collapse all tids with exit reasons sorted in descending order + -v VCPU, --vcpu VCPU display this VCPU only for this PID + -a, --alltids display all TIDS for this PID + -t TID, --tid TID display thread with this TID only with exit reasons sorted in descending order + -T 'TID1,TID2', --tids 'TID1,TID2' + display threads for a union like {395490, 395491} + duration duration of display, after sleeping several seconds + +examples: + ./kvmexit # Display kvm_exit_reason and its statistics in real-time until Ctrl-C + ./kvmexit 5 # Display in real-time after sleeping 5s + ./kvmexit -p 3195281 # Collpase all tids for pid 3195281 with exit reasons sorted in descending order + ./kvmexit -p 3195281 20 # Collpase all tids for pid 3195281 with exit reasons sorted in descending order, and display after sleeping 20s + ./kvmexit -p 3195281 -v 0 # Display only vcpu0 for pid 3195281, descending sort by default + ./kvmexit -p 3195281 -a # Display all tids for pid 3195281 + ./kvmexit -t 395490 # Display only for tid 395490 with exit reasons sorted in descending order + ./kvmexit -t 395490 20 # Display only for tid 395490 with exit reasons sorted in descending order after sleeping 20s + ./kvmexit -T '395490,395491' # Display for a union like {395490, 395491} \ No newline at end of file From 
c7e7088770cc949cbc6c6f139160b2a6ecb8ae4d Mon Sep 17 00:00:00 2001 From: Y_y_s <78297703+Monkey857@users.noreply.github.com> Date: Fri, 2 Feb 2024 18:01:36 +0800 Subject: [PATCH 02/15] Update kvmexit.py --- eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit.py b/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit.py index 2f82530f0..dd157488f 100644 --- a/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit.py +++ b/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit.py @@ -19,7 +19,7 @@ # @KVM_EXIT_REASON: the reason why the vm exits. # @COUNT: the counts of the @KVM_EXIT_REASONS. # -# REQUIRES: Linux 6.2 (BPF_PROG_TYPE_TRACEPOINT support) +# REQUIRES: Linux 4.7+ (BPF_PROG_TYPE_TRACEPOINT support) # # Copyright (c) 2024 YYS. All rights reserved. # Original code © 2024 ByteDance Inc. All rights reserved. @@ -375,4 +375,4 @@ def find_tid(tgt_dir, tgt_vcpu): for i in range(0, len(ct_reason)): if ct_reason[i][0] == 0: continue - print("%-35s %-8u" % (exit_reasons[ct_reason[i][1]], ct_reason[i][0])) \ No newline at end of file + print("%-35s %-8u" % (exit_reasons[ct_reason[i][1]], ct_reason[i][0])) From dbb0fc656ea959c7a26e701973c920ed22ce4b8a Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 1 Mar 2024 12:46:49 +0800 Subject: [PATCH 03/15] VMexit --- .../kvm_watcher/include/kvm_exits.h | 85 ++++++++++--------- .../kvm_watcher/include/kvm_watcher.h | 15 +++- .../kvm_watcher/src/kvm_watcher.bpf.c | 2 +- .../kvm_watcher/src/kvm_watcher.c | 75 +++++++++++----- 4 files changed, 116 insertions(+), 61 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h b/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h index d41e5cc3f..a7fe15b73 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h @@ -24,21 +24,25 @@ #include #include #include -// 定义哈希结构,存储时间信息 + +#define EXIT_REASON_HLT 12 + 
struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 8192); - __type(key, pid_t); - __type(value, struct reason_info); -} times SEC(".maps"); + __type(key, struct exit_key); //exit_key:reason pid pad[2] + __type(value, struct exit_value); //exit_value : max_time total_time min_time count pad +} exit_map SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 8192); - __type(key, u32); - __type(value, u32); -} counts SEC(".maps"); -// 记录退出的信息 + __type(key, pid_t); + __type(value, struct reason_info); //reason_info:time、reason、count +} times SEC(".maps"); + + + struct exit { u64 pad; unsigned int exit_reason; @@ -51,38 +55,29 @@ struct exit { unsigned int vcpu_id; }; -int total = 0; -// 记录vm_exit的原因以及时间 static int trace_kvm_exit(struct exit *ctx, pid_t vm_pid) { CHECK_PID(vm_pid); + u32 reason; + reason = (u32)ctx->exit_reason; + //如果是节能停止退出,就不采集数据 + if(reason==EXIT_REASON_HLT){ + return 0; + } u64 id, ts; id = bpf_get_current_pid_tgid(); pid_t tid = (u32)id; ts = bpf_ktime_get_ns(); - u32 reason; - reason = (u32)ctx->exit_reason; struct reason_info reas = {}; reas.reason = reason; reas.time = ts; - u32 *count; - count = bpf_map_lookup_elem(&counts, &reason); - if (count) { - (*count)++; - reas.count = *count; - bpf_map_update_elem(&counts, &reason, count, BPF_ANY); - } else { - u32 new_count = 1; - reas.count = new_count; - bpf_map_update_elem(&counts, &reason, &new_count, BPF_ANY); - } bpf_map_update_elem(&times, &tid, &reas, BPF_ANY); return 0; } -// 通过kvm_exit所记录的信息,来计算出整个处理的时间 -static int trace_kvm_entry(void *rb, struct common_event *e) { + +static int trace_kvm_entry() { struct reason_info *reas; pid_t pid, tid; - u64 id, ts, *start_ts, duration_ns = 0; + u64 id, ts, *start_ts, duration_ns; id = bpf_get_current_pid_tgid(); pid = id >> 32; tid = (u32)id; @@ -90,22 +85,32 @@ static int trace_kvm_entry(void *rb, struct common_event *e) { if (!reas) { return 0; } - u32 reason; - int count = 0; duration_ns = bpf_ktime_get_ns() - reas->time; 
bpf_map_delete_elem(&times, &tid); - reason = reas->reason; - count = reas->count; - RESERVE_RINGBUF_ENTRY(rb, e); - e->exit_data.reason_number = reason; - e->process.pid = pid; - e->process.tid = tid; - e->exit_data.duration_ns = duration_ns; - bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); - e->exit_data.total = ++total; - e->exit_data.count = count; - e->time = reas->time; - bpf_ringbuf_submit(e, 0); + struct exit_key exit_key; + __builtin_memset(&exit_key, 0, sizeof(struct exit_key)); + exit_key.pid=pid; + exit_key.reason=reas->reason; + struct exit_value *exit_value; + exit_value = bpf_map_lookup_elem(&exit_map, &exit_key); + if (exit_value) { + exit_value->count ++; + exit_value->total_time +=duration_ns; + if(exit_value->max_time < duration_ns){ + exit_value->max_time = duration_ns; + } + if(exit_value->min_time > duration_ns){ + exit_value->min_time=duration_ns; + } + } else { + struct exit_value new_exit_value = { + .count=1, + .max_time=duration_ns, + .total_time=duration_ns, + .min_time=duration_ns + }; + bpf_map_update_elem(&exit_map, &exit_key, &new_exit_value, BPF_ANY); + } return 0; } #endif /* __KVM_EXITS_H */ diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h index 55d55343b..c388f0f2e 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h @@ -29,7 +29,7 @@ #define NS_TO_MS_WITH_DECIMAL(ns) ((double)(ns) / NS_TO_MS_FACTOR) #define MICROSECONDS_IN_SECOND 1000000 -#define OUTPUT_INTERVAL_SECONDS 0.5 +#define OUTPUT_INTERVAL_SECONDS 2 #define OUTPUT_INTERVAL(us) usleep((__u32)(us * MICROSECONDS_IN_SECOND)) @@ -96,7 +96,20 @@ struct ExitReason { struct reason_info { __u64 time; __u64 reason; +}; + +struct exit_key { + __u64 reason; + __u32 pid; + __u32 pad; +}; + +struct exit_value { + __u64 max_time; + __u64 total_time; + __u64 min_time; __u32 count; + __u32 pad; }; struct dirty_page_info { diff --git 
a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c index 0564e4704..b5c9e5462 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c @@ -60,7 +60,7 @@ int tp_exit(struct exit *ctx) { // 记录vm_entry和vm_exit的时间差 SEC("tp/kvm/kvm_entry") int tp_entry(struct exit *ctx) { - return trace_kvm_entry(&rb, e); + return trace_kvm_entry(); } SEC("kprobe/mark_page_dirty_in_slot") diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index 127b5c69c..700aadc41 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -490,22 +490,6 @@ static int handle_event(void *ctx, void *data, size_t data_sz) { break; } case EXIT: { - char info_buffer[256]; - // 使用 e->exit_data 访问 EXIT 特有成员 - printf("%-18.6f %-2d/%-18s %-18s %-6u/%-8u %-8d %-13.4f \n", - timestamp_ms, e->exit_data.reason_number, - getExitReasonName(e->exit_data.reason_number), - e->process.comm, e->process.pid, e->process.tid, - e->exit_data.count, - NS_TO_US_WITH_DECIMAL(e->exit_data.duration_ns)); - - if (env.ShowStats) { - snprintf(info_buffer, sizeof(info_buffer), "%-18s %-8u %-8d", - e->process.comm, e->process.pid, e->exit_data.count); - addExitInfo(&exitInfoBuffer, e->exit_data.reason_number, - info_buffer, e->exit_data.duration_ns, - e->exit_data.count); - } break; } case HALT_POLL: { @@ -661,8 +645,8 @@ static int print_event_head(struct env *env) { "VAILD?"); break; case EXIT: - printf("%-18s %-21s %-18s %-15s %-8s %-13s \n", "TIME(ms)", - "EXIT_REASON", "COMM", "PID/TID", "COUNT", "DURATION(us)"); + // printf("%-18s %-21s %-18s %-15s %-8s %-13s \n", "TIME(ms)", + // "EXIT_REASON", "COMM", "PID/TID", "COUNT", "DURATION(us)"); break; case HALT_POLL: printf("%-18s %-15s %-15s %-10s %-7s %-11s %-10s\n", "TIME(ms)", @@ -734,6 +718,57 @@ static void set_disable_load(struct kvm_watcher_bpf 
*skel) { env.execute_irq_inject ? true : false); } +int print_exit_map(struct kvm_watcher_bpf *skel) { + int fd = bpf_map__fd(skel->maps.exit_map); + int err; + struct exit_key lookup_key = {}; + struct exit_key next_key = {}; + struct exit_value exit_value; + struct tm *tm; + char ts[32]; + time_t t; + time(&t); + tm = localtime(&t); + strftime(ts, sizeof(ts), "%H:%M:%S", tm); + int first_run = 1; + // Iterate over the map + while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) { + if (first_run) { + first_run = 0; + printf("\nTIME:%s\n", ts); + printf("%-12s %-12s %-12s %-12s %-12s %-12s\n", "pid", "total_time", + "max_time","min_time", "counts", "reason"); + printf( + "------------ ------------ ------------ ------------ ------------ " + "------------\n"); + } + // Print the current entry + err = bpf_map_lookup_elem(fd, &next_key, &exit_value); + if (err < 0) { + fprintf(stderr, "failed to lookup exit_value: %d\n", err); + return -1; + } + printf("%-12d %-12.4f %-12.4f %-12.4f %-12u %-12s\n", next_key.pid, + NS_TO_MS_WITH_DECIMAL(exit_value.total_time), + NS_TO_MS_WITH_DECIMAL(exit_value.max_time), + NS_TO_MS_WITH_DECIMAL(exit_value.min_time),exit_value.count, + getExitReasonName(next_key.reason)); + + // Move to the next key + lookup_key = next_key; + } + memset(&lookup_key, 0, sizeof(struct exit_key)); + while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) { + err = bpf_map_delete_elem(fd, &next_key); + if (err < 0) { + fprintf(stderr, "failed to cleanup counters: %d\n", err); + return -1; + } + lookup_key = next_key; + } + return 0; +} + int main(int argc, char **argv) { // 定义一个环形缓冲区 struct ring_buffer *rb = NULL; @@ -806,8 +841,10 @@ int main(int argc, char **argv) { goto cleanup; } while (!exiting) { - // OUTPUT_INTERVAL(OUTPUT_INTERVAL_SECONDS); // 输出间隔 + err = ring_buffer__poll(rb, RING_BUFFER_TIMEOUT_MS /* timeout, ms */); + sleep(3); + err = print_exit_map(skel); /* Ctrl-C will cause -EINTR */ if (err == -EINTR) { err = 0; From 
9190fa17b6e8c93a0d9e070da1394ddac212a52a Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 1 Mar 2024 13:24:30 +0800 Subject: [PATCH 04/15] =?UTF-8?q?=E4=BF=AE=E6=94=B9action?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- eBPF_Supermarket/kvm_watcher/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eBPF_Supermarket/kvm_watcher/Makefile b/eBPF_Supermarket/kvm_watcher/Makefile index b330f8503..7aebb9cf2 100644 --- a/eBPF_Supermarket/kvm_watcher/Makefile +++ b/eBPF_Supermarket/kvm_watcher/Makefile @@ -14,7 +14,8 @@ define common_rules1 # 安装依赖 sudo apt install clang libelf1 libelf-dev zlib1g-dev libbpf-dev linux-tools-$$(uname -r) linux-cloud-tools-$$(uname -r) # 加载KVM模块 - sudo modprobe kvm && sudo modprobe kvm-intel + sudo modprobe kvm + sudo modprobe kvm-intel # 生成vmlinux.h文件 bpftool btf dump file /sys/kernel/btf/kvm format c > ./include/vmlinux.h endef From 47a3077f081b3f1a94397903acee146a073b7caf Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 1 Mar 2024 13:31:18 +0800 Subject: [PATCH 05/15] =?UTF-8?q?=E4=BF=AE=E6=94=B9action?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- eBPF_Supermarket/kvm_watcher/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eBPF_Supermarket/kvm_watcher/Makefile b/eBPF_Supermarket/kvm_watcher/Makefile index 7aebb9cf2..887783cd4 100644 --- a/eBPF_Supermarket/kvm_watcher/Makefile +++ b/eBPF_Supermarket/kvm_watcher/Makefile @@ -15,7 +15,7 @@ define common_rules1 sudo apt install clang libelf1 libelf-dev zlib1g-dev libbpf-dev linux-tools-$$(uname -r) linux-cloud-tools-$$(uname -r) # 加载KVM模块 sudo modprobe kvm - sudo modprobe kvm-intel + #sudo modprobe kvm-intel # 生成vmlinux.h文件 bpftool btf dump file /sys/kernel/btf/kvm format c > ./include/vmlinux.h endef From 389a1921e2647a6df62145022583c5f43648eb84 Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 1 Mar 2024 13:36:39 +0800 Subject: [PATCH 06/15] 
=?UTF-8?q?=E4=BF=AE=E6=94=B9makefile?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- eBPF_Supermarket/kvm_watcher/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/Makefile b/eBPF_Supermarket/kvm_watcher/Makefile index 887783cd4..a6de86633 100644 --- a/eBPF_Supermarket/kvm_watcher/Makefile +++ b/eBPF_Supermarket/kvm_watcher/Makefile @@ -7,15 +7,14 @@ ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \ | sed 's/riscv64/riscv/' \ | sed 's/loongarch64/loongarch/') APP = src/kvm_watcher -OPTIONS = -f -w -n -d -c '-e -s' +OPTIONS = -f -w -n -d -c -e # 共同规则1 define common_rules1 # 安装依赖 sudo apt install clang libelf1 libelf-dev zlib1g-dev libbpf-dev linux-tools-$$(uname -r) linux-cloud-tools-$$(uname -r) # 加载KVM模块 - sudo modprobe kvm - #sudo modprobe kvm-intel + sudo modprobe kvm && modprobe kvm-intel # 生成vmlinux.h文件 bpftool btf dump file /sys/kernel/btf/kvm format c > ./include/vmlinux.h endef From 8d1830634aa97305c7142bc911275b079d6264cd Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 1 Mar 2024 13:46:05 +0800 Subject: [PATCH 07/15] update makefile --- eBPF_Supermarket/kvm_watcher/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/Makefile b/eBPF_Supermarket/kvm_watcher/Makefile index a6de86633..fe0ad9eb5 100644 --- a/eBPF_Supermarket/kvm_watcher/Makefile +++ b/eBPF_Supermarket/kvm_watcher/Makefile @@ -7,14 +7,15 @@ ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \ | sed 's/riscv64/riscv/' \ | sed 's/loongarch64/loongarch/') APP = src/kvm_watcher -OPTIONS = -f -w -n -d -c -e +OPTIONS = -f -w -n -d -e # 共同规则1 define common_rules1 # 安装依赖 sudo apt install clang libelf1 libelf-dev zlib1g-dev libbpf-dev linux-tools-$$(uname -r) linux-cloud-tools-$$(uname -r) # 加载KVM模块 - sudo modprobe kvm && modprobe kvm-intel + sudo modprobe kvm + #sudo modprobe kvm-intel # 生成vmlinux.h文件 bpftool btf dump file /sys/kernel/btf/kvm format 
c > ./include/vmlinux.h endef From f4f9ce230c0dcf944432ceba4838fce4086ac917 Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 1 Mar 2024 14:15:41 +0800 Subject: [PATCH 08/15] update yml --- .github/workflows/kvm_watcher.yml | 2 +- eBPF_Supermarket/kvm_watcher/Makefile | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/kvm_watcher.yml b/.github/workflows/kvm_watcher.yml index 49b22a6c0..0a3f7525a 100644 --- a/.github/workflows/kvm_watcher.yml +++ b/.github/workflows/kvm_watcher.yml @@ -23,5 +23,5 @@ jobs: - name: Test program execution run: | cd eBPF_Supermarket/kvm_watcher/ - make test + make diff --git a/eBPF_Supermarket/kvm_watcher/Makefile b/eBPF_Supermarket/kvm_watcher/Makefile index fe0ad9eb5..5264cb9a3 100644 --- a/eBPF_Supermarket/kvm_watcher/Makefile +++ b/eBPF_Supermarket/kvm_watcher/Makefile @@ -13,9 +13,6 @@ OPTIONS = -f -w -n -d -e define common_rules1 # 安装依赖 sudo apt install clang libelf1 libelf-dev zlib1g-dev libbpf-dev linux-tools-$$(uname -r) linux-cloud-tools-$$(uname -r) - # 加载KVM模块 - sudo modprobe kvm - #sudo modprobe kvm-intel # 生成vmlinux.h文件 bpftool btf dump file /sys/kernel/btf/kvm format c > ./include/vmlinux.h endef @@ -45,11 +42,13 @@ ifeq ($(MAKECMDGOALS),test) ifeq ($(shell grep -Eoc '(vmx|svm)' /proc/cpuinfo),0) $(error "The CPU in your device does not support virtualization!") endif + # 加载KVM模块 + sudo modprobe kvm && sudo modprobe kvm-intel + $(common_rules1) + $(common_rules2) ifeq ($(wildcard ./cirros-0.5.2-x86_64-disk.img),) wget https://gitee.com/nan-shuaibo/cirros/releases/download/0.5.2/cirros-0.5.2-x86_64-disk.img endif - $(common_rules1) - $(common_rules2) # 安装 qemu $(INSTALL_QEMU) # 启动虚拟机 From 22489668a2e588e01b1f4b8aff43215549a2e7e9 Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 1 Mar 2024 14:21:13 +0800 Subject: [PATCH 09/15] =?UTF-8?q?=E8=B0=83=E6=95=B4=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit --- .../kvm_watcher/include/kvm_exits.h | 37 +++++++++---------- .../kvm_watcher/src/kvm_watcher.c | 13 ++++--- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h b/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h index a7fe15b73..4171c89e8 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h @@ -25,24 +25,23 @@ #include #include -#define EXIT_REASON_HLT 12 +#define EXIT_REASON_HLT 12 struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 8192); - __type(key, struct exit_key); //exit_key:reason pid pad[2] - __type(value, struct exit_value); //exit_value : max_time total_time min_time count pad + __type(key, struct exit_key); // exit_key:reason pid pad[2] + __type(value, struct exit_value); // exit_value : max_time total_time + // min_time count pad } exit_map SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 8192); __type(key, pid_t); - __type(value, struct reason_info); //reason_info:time、reason、count + __type(value, struct reason_info); // reason_info:time、reason、count } times SEC(".maps"); - - struct exit { u64 pad; unsigned int exit_reason; @@ -60,7 +59,7 @@ static int trace_kvm_exit(struct exit *ctx, pid_t vm_pid) { u32 reason; reason = (u32)ctx->exit_reason; //如果是节能停止退出,就不采集数据 - if(reason==EXIT_REASON_HLT){ + if (reason == EXIT_REASON_HLT) { return 0; } u64 id, ts; @@ -89,26 +88,24 @@ static int trace_kvm_entry() { bpf_map_delete_elem(×, &tid); struct exit_key exit_key; __builtin_memset(&exit_key, 0, sizeof(struct exit_key)); - exit_key.pid=pid; - exit_key.reason=reas->reason; + exit_key.pid = pid; + exit_key.reason = reas->reason; struct exit_value *exit_value; exit_value = bpf_map_lookup_elem(&exit_map, &exit_key); if (exit_value) { - exit_value->count ++; - exit_value->total_time +=duration_ns; - if(exit_value->max_time < duration_ns){ + exit_value->count++; + exit_value->total_time += 
duration_ns; + if (exit_value->max_time < duration_ns) { exit_value->max_time = duration_ns; } - if(exit_value->min_time > duration_ns){ - exit_value->min_time=duration_ns; + if (exit_value->min_time > duration_ns) { + exit_value->min_time = duration_ns; } } else { - struct exit_value new_exit_value = { - .count=1, - .max_time=duration_ns, - .total_time=duration_ns, - .min_time=duration_ns - }; + struct exit_value new_exit_value = {.count = 1, + .max_time = duration_ns, + .total_time = duration_ns, + .min_time = duration_ns}; bpf_map_update_elem(&exit_map, &exit_key, &new_exit_value, BPF_ANY); } return 0; diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index 700aadc41..3c11e0b3a 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -646,7 +646,8 @@ static int print_event_head(struct env *env) { break; case EXIT: // printf("%-18s %-21s %-18s %-15s %-8s %-13s \n", "TIME(ms)", - // "EXIT_REASON", "COMM", "PID/TID", "COUNT", "DURATION(us)"); + // "EXIT_REASON", "COMM", "PID/TID", "COUNT", + // "DURATION(us)"); break; case HALT_POLL: printf("%-18s %-15s %-15s %-10s %-7s %-11s %-10s\n", "TIME(ms)", @@ -737,9 +738,10 @@ int print_exit_map(struct kvm_watcher_bpf *skel) { first_run = 0; printf("\nTIME:%s\n", ts); printf("%-12s %-12s %-12s %-12s %-12s %-12s\n", "pid", "total_time", - "max_time","min_time", "counts", "reason"); + "max_time", "min_time", "counts", "reason"); printf( - "------------ ------------ ------------ ------------ ------------ " + "------------ ------------ ------------ ------------ " + "------------ " "------------\n"); } // Print the current entry @@ -751,7 +753,7 @@ int print_exit_map(struct kvm_watcher_bpf *skel) { printf("%-12d %-12.4f %-12.4f %-12.4f %-12u %-12s\n", next_key.pid, NS_TO_MS_WITH_DECIMAL(exit_value.total_time), NS_TO_MS_WITH_DECIMAL(exit_value.max_time), - 
NS_TO_MS_WITH_DECIMAL(exit_value.min_time),exit_value.count, + NS_TO_MS_WITH_DECIMAL(exit_value.min_time), exit_value.count, getExitReasonName(next_key.reason)); // Move to the next key @@ -841,7 +843,6 @@ int main(int argc, char **argv) { goto cleanup; } while (!exiting) { - err = ring_buffer__poll(rb, RING_BUFFER_TIMEOUT_MS /* timeout, ms */); sleep(3); err = print_exit_map(skel); @@ -863,7 +864,7 @@ int main(int argc, char **argv) { if (err < 0) { printf("Save count dirty page map to file fail: %d\n", err); goto cleanup; - }else{ + } else { printf("\nSave count dirty page map to file success!\n"); goto cleanup; } From 1e4a8804c44cde069165af4d8da6e05050406b86 Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 8 Mar 2024 11:23:59 +0800 Subject: [PATCH 10/15] vm exit --- .../kvm_watcher/include/kvm_exits.h | 1 + .../kvm_watcher/include/kvm_watcher.h | 1 + .../kvm_watcher/src/kvm_watcher.c | 96 +++++++++++++++---- 3 files changed, 79 insertions(+), 19 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h b/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h index 4171c89e8..9fb034a07 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h @@ -89,6 +89,7 @@ static int trace_kvm_entry() { struct exit_key exit_key; __builtin_memset(&exit_key, 0, sizeof(struct exit_key)); exit_key.pid = pid; + exit_key.tid = tid; exit_key.reason = reas->reason; struct exit_value *exit_value; exit_value = bpf_map_lookup_elem(&exit_map, &exit_key); diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h index 3a8a8df61..25a00063c 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h @@ -96,6 +96,7 @@ struct reason_info { struct exit_key { __u64 reason; __u32 pid; + __u32 tid; __u32 pad; }; diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c 
b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index b1261b056..1c22db470 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -785,13 +785,56 @@ int print_hc_map(struct kvm_watcher_bpf *skel) { } return 0; } +// In order to sort vm_exit maps +int sort_by_key(struct kvm_watcher_bpf *skel, struct exit_key *keys, + struct exit_value *values) { + int fd = bpf_map__fd(skel->maps.exit_map); + int err = 0; + struct exit_key lookup_key = {}; + struct exit_key next_key = {}; + struct exit_value exit_value; + int first = 1; + int i = 0, j; + int count = 0; + while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) { + count++; + if (first) { + first = 0; + bpf_map_lookup_elem(fd, &next_key, &exit_value); + keys[0] = next_key; + values[0] = exit_value; + i++; + continue; + } + err = bpf_map_lookup_elem(fd, &next_key, &exit_value); + if (err < 0) { + fprintf(stderr, "failed to lookup exit_value: %d\n", err); + return -1; + } + //insert sort + j = i - 1; + struct exit_key temp_key = next_key; + struct exit_value temp_value = exit_value; + while (j >= 0 && + (keys[j].pid > temp_key.pid || (keys[j].tid > temp_key.tid))) { + keys[j + 1] = keys[j]; + values[j + 1] = values[j]; + j--; + } + i++; + keys[j + 1] = next_key; + values[j + 1] = temp_value; + // Move to the next key + lookup_key = next_key; + } + return count; +} int print_exit_map(struct kvm_watcher_bpf *skel) { int fd = bpf_map__fd(skel->maps.exit_map); int err; struct exit_key lookup_key = {}; struct exit_key next_key = {}; - struct exit_value exit_value; struct tm *tm; char ts[32]; time_t t; @@ -799,33 +842,46 @@ int print_exit_map(struct kvm_watcher_bpf *skel) { tm = localtime(&t); strftime(ts, sizeof(ts), "%H:%M:%S", tm); int first_run = 1; - // Iterate over the map - while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) { + struct exit_key keys[8192]; + struct exit_value values[8192]; + int count = sort_by_key(skel, keys, values); + // Iterate 
over the array + __u32 pid = 0; + __u32 tid = 0; + for (int i = 0; i < count; i++) { if (first_run) { first_run = 0; printf("\nTIME:%s\n", ts); - printf("%-12s %-12s %-12s %-12s %-12s %-12s\n", "pid", "total_time", - "max_time", "min_time", "counts", "reason"); + printf("%-12s %-12s %-12s %-12s %-12s %-12s %-12s\n", "pid", "tid", + "total_time", "max_time", "min_time", "counts", "reason"); printf( "------------ ------------ ------------ ------------ " "------------ " "------------\n"); } // Print the current entry - err = bpf_map_lookup_elem(fd, &next_key, &exit_value); - if (err < 0) { - fprintf(stderr, "failed to lookup exit_value: %d\n", err); - return -1; + if (tid == 0 || tid != keys[i].tid) { + tid = keys[i].tid; + if (pid == 0 || pid != keys[i].pid) { + pid = keys[i].pid; + printf("%-12d", pid); + } else { + printf("%-12s", ""); + } + printf("%-12d %-12.4f %-12.4f %-12.4f %-12u %-12s\n", keys[i].tid, + NS_TO_MS_WITH_DECIMAL(values[i].total_time), + NS_TO_MS_WITH_DECIMAL(values[i].max_time), + NS_TO_MS_WITH_DECIMAL(values[i].min_time), values[i].count, + getExitReasonName(keys[i].reason)); + } else if (tid == keys[i].tid) { + printf("%24s %-12.4f %-12.4f %-12.4f %-12u %-12s\n", "", + NS_TO_MS_WITH_DECIMAL(values[i].total_time), + NS_TO_MS_WITH_DECIMAL(values[i].max_time), + NS_TO_MS_WITH_DECIMAL(values[i].min_time), values[i].count, + getExitReasonName(keys[i].reason)); } - printf("%-12d %-12.4f %-12.4f %-12.4f %-12u %-12s\n", next_key.pid, - NS_TO_MS_WITH_DECIMAL(exit_value.total_time), - NS_TO_MS_WITH_DECIMAL(exit_value.max_time), - NS_TO_MS_WITH_DECIMAL(exit_value.min_time), exit_value.count, - getExitReasonName(next_key.reason)); - - // Move to the next key - lookup_key = next_key; } + // clear the maps memset(&lookup_key, 0, sizeof(struct exit_key)); while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) { err = bpf_map_delete_elem(fd, &next_key); @@ -838,7 +894,9 @@ int print_exit_map(struct kvm_watcher_bpf *skel) { return 0; } -void 
print_map_and_check_error(int (*print_func)(struct kvm_watcher_bpf *), struct kvm_watcher_bpf *skel, const char *map_name, int err) { +void print_map_and_check_error(int (*print_func)(struct kvm_watcher_bpf *), + struct kvm_watcher_bpf *skel, + const char *map_name, int err) { OUTPUT_INTERVAL(OUTPUT_INTERVAL_SECONDS); print_func(skel); if (err < 0) { @@ -919,7 +977,7 @@ int main(int argc, char **argv) { } while (!exiting) { err = ring_buffer__poll(rb, RING_BUFFER_TIMEOUT_MS /* timeout, ms */); - + if (env.execute_hypercall) { print_map_and_check_error(print_hc_map, skel, "hypercall", err); } From 15484302f21aae063cdae8c938edd57014cfb567 Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 8 Mar 2024 17:41:57 +0800 Subject: [PATCH 11/15] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h | 1 - eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h index 25a00063c..a1a81e037 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h @@ -97,7 +97,6 @@ struct exit_key { __u64 reason; __u32 pid; __u32 tid; - __u32 pad; }; struct exit_value { diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index 1c22db470..96889285a 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -812,7 +812,7 @@ int sort_by_key(struct kvm_watcher_bpf *skel, struct exit_key *keys, fprintf(stderr, "failed to lookup exit_value: %d\n", err); return -1; } - //insert sort + // insert sort j = i - 1; struct exit_key temp_key = next_key; struct exit_value temp_value = exit_value; @@ -857,6 +857,7 @@ int 
print_exit_map(struct kvm_watcher_bpf *skel) { printf( "------------ ------------ ------------ ------------ " "------------ " + "------------ " "------------\n"); } // Print the current entry @@ -864,9 +865,9 @@ int print_exit_map(struct kvm_watcher_bpf *skel) { tid = keys[i].tid; if (pid == 0 || pid != keys[i].pid) { pid = keys[i].pid; - printf("%-12d", pid); + printf("%-13d", pid); } else { - printf("%-12s", ""); + printf("%-13s", ""); } printf("%-12d %-12.4f %-12.4f %-12.4f %-12u %-12s\n", keys[i].tid, NS_TO_MS_WITH_DECIMAL(values[i].total_time), @@ -874,7 +875,7 @@ int print_exit_map(struct kvm_watcher_bpf *skel) { NS_TO_MS_WITH_DECIMAL(values[i].min_time), values[i].count, getExitReasonName(keys[i].reason)); } else if (tid == keys[i].tid) { - printf("%24s %-12.4f %-12.4f %-12.4f %-12u %-12s\n", "", + printf("%25s %-12.4f %-12.4f %-12.4f %-12u %-12s\n", "", NS_TO_MS_WITH_DECIMAL(values[i].total_time), NS_TO_MS_WITH_DECIMAL(values[i].max_time), NS_TO_MS_WITH_DECIMAL(values[i].min_time), values[i].count, From a1ca34d120c24c2afcabdd8fa826041dadbbad97 Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 22 Mar 2024 14:07:03 +0800 Subject: [PATCH 12/15] modify sort_bug --- .../kvm_watcher/src/kvm_watcher.c | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index 2746149fe..2259a45c4 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -257,6 +257,7 @@ int save_count_dirtypagemap_to_file(struct bpf_map *map) { static struct env { bool execute_vcpu_wakeup; bool execute_exit; + bool execute_vcpu_load; bool execute_halt_poll_ns; bool execute_mark_page_dirty; bool execute_page_fault; @@ -270,6 +271,7 @@ static struct env { enum EventType event_type; } env = { .execute_vcpu_wakeup = false, + .execute_vcpu_load = false, .execute_exit = false, .execute_halt_poll_ns = false, 
.execute_mark_page_dirty = false, @@ -291,6 +293,7 @@ int option_selected = 0; // 功能标志变量,确保激活子功能 // 具体解释命令行参数 static const struct argp_option opts[] = { {"vcpu_wakeup", 'w', NULL, 0, "Monitoring the wakeup of vcpu."}, + {"vcpu_load", 'o', NULL, 0, "Monitoring the load of vcpu."}, {"vm_exit", 'e', NULL, 0, "Monitoring the event of vm exit."}, {"halt_poll_ns", 'n', NULL, 0, "Monitoring the variation in vCPU halt-polling time."}, @@ -325,6 +328,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) { case 'e': SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_exit); break; + case 'o': + SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_vcpu_load); + break; case 'n': SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_halt_poll_ns); @@ -424,6 +430,8 @@ static int determineEventType(struct env *env) { env->event_type = HYPERCALL; } else if (env->execute_ioctl) { env->event_type = IOCTL; + } else if (env->execute_vcpu_load) { + env->event_type = VCPU_LOAD; } else { env->event_type = NONE_TYPE; // 或者根据需要设置一个默认的事件类型 } @@ -449,6 +457,12 @@ static int handle_event(void *ctx, void *data, size_t data_sz) { case EXIT: { break; } + case VCPU_LOAD: { + printf("%-20.6f %-15s %-6d/%-8d %-10d\n", timestamp_ms, + e->process.comm, e->process.pid, e->process.tid, + e->vcpu_load_data.vcpu_id); + break; + } case HALT_POLL: { // 使用 e->halt_poll_data 访问 HALT_POLL 特有成员 printf("%-18.6f %-15s %-6d/%-8d %-10s %-7d %-7d --> %d \n", @@ -695,6 +709,8 @@ static int print_event_head(struct env *env) { static void set_disable_load(struct kvm_watcher_bpf *skel) { bpf_program__set_autoload(skel->progs.tp_vcpu_wakeup, env.execute_vcpu_wakeup ? true : false); + bpf_program__set_autoload(skel->progs.kp_vmx_vcpu_load, + env.execute_vcpu_load ? true : false); bpf_program__set_autoload(skel->progs.fentry_kvm_vcpu_halt, env.execute_vcpu_wakeup ? 
true : false); bpf_program__set_autoload(skel->progs.tp_exit, @@ -821,10 +837,10 @@ int sort_by_key(struct kvm_watcher_bpf *skel, struct exit_key *keys, keys[0] = next_key; values[0] = exit_value; i++; + lookup_key = next_key; continue; } err = bpf_map_lookup_elem(fd, &next_key, &exit_value); - if (err < 0) { fprintf(stderr, "failed to lookup exit_value: %d\n", err); return -1; @@ -890,7 +906,7 @@ int print_exit_map(struct kvm_watcher_bpf *skel) { NS_TO_MS_WITH_DECIMAL(values[i].total_time), NS_TO_MS_WITH_DECIMAL(values[i].max_time), NS_TO_MS_WITH_DECIMAL(values[i].min_time), values[i].count, - getName(keys[i].reason, EXIT)); + getName(keys[i].reason, EXIT)); } } // clear the maps @@ -905,7 +921,6 @@ int print_exit_map(struct kvm_watcher_bpf *skel) { } return 0; } - void print_map_and_check_error(int (*print_func)(struct kvm_watcher_bpf *), struct kvm_watcher_bpf *skel, const char *map_name, int err) { @@ -915,7 +930,6 @@ void print_map_and_check_error(int (*print_func)(struct kvm_watcher_bpf *), printf("Error printing %s map: %d\n", map_name, err); } } - int main(int argc, char **argv) { // 定义一个环形缓冲区 struct ring_buffer *rb = NULL; From 0c02211a10cf1b811b211572a9970a98e959435e Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 22 Mar 2024 14:22:36 +0800 Subject: [PATCH 13/15] modify sort_bug --- .../kvm_watcher/src/kvm_watcher.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index 2259a45c4..5ece0ec34 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -257,7 +257,6 @@ int save_count_dirtypagemap_to_file(struct bpf_map *map) { static struct env { bool execute_vcpu_wakeup; bool execute_exit; - bool execute_vcpu_load; bool execute_halt_poll_ns; bool execute_mark_page_dirty; bool execute_page_fault; @@ -271,7 +270,6 @@ static struct env { enum EventType event_type; } env = { 
.execute_vcpu_wakeup = false, - .execute_vcpu_load = false, .execute_exit = false, .execute_halt_poll_ns = false, .execute_mark_page_dirty = false, @@ -293,7 +291,6 @@ int option_selected = 0; // 功能标志变量,确保激活子功能 // 具体解释命令行参数 static const struct argp_option opts[] = { {"vcpu_wakeup", 'w', NULL, 0, "Monitoring the wakeup of vcpu."}, - {"vcpu_load", 'o', NULL, 0, "Monitoring the load of vcpu."}, {"vm_exit", 'e', NULL, 0, "Monitoring the event of vm exit."}, {"halt_poll_ns", 'n', NULL, 0, "Monitoring the variation in vCPU halt-polling time."}, @@ -328,9 +325,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) { case 'e': SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_exit); break; - case 'o': - SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_vcpu_load); - break; case 'n': SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_halt_poll_ns); @@ -430,8 +424,6 @@ static int determineEventType(struct env *env) { env->event_type = HYPERCALL; } else if (env->execute_ioctl) { env->event_type = IOCTL; - } else if (env->execute_vcpu_load) { - env->event_type = VCPU_LOAD; } else { env->event_type = NONE_TYPE; // 或者根据需要设置一个默认的事件类型 } @@ -457,12 +449,6 @@ static int handle_event(void *ctx, void *data, size_t data_sz) { case EXIT: { break; } - case VCPU_LOAD: { - printf("%-20.6f %-15s %-6d/%-8d %-10d\n", timestamp_ms, - e->process.comm, e->process.pid, e->process.tid, - e->vcpu_load_data.vcpu_id); - break; - } case HALT_POLL: { // 使用 e->halt_poll_data 访问 HALT_POLL 特有成员 printf("%-18.6f %-15s %-6d/%-8d %-10s %-7d %-7d --> %d \n", @@ -709,8 +695,6 @@ static int print_event_head(struct env *env) { static void set_disable_load(struct kvm_watcher_bpf *skel) { bpf_program__set_autoload(skel->progs.tp_vcpu_wakeup, env.execute_vcpu_wakeup ? true : false); - bpf_program__set_autoload(skel->progs.kp_vmx_vcpu_load, - env.execute_vcpu_load ? true : false); bpf_program__set_autoload(skel->progs.fentry_kvm_vcpu_halt, env.execute_vcpu_wakeup ? 
true : false); bpf_program__set_autoload(skel->progs.tp_exit, @@ -841,6 +825,7 @@ int sort_by_key(struct kvm_watcher_bpf *skel, struct exit_key *keys, continue; } err = bpf_map_lookup_elem(fd, &next_key, &exit_value); + if (err < 0) { fprintf(stderr, "failed to lookup exit_value: %d\n", err); return -1; @@ -921,6 +906,7 @@ int print_exit_map(struct kvm_watcher_bpf *skel) { } return 0; } + void print_map_and_check_error(int (*print_func)(struct kvm_watcher_bpf *), struct kvm_watcher_bpf *skel, const char *map_name, int err) { @@ -930,6 +916,7 @@ void print_map_and_check_error(int (*print_func)(struct kvm_watcher_bpf *), printf("Error printing %s map: %d\n", map_name, err); } } + int main(int argc, char **argv) { // 定义一个环形缓冲区 struct ring_buffer *rb = NULL; From 013e14695299ee5bcd3d694e91f49996486397fe Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 22 Mar 2024 14:38:23 +0800 Subject: [PATCH 14/15] add fun --- .../kvm_watcher/include/kvm_vcpu.h | 23 ++++++++++++++++++- .../kvm_watcher/include/kvm_watcher.h | 7 +++++- .../kvm_watcher/src/kvm_watcher.bpf.c | 6 ++++- .../kvm_watcher/src/kvm_watcher.c | 19 ++++++++++++--- 4 files changed, 49 insertions(+), 6 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h b/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h index 7bba6477d..4be02a2c4 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h @@ -53,6 +53,7 @@ struct { __type(key, u32); __type(value, u32); } vcpu_tid SEC(".maps"); + // 记录vcpu_halt的id信息 static int trace_kvm_vcpu_halt(struct kvm_vcpu *vcpu, pid_t vm_pid) { CHECK_PID(vm_pid); @@ -102,7 +103,27 @@ static int trace_kvm_halt_poll_ns(struct halt_poll_ns *ctx, void *rb, bpf_ringbuf_submit(e, 0); return 0; } - +//记录VCPU调度的信息 +static int trace_vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu, void *rb, + struct common_event *e) { + RESERVE_RINGBUF_ENTRY(rb, e); + //获取pid&tid + pid_t pid, tid; + u64 id; + id = bpf_get_current_pid_tgid(); + pid = 
id >> 32; + tid = (u32)id; + // //获取时间 + u64 ts = bpf_ktime_get_ns(); + e->process.pid = pid; + e->process.tid = tid; + e->time = ts; + bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); + bpf_probe_read_kernel(&e->vcpu_load_data.vcpu_id, + sizeof(e->vcpu_load_data.vcpu_id), &vcpu->vcpu_id); + bpf_ringbuf_submit(e, 0); + return 1; +} static int trace_mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn, void *rb, diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h index 97c77244b..d94ea6bfa 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h @@ -89,7 +89,6 @@ struct reason_info { __u64 time; __u64 reason; }; - struct exit_key { __u64 reason; __u32 pid; @@ -137,6 +136,7 @@ struct process { enum EventType { NONE_TYPE, VCPU_WAKEUP, + VCPU_LOAD, EXIT, HALT_POLL, MARK_PAGE_DIRTY, @@ -161,6 +161,11 @@ struct common_event { // VCPU_WAKEUP 特有成员 } vcpu_wakeup_data; + struct { + __u32 vcpu_id; + // VCPU_LOAD 特有成员 + } vcpu_load_data; + struct { __u32 reason_number; __u64 duration_ns; diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c index 406e5d273..e7d480cc3 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c @@ -64,7 +64,11 @@ SEC("tp/kvm/kvm_entry") int tp_entry(struct exit *ctx) { return trace_kvm_entry(); } - +//记录VCPU调度的信息 +SEC("kprobe/vmx_vcpu_load") +int BPF_KPROBE(kp_vmx_vcpu_load, struct kvm_vcpu *vcpu, int cpu) { + return trace_vmx_vcpu_load(vcpu, cpu, &rb, e); +} SEC("kprobe/mark_page_dirty_in_slot") int BPF_KPROBE(kp_mark_page_dirty_in_slot, struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn) { diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index 5ece0ec34..2259a45c4 
100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -257,6 +257,7 @@ int save_count_dirtypagemap_to_file(struct bpf_map *map) { static struct env { bool execute_vcpu_wakeup; bool execute_exit; + bool execute_vcpu_load; bool execute_halt_poll_ns; bool execute_mark_page_dirty; bool execute_page_fault; @@ -270,6 +271,7 @@ static struct env { enum EventType event_type; } env = { .execute_vcpu_wakeup = false, + .execute_vcpu_load = false, .execute_exit = false, .execute_halt_poll_ns = false, .execute_mark_page_dirty = false, @@ -291,6 +293,7 @@ int option_selected = 0; // 功能标志变量,确保激活子功能 // 具体解释命令行参数 static const struct argp_option opts[] = { {"vcpu_wakeup", 'w', NULL, 0, "Monitoring the wakeup of vcpu."}, + {"vcpu_load", 'o', NULL, 0, "Monitoring the load of vcpu."}, {"vm_exit", 'e', NULL, 0, "Monitoring the event of vm exit."}, {"halt_poll_ns", 'n', NULL, 0, "Monitoring the variation in vCPU halt-polling time."}, @@ -325,6 +328,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) { case 'e': SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_exit); break; + case 'o': + SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_vcpu_load); + break; case 'n': SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_halt_poll_ns); @@ -424,6 +430,8 @@ static int determineEventType(struct env *env) { env->event_type = HYPERCALL; } else if (env->execute_ioctl) { env->event_type = IOCTL; + } else if (env->execute_vcpu_load) { + env->event_type = VCPU_LOAD; } else { env->event_type = NONE_TYPE; // 或者根据需要设置一个默认的事件类型 } @@ -449,6 +457,12 @@ static int handle_event(void *ctx, void *data, size_t data_sz) { case EXIT: { break; } + case VCPU_LOAD: { + printf("%-20.6f %-15s %-6d/%-8d %-10d\n", timestamp_ms, + e->process.comm, e->process.pid, e->process.tid, + e->vcpu_load_data.vcpu_id); + break; + } case HALT_POLL: { // 使用 e->halt_poll_data 访问 HALT_POLL 特有成员 printf("%-18.6f %-15s %-6d/%-8d %-10s 
%-7d %-7d --> %d \n", @@ -695,6 +709,8 @@ static int print_event_head(struct env *env) { static void set_disable_load(struct kvm_watcher_bpf *skel) { bpf_program__set_autoload(skel->progs.tp_vcpu_wakeup, env.execute_vcpu_wakeup ? true : false); + bpf_program__set_autoload(skel->progs.kp_vmx_vcpu_load, + env.execute_vcpu_load ? true : false); bpf_program__set_autoload(skel->progs.fentry_kvm_vcpu_halt, env.execute_vcpu_wakeup ? true : false); bpf_program__set_autoload(skel->progs.tp_exit, @@ -825,7 +841,6 @@ int sort_by_key(struct kvm_watcher_bpf *skel, struct exit_key *keys, continue; } err = bpf_map_lookup_elem(fd, &next_key, &exit_value); - if (err < 0) { fprintf(stderr, "failed to lookup exit_value: %d\n", err); return -1; @@ -906,7 +921,6 @@ int print_exit_map(struct kvm_watcher_bpf *skel) { } return 0; } - void print_map_and_check_error(int (*print_func)(struct kvm_watcher_bpf *), struct kvm_watcher_bpf *skel, const char *map_name, int err) { @@ -916,7 +930,6 @@ void print_map_and_check_error(int (*print_func)(struct kvm_watcher_bpf *), printf("Error printing %s map: %d\n", map_name, err); } } - int main(int argc, char **argv) { // 定义一个环形缓冲区 struct ring_buffer *rb = NULL; From 26537d1c149a29f63900b06b0345fe2c4cd4ee0a Mon Sep 17 00:00:00 2001 From: YYS Date: Tue, 2 Apr 2024 14:47:32 +0800 Subject: [PATCH 15/15] vcpu_load --- .../kvm_watcher/include/kvm_vcpu.h | 94 +++++++++++++++---- .../kvm_watcher/include/kvm_watcher.h | 23 ++++- .../kvm_watcher/src/kvm_watcher.bpf.c | 9 +- .../kvm_watcher/src/kvm_watcher.c | 58 +++++++++++- 4 files changed, 154 insertions(+), 30 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h b/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h index f38a939b2..a06fc4a88 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h @@ -54,6 +54,20 @@ struct { __type(value, u32); } vcpu_tid SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + 
__uint(max_entries, 8192); + __type(key, struct load_key); + __type(value, struct load_value); +} load_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 8192); + __type(key, struct load_key); + __type(value, struct time_value); +} load_time SEC(".maps"); + // 记录vcpu_halt的id信息 static int trace_kvm_vcpu_halt(struct kvm_vcpu *vcpu) { u32 tid = bpf_get_current_pid_tgid(); @@ -102,26 +116,66 @@ static int trace_kvm_halt_poll_ns(struct halt_poll_ns *ctx, void *rb, bpf_ringbuf_submit(e, 0); return 0; } -//记录VCPU调度的信息 -static int trace_vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu, void *rb, - struct common_event *e) { - RESERVE_RINGBUF_ENTRY(rb, e); - //获取pid&tid - pid_t pid, tid; - u64 id; - id = bpf_get_current_pid_tgid(); - pid = id >> 32; - tid = (u32)id; - // //获取时间 - u64 ts = bpf_ktime_get_ns(); - e->process.pid = pid; - e->process.tid = tid; - e->time = ts; - bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); - bpf_probe_read_kernel(&e->vcpu_load_data.vcpu_id, - sizeof(e->vcpu_load_data.vcpu_id), &vcpu->vcpu_id); - bpf_ringbuf_submit(e, 0); - return 1; +//记录VCPU调度的信息--进调度 +static int trace_vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + u32 pid = bpf_get_current_pid_tgid() >> 32; + u32 tid = bpf_get_current_pid_tgid(); + u64 time = bpf_ktime_get_ns(); + u32 vcpu_id; + if (!vcpu) { + return 0; + } + bpf_probe_read_kernel(&vcpu_id, sizeof(u32), &vcpu->vcpu_id); + struct time_value time_value; + __builtin_memset(&time_value, 0, sizeof(struct time_value)); + time_value.time = time; + time_value.vcpu_id = vcpu_id; + time_value.pcpu_id = cpu; + struct load_key curr_load_key; + __builtin_memset(&curr_load_key, 0, sizeof(struct load_key)); + curr_load_key.pid = pid; + curr_load_key.tid = tid; + bpf_map_update_elem(&load_time, &curr_load_key, &time_value, BPF_ANY); + return 0; +} +//记录VCPU调度的信息--出调度 +static int trace_vmx_vcpu_put() { + u32 pid = bpf_get_current_pid_tgid() >> 32; + u32 tid = bpf_get_current_pid_tgid(); + 
struct load_key load_key; + __builtin_memset(&load_key, 0, sizeof(struct load_key)); + load_key.pid = pid; + load_key.tid = tid; + struct time_value *t_value; + t_value = bpf_map_lookup_elem(&load_time, &load_key); + if (!t_value) { + return 0; + } + u64 duration = bpf_ktime_get_ns() - t_value->time; + bpf_map_delete_elem(&load_time, &load_key); + struct load_value *load_value; + load_value = bpf_map_lookup_elem(&load_map, &load_key); + if (load_value) { + load_value->count++; + load_value->total_time += duration; + if (load_value->max_time < duration) { + load_value->max_time = duration; + } + if (load_value->min_time > duration) { + load_value->min_time = duration; + } + load_value->pcpu_id = t_value->pcpu_id; + load_value->vcpu_id = t_value->vcpu_id; + } else { + struct load_value new_load_value = {.count = 1, + .max_time = duration, + .total_time = duration, + .min_time = duration, + .vcpu_id = t_value->vcpu_id, + .pcpu_id = t_value->pcpu_id}; + bpf_map_update_elem(&load_map, &load_key, &new_load_value, BPF_ANY); + } + return 0; } static int trace_mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h index 4cc3db9c4..b9be73d32 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h @@ -93,6 +93,24 @@ struct exit_key { __u32 tid; }; +struct load_key { + __u32 pid; + __u32 tid; +}; +struct load_value { + __u64 max_time; + __u64 total_time; + __u64 min_time; + __u32 count; + __u32 vcpu_id; + __u32 pcpu_id; +}; +struct time_value { + __u32 pad; + __u64 time; + __u32 vcpu_id; + __u32 pcpu_id; +}; struct exit_value { __u64 max_time; __u64 total_time; @@ -159,11 +177,6 @@ struct common_event { // VCPU_WAKEUP 特有成员 } vcpu_wakeup_data; - struct { - __u32 vcpu_id; - // VCPU_LOAD 特有成员 - } vcpu_load_data; - struct { __u32 reason_number; __u64 duration_ns; diff --git 
a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c index a7867c8eb..09b72f223 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c @@ -67,10 +67,15 @@ SEC("tp/kvm/kvm_entry") int tp_entry(struct exit *ctx) { return trace_kvm_entry(); } -//记录VCPU调度的信息 +//记录VCPU调度的信息--进入 SEC("kprobe/vmx_vcpu_load") int BPF_KPROBE(kp_vmx_vcpu_load, struct kvm_vcpu *vcpu, int cpu) { - return trace_vmx_vcpu_load(vcpu, cpu, &rb, e); + return trace_vmx_vcpu_load(vcpu, cpu); +} +//记录VCPU调度的信息--退出 +SEC("kprobe/vmx_vcpu_put") +int BPF_KPROBE(kp_vmx_vcpu_put, struct kvm_vcpu *vcpu) { + return trace_vmx_vcpu_put(); } SEC("kprobe/mark_page_dirty_in_slot") int BPF_KPROBE(kp_mark_page_dirty_in_slot, struct kvm *kvm, diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index 1a751baf8..5bf0afb14 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -466,9 +466,6 @@ static int handle_event(void *ctx, void *data, size_t data_sz) { break; } case VCPU_LOAD: { - printf("%-20.6f %-15s %-6d/%-8d %-10d\n", timestamp_ms, - e->process.comm, e->process.pid, e->process.tid, - e->vcpu_load_data.vcpu_id); break; } case HALT_POLL: { @@ -667,6 +664,9 @@ static int print_event_head(struct env *env) { case EXIT: printf("Waiting vm_exit ... \n"); break; + case VCPU_LOAD: + printf("Waiting vm_vcpu_load ... \n"); + break; case HALT_POLL: printf("%-18s %-15s %-15s %-10s %-7s %-11s %-10s\n", "TIME(ms)", "COMM", "PID/TID", "TYPE", "VCPU_ID", "OLD(ns)", "NEW(ns)"); @@ -723,6 +723,8 @@ static void set_disable_load(struct kvm_watcher_bpf *skel) { env.execute_vcpu_wakeup ? true : false); bpf_program__set_autoload(skel->progs.kp_vmx_vcpu_load, env.execute_vcpu_load ? true : false); + bpf_program__set_autoload(skel->progs.kp_vmx_vcpu_put, + env.execute_vcpu_load ? 
true : false); bpf_program__set_autoload(skel->progs.fentry_kvm_vcpu_halt, env.execute_vcpu_wakeup ? true : false); bpf_program__set_autoload(skel->progs.tp_exit, @@ -875,6 +877,52 @@ int sort_by_key(struct kvm_watcher_bpf *skel, struct exit_key *keys, } return count; } +int print_vcpu_load_map(struct kvm_watcher_bpf *skel) { + int fd = bpf_map__fd(skel->maps.load_map); + int err; + struct load_key lookup_key = {}; + struct load_key next_key = {}; + struct load_value load_value = {}; + int first = 1; + while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) { + if (first) { + first = 0; + printf("\nTIME:%s\n", getCurrentTimeFormatted()); + printf("%-12s %-12s %-12s %-12s %-12s %-12s %-12s %-12s\n", "pid", + "tid", "total_time", "max_time", "min_time", "counts", + "vcpuid", "pcpuid"); + printf( + "------------ ------------ ------------ ------------ " + "------------ " + "------------ " + "------------ " + "------------\n"); + } + err = bpf_map_lookup_elem(fd, &next_key, &load_value); + if (err < 0) { + fprintf(stderr, "failed to lookup vcpu_load_value: %d\n", err); + return -1; + } + printf("%-12d %-12d %-12.4f %-12.4f %-12.4f %-12u %-12d %-12d\n", + next_key.pid, next_key.tid, + NS_TO_MS_WITH_DECIMAL(load_value.total_time), + NS_TO_MS_WITH_DECIMAL(load_value.max_time), + NS_TO_MS_WITH_DECIMAL(load_value.min_time), load_value.count, + load_value.vcpu_id, load_value.pcpu_id); + lookup_key = next_key; + } + // clear the maps + memset(&lookup_key, 0, sizeof(struct load_key)); + while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) { + err = bpf_map_delete_elem(fd, &next_key); + if (err < 0) { + fprintf(stderr, "failed to cleanup counters: %d\n", err); + return -1; + } + lookup_key = next_key; + } + return 0; +} int print_exit_map(struct kvm_watcher_bpf *skel) { int fd = bpf_map__fd(skel->maps.exit_map); int err; @@ -1016,6 +1064,10 @@ int main(int argc, char **argv) { if (env.execute_exit) { print_map_and_check_error(print_exit_map, skel, "exit", err); } + if 
(env.execute_vcpu_load) { + print_map_and_check_error(print_vcpu_load_map, skel, "vcpu_load", + err); + } /* Ctrl-C will cause -EINTR */ if (err == -EINTR) { err = 0;