From d94301df207215b03584d988e29d1459a082a34e Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 10 Jan 2025 19:26:05 -0800 Subject: [PATCH] wipt Signed-off-by: Kir Kolyshkin --- libcontainer/configs/config.go | 9 ++--- libcontainer/configs/config_linux.go | 33 ++++++++++++++++-- libcontainer/init_linux.go | 21 ++---------- libcontainer/nsenter/log.c | 9 +++-- libcontainer/nsenter/log.h | 3 ++ libcontainer/nsenter/nsexec.c | 39 ++++++++++----------- libcontainer/process_linux.go | 22 ++++-------- libcontainer/specconv/spec_linux.go | 6 ++-- tests/integration/cpu_affinity.bats | 51 +++++++++++++++------------- utils_linux.go | 7 +++- 10 files changed, 111 insertions(+), 89 deletions(-) diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index 3169398c91e..f6554982c27 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -289,10 +289,11 @@ func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) { }, nil } -type ( - IOPriority = specs.LinuxIOPriority - CPUAffinity = specs.CPUAffinity -) +type IOPriority = specs.LinuxIOPriority + +type CPUAffinity struct { + Initial, Final *unix.CPUSet +} type ( HookName string diff --git a/libcontainer/configs/config_linux.go b/libcontainer/configs/config_linux.go index e41af765c55..17a3eb863f5 100644 --- a/libcontainer/configs/config_linux.go +++ b/libcontainer/configs/config_linux.go @@ -8,6 +8,8 @@ import ( "strings" "golang.org/x/sys/unix" + + "github.com/opencontainers/runtime-spec/specs-go" ) var ( @@ -100,8 +102,12 @@ func (c Config) hostIDFromMapping(containerID int64, uMap []IDMap) (int64, bool) return -1, false } -// ToCPUSet converts a [CPUAffinity] field (initial or final) to [unix.CPUSet]. -func ToCPUSet(str string) (*unix.CPUSet, error) { +// toCPUSet converts a [specs.CPUAffinity] field (initial or final) to [unix.CPUSet] +// used by [CPUAffinity]. 
+func toCPUSet(str string) (*unix.CPUSet, error) { + if str == "" { + return nil, nil + } s := new(unix.CPUSet) for _, r := range strings.Split(str, ",") { // Allow extra spaces around. @@ -136,3 +142,26 @@ func ToCPUSet(str string) (*unix.CPUSet, error) { return s, nil } + +// ConvertCPUAffinity converts [specs.CPUAffinity] to [CPUAffinity]. It returns nil with no error when sa is nil or when both sa.Initial and sa.Final are empty; an error from parsing either field is wrapped and returned. +func ConvertCPUAffinity(sa *specs.CPUAffinity) (*CPUAffinity, error) { + if sa == nil { + return nil, nil + } + initial, err := toCPUSet(sa.Initial) + if err != nil { + return nil, fmt.Errorf("bad CPUAffinity.Initial: %w", err) + } + final, err := toCPUSet(sa.Final) + if err != nil { + return nil, fmt.Errorf("bad CPUAffinity.Final: %w", err) + } + if initial == nil && final == nil { + return nil, nil + } + + return &CPUAffinity{ + Initial: initial, + Final: final, + }, nil +} diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index e24d81817c1..d096054278d 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -71,7 +71,7 @@ type initConfig struct { RootlessCgroups bool `json:"rootless_cgroups,omitempty"` SpecState *specs.State `json:"spec_state,omitempty"` Cgroup2Path string `json:"cgroup2_path,omitempty"` - CPUAffinity *specs.CPUAffinity `json:"cpu_affinity,omitempty"` + CPUAffinity *configs.CPUAffinity `json:"cpu_affinity,omitempty"` } // Init is part of "runc init" implementation. @@ -151,24 +151,7 @@ func startInitialization() (retErr error) { logrus.SetOutput(logPipe) logrus.SetFormatter(new(logrus.JSONFormatter)) - logrus.Debugf("child process in init(), pid=%d", unix.Getpid()) - - // See tests/integration/cpu_affinity.bats. 
- if logrus.GetLevel() >= logrus.DebugLevel { - var cpus unix.CPUSet - err := unix.SchedGetaffinity(0, &cpus) - if err != nil { - logrus.Debugf("sched_getaffinity: error %v", err) - } else { - var list []int - for i := 0; i < 32; i++ { - if cpus.IsSet(i) { - list = append(list, i) - } - } - logrus.Debugf("Initial CPUs: %v", list) - } - } + logrus.Debugf("child process in init()") // Only init processes have FIFOFD. var fifoFile *os.File diff --git a/libcontainer/nsenter/log.c b/libcontainer/nsenter/log.c index 086b539833c..72774cb097e 100644 --- a/libcontainer/nsenter/log.c +++ b/libcontainer/nsenter/log.c @@ -31,6 +31,11 @@ void setup_logpipe(void) loglevel = i; } +bool log_enabled_for(int level) +{ + return (logfd >= 0 && level <= loglevel); +} + /* Defined in nsexec.c */ extern int current_stage; @@ -40,8 +45,8 @@ void write_log(int level, const char *format, ...) va_list args; int ret; - if (logfd < 0 || level > loglevel) - goto out; + if (!log_enabled_for(level)) + return; va_start(args, format); ret = vasprintf(&message, format, args); diff --git a/libcontainer/nsenter/log.h b/libcontainer/nsenter/log.h index 1fe95a111f7..3e18de68764 100644 --- a/libcontainer/nsenter/log.h +++ b/libcontainer/nsenter/log.h @@ -1,6 +1,7 @@ #ifndef NSENTER_LOG_H #define NSENTER_LOG_H +#include <stdbool.h> #include <stdio.h> /* @@ -20,6 +21,8 @@ */ void setup_logpipe(void); +bool log_enabled_for(int level); + void write_log(int level, const char *format, ...) 
__attribute__((format(printf, 2, 3))); extern int logfd; diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 376c3beffb8..f7fe445c86f 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -673,20 +673,23 @@ static void update_timens_offsets(pid_t pid, char *map, size_t map_len) bail("failed to update /proc/%d/timens_offsets", pid); } -void print_cpu_affinity() { - cpu_set_t cpus = {}; - - if (sched_getaffinity(0, sizeof(cpus), &cpus) >= 0) { - char buf[128], *bp; - bp = buf; - for (int i = 0; i < 32; i++) { - if CPU_ISSET(i, &cpus) - bp += sprintf(bp, "%d ", i); - } - write_log(DEBUG, "CPUs: %s", buf); - } else { +void print_cpu_affinity() +{ + cpu_set_t cpus = { }; + size_t i, mask = 0; + + if (sched_getaffinity(0, sizeof(cpus), &cpus) < 0) { write_log(WARNING, "sched_getaffinity: %m"); + return; + } + + /* Do not print the complete mask, we only need a few first CPUs. The 1 is cast to size_t because shifting a plain int by 31 or more bits is undefined. */ + for (i = 0; i < sizeof(mask) * 8; i++) { + if (CPU_ISSET(i, &cpus)) + mask |= (size_t)1 << i; } + + write_log(DEBUG, "affinity: 0x%zx", mask); } void nsexec(void) @@ -713,10 +716,13 @@ void nsexec(void) return; } - print_cpu_affinity(); - write_log(DEBUG, "=> nsexec container setup"); + /* For ../../tests/integration/cpu_affinity.bats. */ + if (log_enabled_for(DEBUG)) { + print_cpu_affinity(); + } + /* Parse all of the netlink configuration. */ nl_parse(pipenum, &config); @@ -821,8 +827,6 @@ void nsexec(void) current_stage = STAGE_PARENT; prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, 0, 0); write_log(DEBUG, "~> nsexec stage-0"); - print_cpu_affinity(); - /* Start the process of getting a container. */ write_log(DEBUG, "spawn stage-1"); @@ -987,8 +991,6 @@ void nsexec(void) /* For debugging. */ prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, 0, 0); write_log(DEBUG, "~> nsexec stage-1"); - print_cpu_affinity(); - /* * We need to setns first. 
We cannot do this earlier (in stage 0) @@ -1196,7 +1198,6 @@ void nsexec(void) /* Finish executing, let the Go runtime take over. */ write_log(DEBUG, "<= nsexec container setup"); - print_cpu_affinity(); write_log(DEBUG, "booting up go runtime ..."); return; } diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index 23a1eddef33..7dafb9b7df4 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -166,15 +166,9 @@ type setnsProcess struct { // Starts setns process with specified initial CPU affinity. func (p *setnsProcess) startWithCPUAffinity() error { aff := p.config.CPUAffinity - if aff == nil || aff.Initial == "" { + if aff == nil || aff.Initial == nil { return p.cmd.Start() } - logrus.Debugf("Initial CPU affinity: %s", aff.Initial) - cpus, err := configs.ToCPUSet(aff.Initial) - if err != nil { - return fmt.Errorf("invalid CPUAffinity.initial: %w", err) - } - errCh := make(chan error) defer close(errCh) @@ -182,9 +176,9 @@ func (p *setnsProcess) startWithCPUAffinity() error { go func() { runtime.LockOSThread() // Command inherits the CPU affinity. 
- if err := unix.SchedSetaffinity(unix.Gettid(), cpus); err != nil { + if err := unix.SchedSetaffinity(unix.Gettid(), aff.Initial); err != nil { runtime.UnlockOSThread() - errCh <- fmt.Errorf("setting initial CPU affinity: %w", err) + errCh <- fmt.Errorf("error setting initial CPU affinity: %w", err) return } @@ -200,15 +194,11 @@ func (p *setnsProcess) startWithCPUAffinity() error { func (p *setnsProcess) setFinalCPUAffinity() error { aff := p.config.CPUAffinity - if aff == nil || aff.Final == "" { + if aff == nil || aff.Final == nil { return nil } - cpus, err := configs.ToCPUSet(aff.Final) - if err != nil { - return fmt.Errorf("invalid CPUAffinity.final: %w", err) - } - if err := unix.SchedSetaffinity(p.pid(), cpus); err != nil { - return fmt.Errorf("setting final CPU affinity: %w", err) + if err := unix.SchedSetaffinity(p.pid(), aff.Final); err != nil { + return fmt.Errorf("error setting final CPU affinity: %w", err) } return nil } diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 4cbce343e1f..e59b3c7fb03 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -556,9 +556,9 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { ioPriority := *spec.Process.IOPriority config.IOPriority = &ioPriority } - if spec.Process.ExecCPUAffinity != nil { - a := *spec.Process.ExecCPUAffinity - config.ExecCPUAffinity = &a + config.ExecCPUAffinity, err = configs.ConvertCPUAffinity(spec.Process.ExecCPUAffinity) + if err != nil { + return nil, err } } diff --git a/tests/integration/cpu_affinity.bats b/tests/integration/cpu_affinity.bats index 8a823ce23cc..95443376f43 100644 --- a/tests/integration/cpu_affinity.bats +++ b/tests/integration/cpu_affinity.bats @@ -21,14 +21,29 @@ function first_cpu() { all_cpus | sed 's/[-,].*//g' } -@test "runc exec [CPU affinity, initial set via process.json]" { +# Convert cpus to mask, as printed by nsexec. 
+# NOTE the range conversion is not proper. +function cpus_to_mask() { + local cpus=$* mask=0 + + cpus=${cpus//,/-} # 1. "," --> "-". + cpus=${cpus//-/ } # 2. "-" --> " ". + + for c in $cpus; do + mask=$((mask | 1< "-". - exp=${exp//-/ } # 2. "-" --> " ". - echo "CPUS: $cpus, exp: $exp" + mask=$(cpus_to_mask "$cpus") + echo "CPUS: $cpus, mask: $mask" runc --debug exec --process <(echo "$proc") ct1 - [[ "$output" == *"nsexec["*": CPUs: $exp "* ]] - [[ "$output" == *"nsexec-0["*": CPUs: $exp "* ]] - [[ "$output" == *"nsexec-1["*": CPUs: $exp "* ]] - [[ "$output" == *"nsexec-2["*": CPUs: $exp "* ]] - [[ "$output" == *"Initial CPU affinity: $cpus"* ]] - [[ "$output" == *"Initial CPUs: [$exp]"* ]] + [[ "$output" == *"nsexec"*": affinity: $mask"* ]] done } -@test "runc exec [CPU affinity, initial and final are set]" { - first="$(first_cpu)" - second=$((first + 1)) # Hacky; might not work in all environments. +@test "runc exec [CPU affinity, initial and final set from config.json]" { + initial="$(first_cpu)" + final=$((initial + 1)) # Hacky; might not work in all environments. - update_config " .process.execCPUAffinity.initial = \"$first\" - | .process.execCPUAffinity.final = \"$second\"" + update_config " .process.execCPUAffinity.initial = \"$initial\" + | .process.execCPUAffinity.final = \"$final\"" runc run -d --console-socket "$CONSOLE_SOCKET" ct1 [ "$status" -eq 0 ] runc --debug exec ct1 grep "Cpus_allowed_list:" /proc/self/status [ "$status" -eq 0 ] - [[ "$output" == *"nsexec["*": CPUs: $first "* ]] - [[ "$output" == *"nsexec-0["*": CPUs: $first "* ]] - [[ "$output" == *"nsexec-1["*": CPUs: $first "* ]] - [[ "$output" == *"nsexec-2["*": CPUs: $first "* ]] - [[ "$output" == *"Initial CPU affinity: $first"* ]] - [[ "$output" == *"Initial CPUs: [$first]"* ]] - [[ "$output" == *"Cpus_allowed_list: $second"* ]] # Mind the literal tab. 
+ mask=$(cpus_to_mask "$initial") + [[ "$output" == *"nsexec"*": affinity: $mask"* ]] + [[ "$output" == *"Cpus_allowed_list: $final"* ]] # Mind the literal tab. } diff --git a/utils_linux.go b/utils_linux.go index edbabdfa99c..20bf8511482 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -57,7 +57,6 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) { AppArmorProfile: p.ApparmorProfile, Scheduler: p.Scheduler, IOPriority: p.IOPriority, - CPUAffinity: p.ExecCPUAffinity, } if p.ConsoleSize != nil { @@ -83,6 +82,12 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) { } lp.Rlimits = append(lp.Rlimits, rl) } + aff, err := configs.ConvertCPUAffinity(p.ExecCPUAffinity) + if err != nil { + return nil, err + } + lp.CPUAffinity = aff + return lp, nil }