From 6cfd75f1f1729761b06d8edd4bbf38907aa4a591 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 21 Oct 2024 15:50:38 -0700 Subject: [PATCH] runc exec: implement CPU affinity As per - https://github.com/opencontainers/runtime-spec/pull/1253 - https://github.com/opencontainers/runtime-spec/pull/1261 Add some tests (alas it's impossible to test initial CPU affinity without adding debug logging). Signed-off-by: Kir Kolyshkin --- libcontainer/configs/config.go | 8 +++- libcontainer/configs/config_linux.go | 41 ++++++++++++++++++ libcontainer/container_linux.go | 4 ++ libcontainer/init_linux.go | 18 ++++++++ libcontainer/process.go | 2 + libcontainer/process_linux.go | 59 +++++++++++++++++++++++-- libcontainer/specconv/spec_linux.go | 5 +++ tests/integration/cpu_affinity.bats | 64 ++++++++++++++++++++++++++++ utils_linux.go | 1 + 9 files changed, 198 insertions(+), 4 deletions(-) create mode 100644 tests/integration/cpu_affinity.bats diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index dbf34f120cc..399cfc7af54 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -225,6 +225,9 @@ type Config struct { // IOPriority is the container's I/O priority. IOPriority *IOPriority `json:"io_priority,omitempty"` + + // ExecCPUAffinity is CPU affinity for a non-init process to be run in the container. + ExecCPUAffinity *CPUAffinity `json:"exec_cpu_affinity,omitempty"` } // Scheduler is based on the Linux sched_setattr(2) syscall. 
@@ -286,7 +289,10 @@ func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
 	}, nil
 }
 
-type IOPriority = specs.LinuxIOPriority
+type (
+	IOPriority  = specs.LinuxIOPriority
+	CPUAffinity = specs.CPUAffinity
+)
 
 type (
 	HookName string
diff --git a/libcontainer/configs/config_linux.go b/libcontainer/configs/config_linux.go
index e401f5331b4..e41af765c55 100644
--- a/libcontainer/configs/config_linux.go
+++ b/libcontainer/configs/config_linux.go
@@ -4,6 +4,10 @@ import (
 	"errors"
 	"fmt"
 	"math"
+	"strconv"
+	"strings"
+
+	"golang.org/x/sys/unix"
 )
 
 var (
@@ -95,3 +99,60 @@ func (c Config) hostIDFromMapping(containerID int64, uMap []IDMap) (int64, bool)
 	}
 	return -1, false
 }
+
+// ToCPUSet converts a [CPUAffinity] field (initial or final) to [unix.CPUSet].
+//
+// The field is a comma-separated list of CPU numbers and inclusive ranges,
+// e.g. "0-3,7". Spaces around elements and empty elements are ignored.
+// An error is returned for a malformed element, a reversed range, or a CPU
+// number that does not fit into [unix.CPUSet].
+func ToCPUSet(str string) (*unix.CPUSet, error) {
+	s := new(unix.CPUSet)
+	// CPUSet.Set silently ignores CPU numbers it has no room for, so read
+	// the bit back and fail rather than return a set that quietly lacks
+	// a requested CPU. Failing on the first such CPU also keeps a huge
+	// range like "0-2000000000" from being iterated in full.
+	set := func(cpu int) error {
+		s.Set(cpu)
+		if !s.IsSet(cpu) {
+			return fmt.Errorf("unsupported CPU number %d", cpu)
+		}
+		return nil
+	}
+	// Parse with bitSize 31 so that the result always fits
+	// into a non-negative int, even where int is 32 bits wide.
+	parse := func(v string) (int, error) {
+		n, err := strconv.ParseUint(v, 10, 31)
+		return int(n), err
+	}
+	for _, r := range strings.Split(str, ",") {
+		// Allow extra spaces around.
+		r = strings.TrimSpace(r)
+		// Allow empty elements (extra commas).
+		if r == "" {
+			continue
+		}
+		r0, r1, isRange := strings.Cut(r, "-")
+		start, err := parse(r0)
+		if err != nil {
+			return nil, err
+		}
+		// A single CPU is handled as a range of one.
+		end := start
+		if isRange {
+			if end, err = parse(r1); err != nil {
+				return nil, err
+			}
+		}
+		if start > end {
+			return nil, errors.New("invalid range: " + r)
+		}
+		for i := start; i <= end; i++ {
+			if err := set(i); err != nil {
+				return nil, err
+			}
+		}
+	}
+
+	return s, nil
+}
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index c99c8a6eea5..419832db4e9 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -697,6 +697,7 @@ func (c *Container) newInitConfig(process *Process) *initConfig {
 		AppArmorProfile:  c.config.AppArmorProfile,
 		ProcessLabel:     c.config.ProcessLabel,
 		Rlimits:          c.config.Rlimits,
+		CPUAffinity:      c.config.ExecCPUAffinity,
 		CreateConsole:    process.ConsoleSocket != nil,
 		ConsoleWidth:
process.ConsoleWidth, ConsoleHeight: process.ConsoleHeight, @@ -713,6 +714,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig { if len(process.Rlimits) > 0 { cfg.Rlimits = process.Rlimits } + if process.CPUAffinity != nil { + cfg.CPUAffinity = process.CPUAffinity + } if cgroups.IsCgroup2UnifiedMode() { cfg.Cgroup2Path = c.cgroupManager.Path("") } diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index b218a6cb126..0de34aa3a3a 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -72,6 +72,7 @@ type initConfig struct { RootlessCgroups bool `json:"rootless_cgroups,omitempty"` SpecState *specs.State `json:"spec_state,omitempty"` Cgroup2Path string `json:"cgroup2_path,omitempty"` + CPUAffinity *specs.CPUAffinity `json:"cpu_affinity,omitempty"` } // Init is part of "runc init" implementation. @@ -199,6 +200,23 @@ func startInitialization() (retErr error) { } }() + // See tests/integration/cpu_affinity.bats. + if logrus.GetLevel() >= logrus.DebugLevel { + var cpus unix.CPUSet + err := unix.SchedGetaffinity(0, &cpus) + if err != nil { + logrus.Debugf("sched_getaffinity: error %v", err) + } else { + var list []int + for i := 0; i < 256; i++ { + if cpus.IsSet(i) { + list = append(list, i) + } + } + logrus.Debugf("Initial CPUs: %v", list) + } + } + var config initConfig if err := json.NewDecoder(initPipe).Decode(&config); err != nil { return err diff --git a/libcontainer/process.go b/libcontainer/process.go index 114b3f2b6cb..5339583ff57 100644 --- a/libcontainer/process.go +++ b/libcontainer/process.go @@ -102,6 +102,8 @@ type Process struct { Scheduler *configs.Scheduler IOPriority *configs.IOPriority + + CPUAffinity *configs.CPUAffinity } // Wait waits for the process to exit. 
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index 68a5fd7bcd4..9971f8d4f7b 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -163,13 +163,63 @@ type setnsProcess struct { initProcessPid int } +// startWithCPUAffinity starts the setns process; when an initial CPU affinity is configured, the process is started from a goroutine locked to an OS thread that has that affinity applied first. +func (p *setnsProcess) startWithCPUAffinity() error { + aff := p.config.CPUAffinity + if aff == nil || aff.Initial == "" { + return p.cmd.Start() + } + logrus.Debugf("Initial CPU affinity: %s", aff.Initial) + cpus, err := configs.ToCPUSet(aff.Initial) + if err != nil { + return fmt.Errorf("invalid CPUAffinity.initial: %w", err) + } + + errCh := make(chan error) + defer close(errCh) + + // Use a goroutine to dedicate an OS thread. + go func() { + runtime.LockOSThread() + // The command started below inherits the CPU affinity set on this thread. + if err := unix.SchedSetaffinity(unix.Gettid(), cpus); err != nil { + runtime.UnlockOSThread() + errCh <- fmt.Errorf("setting initial CPU affinity: %w", err) + return + } + + errCh <- p.cmd.Start() + // Deliberately omit runtime.UnlockOSThread here. + // https://pkg.go.dev/runtime#LockOSThread says: + // "If the calling goroutine exits without unlocking the + // thread, the thread will be terminated". + }() + + return <-errCh +} + +func (p *setnsProcess) setFinalCPUAffinity() error { + aff := p.config.CPUAffinity + if aff == nil || aff.Final == "" { + return nil + } + cpus, err := configs.ToCPUSet(aff.Final) + if err != nil { + return fmt.Errorf("invalid CPUAffinity.final: %w", err) + } + if err := unix.SchedSetaffinity(p.pid(), cpus); err != nil { + return fmt.Errorf("setting final CPU affinity: %w", err) + } + return nil +} + func (p *setnsProcess) start() (retErr error) { defer p.comm.closeParent() - // get the "before" value of oom kill count + // Get the "before" value of oom kill count. 
oom, _ := p.manager.OOMKillCount() - err := p.cmd.Start() - // close the child-side of the pipes (controlled by child) + err := p.startWithCPUAffinity() + // Close the child-side of the pipes (controlled by child). p.comm.closeChild() if err != nil { return fmt.Errorf("error starting setns process: %w", err) @@ -228,6 +278,9 @@ func (p *setnsProcess) start() (retErr error) { } } } + if err := p.setFinalCPUAffinity(); err != nil { + return err + } if err := utils.WriteJSON(p.comm.initSockParent, p.config); err != nil { return fmt.Errorf("error writing config to pipe: %w", err) diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 79a9a790049..a1d10714dfe 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -556,6 +556,11 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { ioPriority := *spec.Process.IOPriority config.IOPriority = &ioPriority } + if spec.Process.ExecCPUAffinity != nil { + a := *spec.Process.ExecCPUAffinity + config.ExecCPUAffinity = &a + } + } createHooks(spec, config) config.Version = specs.Version diff --git a/tests/integration/cpu_affinity.bats b/tests/integration/cpu_affinity.bats new file mode 100644 index 00000000000..0aaef66663c --- /dev/null +++ b/tests/integration/cpu_affinity.bats @@ -0,0 +1,64 @@ +#!/usr/bin/env bats +# Exec CPU affinity tests. For more details, see: +# - https://github.com/opencontainers/runtime-spec/pull/1253 + +load helpers + +function setup() { + requires smp cgroups_cpuset + setup_busybox +} + +function teardown() { + teardown_bundle +} + +function all_cpus() { + cat /sys/devices/system/cpu/online +} + +function first_cpu() { + all_cpus | sed 's/[-,].*//g' +} + +@test "runc exec [CPU affinity, initial set via process.json]" { + first="$(first_cpu)" + second=$((first + 1)) # Hacky: assumes CPU number first+1 is usable too; might not work in all environments. 
+ + runc run -d --console-socket "$CONSOLE_SOCKET" ct1 + [ "$status" -eq 0 ] + + for cpus in "$first" "$first-$second" "$first,$second" "$second"; do + proc=' +{ + "terminal": false, + "execCPUAffinity": { + "initial": "'$cpus'" + }, + "args": [ "/bin/true" ], + "cwd": "/" +}' + exp=${cpus//,/-} # 1. "," --> "-". + exp=${exp//-/ } # 2. "-" --> " ". + echo "CPUS: $cpus, exp: $exp" + runc --debug exec --process <(echo "$proc") ct1 + [[ "$output" == *"Initial CPU affinity: $cpus"* ]] + [[ "$output" == *"Initial CPUs: [$exp]"* ]] + done +} + +@test "runc exec [CPU affinity, initial and final are set]" { + first="$(first_cpu)" + second=$((first + 1)) # Hacky: assumes CPU number first+1 is usable too; might not work in all environments. + + update_config " .process.execCPUAffinity.initial = \"$first\" + | .process.execCPUAffinity.final = \"$second\"" + + runc run -d --console-socket "$CONSOLE_SOCKET" ct1 + [ "$status" -eq 0 ] + + runc --debug exec ct1 grep "Cpus_allowed_list:" /proc/self/status + [ "$status" -eq 0 ] + [[ "$output" == *"Initial CPUs: [$first]"* ]] + [[ "$output" == *"Cpus_allowed_list: $second"* ]] # Mind the literal tab. +} diff --git a/utils_linux.go b/utils_linux.go index eef78ea3845..edbabdfa99c 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -57,6 +57,7 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) { AppArmorProfile: p.ApparmorProfile, Scheduler: p.Scheduler, IOPriority: p.IOPriority, + CPUAffinity: p.ExecCPUAffinity, } if p.ConsoleSize != nil {