Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

runc exec: implement CPU affinity #4327

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* `configs.CommandHook` struct has changed, Command is now a pointer.
Also, `configs.NewCommandHook` now accepts a `*Command`. (#4325)

### Added
* CPU affinity support for `runc exec`. (#4327)

## [1.2.0] - 2024-10-22

> できるときにできることをやるんだ。それが今だ。
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ require (
github.com/moby/sys/user v0.3.0
github.com/moby/sys/userns v0.1.0
github.com/mrunalp/fileutils v0.5.1
github.com/opencontainers/runtime-spec v1.2.0
github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95
github.com/opencontainers/selinux v1.11.1
github.com/seccomp/libseccomp-golang v0.10.0
github.com/sirupsen/logrus v1.9.3
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g
github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q=
github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95 h1:Ghl8Z3l+yPQUDSxAp7Kg7fJLRNNXjOsR6ooDcca7PjU=
github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.11.1 h1:nHFvthhM0qY8/m+vfhJylliSshm8G1jJ2jDMcgULaH8=
github.com/opencontainers/selinux v1.11.1/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
Expand Down
72 changes: 72 additions & 0 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ package configs
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"os/exec"
"strconv"
"strings"
"time"

"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -225,6 +228,9 @@ type Config struct {

// IOPriority is the container's I/O priority.
IOPriority *IOPriority `json:"io_priority,omitempty"`

// ExecCPUAffinity is CPU affinity for a non-init process to be run in the container.
ExecCPUAffinity *CPUAffinity `json:"exec_cpu_affinity,omitempty"`
}

// Scheduler is based on the Linux sched_setattr(2) syscall.
Expand Down Expand Up @@ -288,6 +294,72 @@ func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {

type IOPriority = specs.LinuxIOPriority

// CPUAffinity describes the CPU affinity to apply to a process run in the
// container. Either field may be nil, meaning that stage is not configured.
type CPUAffinity struct {
	// Initial is the CPU set applied when the process is being started.
	Initial *unix.CPUSet
	// Final is the CPU set applied to the process after it has been started.
	Final *unix.CPUSet
}

// toCPUSet parses a CPU list such as "0-3,7, 12" into a unix.CPUSet.
//
// The list is a comma-separated sequence of single CPU numbers and inclusive
// "start-end" ranges. Spaces around elements and empty elements (extra
// commas) are tolerated.
//
// An empty string means "not set" and yields (nil, nil). An error is returned
// when an element is not a valid number, a range is reversed, a CPU number
// does not fit into unix.CPUSet, or the list contains no CPUs at all.
func toCPUSet(str string) (*unix.CPUSet, error) {
	if str == "" {
		return nil, nil
	}
	s := new(unix.CPUSet)

	// parse converts a single CPU number. A bit size of 31 guarantees that
	// the result fits into a non-negative int on every platform.
	parse := func(v string) (int, error) {
		n, err := strconv.ParseUint(v, 10, 31)
		if err != nil {
			return 0, err
		}
		return int(n), nil
	}
	// add sets cpu in s. Since (*unix.CPUSet).Set silently ignores CPU
	// numbers that do not fit into the set, read the bit back to detect
	// values that are too large.
	add := func(cpu int) error {
		s.Set(cpu)
		if !s.IsSet(cpu) {
			return fmt.Errorf("CPU number %d is too large", cpu)
		}
		return nil
	}

	for _, r := range strings.Split(str, ",") {
		// Allow extra spaces around.
		r = strings.TrimSpace(r)
		// Allow empty elements (extra commas).
		if r == "" {
			continue
		}

		r0, r1, isRange := strings.Cut(r, "-")
		if !isRange {
			val, err := parse(r)
			if err != nil {
				return nil, err
			}
			if err := add(val); err != nil {
				return nil, err
			}
			continue
		}

		start, err := parse(r0)
		if err != nil {
			return nil, err
		}
		end, err := parse(r1)
		if err != nil {
			return nil, err
		}
		if start > end {
			return nil, errors.New("invalid range: " + r)
		}
		// Validate the upper bound first, so that an oversized range is
		// rejected right away rather than after a very long loop.
		if err := add(end); err != nil {
			return nil, err
		}
		for i := start; i < end; i++ {
			s.Set(i)
		}
	}

	// A list made only of separators selects nothing; using it as an
	// affinity mask could only fail later with a less helpful error.
	if s.Count() == 0 {
		return nil, fmt.Errorf("no CPUs found in %q", str)
	}

	return s, nil
}

// ConvertCPUAffinity converts [specs.CPUAffinity] to [CPUAffinity].
//
// A nil input, or an input in which neither the initial nor the final CPU
// list produces a set, results in (nil, nil). A parse failure is reported
// with the name of the offending field.
func ConvertCPUAffinity(sa *specs.CPUAffinity) (*CPUAffinity, error) {
	if sa == nil {
		return nil, nil
	}

	var (
		aff CPUAffinity
		err error
	)
	if aff.Initial, err = toCPUSet(sa.Initial); err != nil {
		return nil, fmt.Errorf("bad CPUAffinity.Initial: %w", err)
	}
	if aff.Final, err = toCPUSet(sa.Final); err != nil {
		return nil, fmt.Errorf("bad CPUAffinity.Final: %w", err)
	}

	// Nothing configured at all: report "no affinity" rather than an
	// empty struct.
	if aff.Initial == nil && aff.Final == nil {
		return nil, nil
	}
	return &aff, nil
}

type (
HookName string
HookList []Hook
Expand Down
4 changes: 4 additions & 0 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,7 @@ func (c *Container) newInitConfig(process *Process) *initConfig {
AppArmorProfile: c.config.AppArmorProfile,
ProcessLabel: c.config.ProcessLabel,
Rlimits: c.config.Rlimits,
CPUAffinity: c.config.ExecCPUAffinity,
CreateConsole: process.ConsoleSocket != nil,
ConsoleWidth: process.ConsoleWidth,
ConsoleHeight: process.ConsoleHeight,
Expand All @@ -713,6 +714,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig {
if len(process.Rlimits) > 0 {
cfg.Rlimits = process.Rlimits
}
if process.CPUAffinity != nil {
cfg.CPUAffinity = process.CPUAffinity
}
if cgroups.IsCgroup2UnifiedMode() {
cfg.Cgroup2Path = c.cgroupManager.Path("")
}
Expand Down
3 changes: 2 additions & 1 deletion libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ type initConfig struct {
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
SpecState *specs.State `json:"spec_state,omitempty"`
Cgroup2Path string `json:"cgroup2_path,omitempty"`
CPUAffinity *configs.CPUAffinity `json:"cpu_affinity,omitempty"`
}

// Init is part of "runc init" implementation.
Expand Down Expand Up @@ -150,7 +151,7 @@ func startInitialization() (retErr error) {

logrus.SetOutput(logPipe)
logrus.SetFormatter(new(logrus.JSONFormatter))
logrus.Debug("child process in init()")
logrus.Debugf("child process in init()")

// Only init processes have FIFOFD.
var fifoFile *os.File
Expand Down
9 changes: 7 additions & 2 deletions libcontainer/nsenter/log.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ void setup_logpipe(void)
loglevel = i;
}

/*
 * Reports whether a message of the given level would be written to the log:
 * the log fd must be valid and the level must not exceed the current loglevel.
 */
bool log_enabled_for(int level)
{
	if (logfd < 0)
		return false;

	return level <= loglevel;
}

/* Defined in nsexec.c */
extern int current_stage;

Expand All @@ -40,8 +45,8 @@ void write_log(int level, const char *format, ...)
va_list args;
int ret;

if (logfd < 0 || level > loglevel)
goto out;
if (!log_enabled_for(level))
return;

va_start(args, format);
ret = vasprintf(&message, format, args);
Expand Down
3 changes: 3 additions & 0 deletions libcontainer/nsenter/log.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef NSENTER_LOG_H
#define NSENTER_LOG_H

#include <stdbool.h>
#include <stdio.h>

/*
Expand All @@ -20,6 +21,8 @@
*/
void setup_logpipe(void);

bool log_enabled_for(int level);

void write_log(int level, const char *format, ...) __attribute__((format(printf, 2, 3)));

extern int logfd;
Expand Down
29 changes: 29 additions & 0 deletions libcontainer/nsenter/nsexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,25 @@ static void update_timens_offsets(pid_t pid, char *map, size_t map_len)
bail("failed to update /proc/%d/timens_offsets", pid);
}

/*
 * Logs, at DEBUG level, the CPU affinity of the calling thread as a hex
 * bitmask ("affinity: 0x..."). Only the first sizeof(size_t) * 8 CPUs are
 * included in the printed mask. If the affinity cannot be obtained, a
 * WARNING is logged instead.
 */
void print_cpu_affinity(void)
{
	cpu_set_t cpus;
	size_t i, mask = 0;

	CPU_ZERO(&cpus);
	if (sched_getaffinity(0, sizeof(cpus), &cpus) < 0) {
		write_log(WARNING, "sched_getaffinity: %m");
		return;
	}

	/* Do not print the complete mask, we only need a few first CPUs. */
	for (i = 0; i < sizeof(mask) * 8; i++) {
		/*
		 * The shifted operand must be as wide as mask: shifting a
		 * plain int by 31 or more bits is undefined behaviour.
		 */
		if (CPU_ISSET(i, &cpus))
			mask |= (size_t)1 << i;
	}

	write_log(DEBUG, "affinity: 0x%zx", mask);
}

void nsexec(void)
{
int pipenum;
Expand All @@ -699,6 +718,16 @@ void nsexec(void)

write_log(DEBUG, "=> nsexec container setup");

/* This is for ../../tests/integration/cpu_affinity.bats test only.
*
* Printing this from Go code might be too late as some kernels
* change the process' CPU affinity to that of container's cpuset
* as soon as the process is moved into container's cgroup.
*/
if (log_enabled_for(DEBUG)) {
print_cpu_affinity();
}

/* Parse all of the netlink configuration. */
nl_parse(pipenum, &config);

Expand Down
2 changes: 2 additions & 0 deletions libcontainer/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ type Process struct {
Scheduler *configs.Scheduler

IOPriority *configs.IOPriority

CPUAffinity *configs.CPUAffinity
}

// Wait waits for the process to exit.
Expand Down
51 changes: 47 additions & 4 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,13 +163,53 @@ type setnsProcess struct {
initProcessPid int
}

// startWithCPUAffinity starts the setns process (p.cmd) with the configured
// initial CPU affinity. When no initial affinity is configured, the command
// is started directly.
func (p *setnsProcess) startWithCPUAffinity() error {
	affinity := p.config.CPUAffinity
	if affinity == nil || affinity.Initial == nil {
		return p.cmd.Start()
	}

	result := make(chan error)
	defer close(result)

	// The affinity is set on a dedicated OS thread, and the command is
	// started from that same thread so that it inherits the affinity.
	startPinned := func() {
		runtime.LockOSThread()

		err := unix.SchedSetaffinity(unix.Gettid(), affinity.Initial)
		if err == nil {
			result <- p.cmd.Start()
			// Deliberately omit runtime.UnlockOSThread here.
			// https://pkg.go.dev/runtime#LockOSThread says:
			// "If the calling goroutine exits without unlocking the
			// thread, the thread will be terminated".
			return
		}

		// Setting the affinity failed, so the thread can be unlocked.
		runtime.UnlockOSThread()
		result <- fmt.Errorf("error setting initial CPU affinity: %w", err)
	}
	go startPinned()

	return <-result
}

// setFinalCPUAffinity applies the configured final CPU affinity to the
// process identified by p.pid(). It is a no-op when no final affinity is
// configured.
func (p *setnsProcess) setFinalCPUAffinity() error {
	if p.config.CPUAffinity == nil || p.config.CPUAffinity.Final == nil {
		return nil
	}

	err := unix.SchedSetaffinity(p.pid(), p.config.CPUAffinity.Final)
	if err == nil {
		return nil
	}
	return fmt.Errorf("error setting final CPU affinity: %w", err)
}

func (p *setnsProcess) start() (retErr error) {
defer p.comm.closeParent()

// get the "before" value of oom kill count
// Get the "before" value of oom kill count.
oom, _ := p.manager.OOMKillCount()
err := p.cmd.Start()
// close the child-side of the pipes (controlled by child)
err := p.startWithCPUAffinity()
// Close the child-side of the pipes (controlled by child).
p.comm.closeChild()
if err != nil {
return fmt.Errorf("error starting setns process: %w", err)
Expand Down Expand Up @@ -219,6 +259,10 @@ func (p *setnsProcess) start() (retErr error) {
}
}
}
// Set final CPU affinity right after the process is moved into container's cgroup.
if err := p.setFinalCPUAffinity(); err != nil {
return err
}
if p.intelRdtPath != "" {
// if Intel RDT "resource control" filesystem path exists
_, err := os.Stat(p.intelRdtPath)
Expand All @@ -228,7 +272,6 @@ func (p *setnsProcess) start() (retErr error) {
}
}
}

if err := utils.WriteJSON(p.comm.initSockParent, p.config); err != nil {
return fmt.Errorf("error writing config to pipe: %w", err)
}
Expand Down
5 changes: 5 additions & 0 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,11 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
ioPriority := *spec.Process.IOPriority
config.IOPriority = &ioPriority
}
config.ExecCPUAffinity, err = configs.ConvertCPUAffinity(spec.Process.ExecCPUAffinity)
if err != nil {
return nil, err
}

}
createHooks(spec, config)
config.Version = specs.Version
Expand Down
Loading
Loading