Skip to content

Commit

Permalink
Revisit device controller detection for cgroup v2
Browse files Browse the repository at this point in the history
The cgroup v2 device controller is not listed in the cgroup.controllers
file and is solely available via the kernel's BPF interface. Therefore,
k0s sysinfo determines its presence based on the Linux kernel version.
This is problematic for old kernels that have many backported features,
such as those shipped with RHEL and its derivatives.

However, it is still possible to detect the device controller by trying
to attach a dummy device filter to an empty, temporary cgroup. In case
k0s is unable to create the cgroup or attach the device filter because
of missing permissions, the presence of the device controller is simply
assumed.

See: 0655941 ("Add pre-flight checks and probes module")
Signed-off-by: Tom Wieczorek <[email protected]>
  • Loading branch information
twz123 committed Jul 13, 2023
1 parent dcf5d26 commit 96c150a
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 43 deletions.
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ require (
github.com/avast/retry-go v3.0.0+incompatible
github.com/bombsimon/logrusr/v2 v2.0.1
github.com/cavaliergopher/grab/v3 v3.0.1
github.com/cilium/ebpf v0.11.0
github.com/cloudflare/cfssl v1.6.4
github.com/containerd/cgroups/v3 v3.0.1
github.com/containerd/containerd v1.7.2
Expand All @@ -31,6 +32,7 @@ require (
github.com/mitchellh/go-homedir v1.1.0
github.com/olekukonko/tablewriter v0.0.5
github.com/opencontainers/image-spec v1.1.0-rc4
github.com/opencontainers/runtime-spec v1.1.0-rc.2
github.com/otiai10/copy v1.12.0
github.com/pelletier/go-toml v1.9.5
github.com/robfig/cron v1.2.0
Expand All @@ -52,7 +54,7 @@ require (
go.uber.org/multierr v1.11.0
go.uber.org/zap v1.24.0
golang.org/x/crypto v0.11.0
golang.org/x/exp v0.0.0-20220827204233-334a2380cb91
golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb
golang.org/x/mod v0.12.0
golang.org/x/sync v0.3.0
golang.org/x/sys v0.10.0
Expand Down Expand Up @@ -102,7 +104,6 @@ require (
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/chai2010/gettext-go v1.0.2 // indirect
github.com/cilium/ebpf v0.9.1 // indirect
github.com/containerd/cgroups v1.1.0 // indirect
github.com/containerd/console v1.0.3 // indirect
github.com/containerd/continuity v0.4.1 // indirect
Expand Down Expand Up @@ -208,7 +209,6 @@ require (
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/runc v1.1.7 // indirect
github.com/opencontainers/runtime-spec v1.1.0-rc.2 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pkg/errors v0.9.1 // indirect
Expand Down
10 changes: 5 additions & 5 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@ github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHe
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4=
github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY=
github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y=
github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cloudflare/cfssl v1.6.4 h1:NMOvfrEjFfC63K3SGXgAnFdsgkmiq4kATme5BfcqrO8=
github.com/cloudflare/cfssl v1.6.4/go.mod h1:8b3CQMxfWPAeom3zBnGJ6sd+G1NkL5TXqmDXacb+1J0=
Expand Down Expand Up @@ -260,8 +260,8 @@ github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBd
github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/flowstack/go-jsonschema v0.1.1/go.mod h1:yL7fNggx1o8rm9RlgXv7hTBWxdBM0rVwpMwimd3F3N0=
github.com/foxcpp/go-mockdns v1.0.0 h1:7jBqxd3WDWwi/6WhDvacvH1XsN3rOLXyHM1uhvIx6FI=
github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE=
github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps=
github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
Expand Down Expand Up @@ -993,8 +993,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 h1:tnebWN09GYg9OLPss1KXj8txwZc6X6uMr6VFdcGNbHw=
golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb h1:xIApU0ow1zwMa2uL1VDNeQlNVFTWMQxZUZCMDy0Q4Us=
golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
Expand Down
140 changes: 105 additions & 35 deletions internal/pkg/sysinfo/probes/linux/cgroup_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,17 @@ package linux
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"

"k8s.io/utils/pointer"

"github.com/cilium/ebpf/rlimit"
"github.com/containerd/cgroups/v3/cgroup2"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
)

type cgroupV2 struct {
Expand All @@ -54,22 +59,110 @@ func (g *cgroupV2) loadControllers(seen func(string, string)) error {
return g.detectListedRootControllers(seen)
}

// detectDevicesController reports whether the cgroup v2 device controller is
// usable by trying to attach a dummy program of type BPF_CGROUP_DEVICE to a
// temporary cgroup below g.mountPoint. Since the controller has no interface
// files and is implemented purely on top of BPF, this is the only reliable way
// to detect it. A best-guess detection via the kernel version has the major
// drawback of not working with kernels that have a lot of backported features,
// such as RHEL and its derivatives.
//
// The outcome is derived from the error of the attach attempt:
//   - no error: the controller is reported as available.
//   - a permission error while not running as root: the controller is assumed
//     to be available, along with a hint to retry with elevated permissions.
//   - an error for which eBPFProgramUnsupported reports true: the controller
//     is reported as unavailable, with the error text as the reason.
//   - any other error is returned to the caller as is.
//
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#device-controller
func (g *cgroupV2) detectDevicesController() (cgroupControllerAvailable, error) {
	err := attachDummyDeviceFilter(g.mountPoint)
	switch {
	case err == nil:
		return cgroupControllerAvailable{true, "device filters attachable", ""}, nil

	// EACCES occurs when not allowed to create cgroups.
	// EPERM occurs when not allowed to load eBPF programs.
	// When running as root, a permission error is not expected to go away by
	// elevating privileges, so it is not turned into an assumption.
	case errors.Is(err, os.ErrPermission) && os.Geteuid() != 0:
		return cgroupControllerAvailable{true, "assumed", "insufficient permissions, try with elevated permissions"}, nil

	case eBPFProgramUnsupported(err):
		return cgroupControllerAvailable{false, err.Error(), ""}, nil
	}

	return cgroupControllerAvailable{}, err
}

// attachDummyDeviceFilter attaches a dummy program of type BPF_CGROUP_DEVICE
// to a temporary cgroup created below mountPoint, then detaches the program
// and removes the cgroup again.
//
// It returns nil if the program could be loaded, attached and detached again.
// If loading/attaching fails with an error for which eBPFProgramUnsupported
// reports true, that error is returned without additional wrapping. All other
// failures are wrapped with a message naming the step that failed. Errors that
// occur while closing the cgroup's file descriptor or removing the temporary
// cgroup are joined onto the returned error.
func attachDummyDeviceFilter(mountPoint string) (err error) {
	// A single permissive rule is sufficient: the program only needs to be
	// loadable and attachable, it is never meant to filter anything.
	insts, license, err := cgroup2.DeviceFilter([]specs.LinuxDeviceCgroup{{
		Allow:  true,
		Type:   "a",
		Major:  pointer.Int64(-1),
		Minor:  pointer.Int64(-1),
		Access: "rwm",
	}})
	if err != nil {
		return fmt.Errorf("failed to create eBPF device filter program: %w", err)
	}

	tmpCgroupPath, err := os.MkdirTemp(mountPoint, "k0s-devices-detection-*")
	if err != nil {
		return fmt.Errorf("failed to create temporary cgroup: %w", err)
	}
	// Deferred functions run in reverse order: the file descriptor opened
	// below is closed before the cgroup directory is removed.
	defer func() { err = errors.Join(err, os.Remove(tmpCgroupPath)) }()

	dirFD, err := unix.Open(tmpCgroupPath, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0)
	if err != nil {
		return fmt.Errorf("failed to open temporary cgroup: %w", &fs.PathError{Op: "open", Path: tmpCgroupPath, Err: err})
	}
	defer func() {
		if closeErr := unix.Close(dirFD); closeErr != nil {
			err = errors.Join(err, &fs.PathError{Op: "close", Path: tmpCgroupPath, Err: closeErr})
		}
	}()

	detach, err := cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD)
	if err != nil {
		// RemoveMemlock may be required on kernels < 5.11
		// observed on debian 11: 5.10.0-21-armmp-lpae #1 SMP Debian 5.10.162-1 (2023-01-21) armv7l
		// https://github.com/cilium/ebpf/blob/v0.11.0/prog.go#L356-L360
		if errors.Is(err, unix.EPERM) && strings.Contains(err.Error(), "RemoveMemlock") {
			if rlimitErr := rlimit.RemoveMemlock(); rlimitErr != nil {
				err = errors.Join(err, rlimitErr)
			} else {
				// Try again, MEMLOCK should be removed by now.
				var retryErr error
				detach, retryErr = cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD)
				if retryErr != nil {
					err = errors.Join(err, retryErr)
				} else {
					err = nil
				}
			}
		}
	}
	if err != nil {
		// Keep "unsupported" errors unwrapped, as their text is reported
		// verbatim as the reason why the controller is unavailable.
		if eBPFProgramUnsupported(err) {
			return err
		}
		return fmt.Errorf("failed to load/attach eBPF device filter program: %w", err)
	}

	return detach()
}

// since 4.15
available, op := false, "<"
if major > 4 || (major == 4 && minor >= 15) {
available, op = true, ">="
// Returns true if the given error indicates that an eBPF program is unsupported
// by the kernel.
func eBPFProgramUnsupported(err error) bool {
// https://github.com/cilium/ebpf/blob/v0.11.0/features/prog.go#L43-L49

switch {
// EINVAL occurs when attempting to create a program with an unknown type.
case errors.Is(err, unix.EINVAL):
return true

// E2BIG occurs when ProgLoadAttr contains non-zero bytes past the end of
// the struct known by the running kernel, meaning the kernel is too old to
// support the given prog type.
case errors.Is(err, unix.E2BIG):
return true

default:
return false
}
msg := fmt.Sprintf("kernel %d.%d %s 4.15", major, minor, op)
return cgroupControllerAvailable{available, msg, ""}, nil
}

// Detect the freezer controller. It doesn't appear in the cgroup.controllers
Expand Down Expand Up @@ -137,26 +230,3 @@ func (g *cgroupV2) detectListedRootControllers(seen func(string, string)) (err e

return nil
}

// kernelReleasePattern matches the leading "<major>.<minor>" part of a kernel
// release string. The minor number must be followed by either a dot or the end
// of the string. It is compiled once instead of on every call.
var kernelReleasePattern = regexp.MustCompile(`^(\d+)\.(\d+)(\.|$)`)

// parseKernelRelease obtains the kernel release via probeUname and extracts
// its major and minor version numbers.
//
// An error returned by probeUname is passed through unchanged. If the release
// string does not start with "<major>.<minor>", or if one of the numbers does
// not fit into 16 bits, an error quoting the release is returned. Both numbers
// are zero whenever an error is returned.
func parseKernelRelease(probeUname unameProber) (int64, int64, error) {
	uname, err := probeUname()
	if err != nil {
		return 0, 0, err
	}

	var major, minor int64
	matches := kernelReleasePattern.FindStringSubmatch(uname.osRelease.value)
	if matches == nil {
		err = errors.New("unsupported format")
	} else if major, err = strconv.ParseInt(matches[1], 10, 16); err == nil {
		minor, err = strconv.ParseInt(matches[2], 10, 16)
	}

	if err != nil {
		// Don't hand out partially parsed numbers alongside an error.
		return 0, 0, fmt.Errorf("failed to parse kernel release %q: %w", uname.osRelease, err)
	}

	return major, minor, nil
}

0 comments on commit 96c150a

Please sign in to comment.