Skip to content

Commit

Permalink
Revisit device controller detection for cgroup v2
Browse files Browse the repository at this point in the history
The cgroup v2 device controller is not listed in the cgroup.controllers
file and is solely available via the kernel's BPF interface. Therefore, k0s
sysinfo determines its presence based on the Linux kernel version. This
is problematic for old kernels that have many backported features, such
as those shipped with RHEL and its derivatives.

However, it is still possible to detect the device controller by trying
to attach a dummy device filter to an empty, temporary cgroup. In case
k0s is unable to create the cgroup or attach the device filter because
of missing permissions, the presence of the devices controller is simply
assumed.

See: 0655941 ("Add pre-flight checks and probes module")
Signed-off-by: Tom Wieczorek <[email protected]>
  • Loading branch information
twz123 committed Jul 11, 2023
1 parent ba8b80a commit b252379
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 38 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ require (
github.com/mitchellh/go-homedir v1.1.0
github.com/olekukonko/tablewriter v0.0.5
github.com/opencontainers/image-spec v1.1.0-rc4
github.com/opencontainers/runtime-spec v1.1.0-rc.2
github.com/otiai10/copy v1.12.0
github.com/pelletier/go-toml v1.9.5
github.com/robfig/cron v1.2.0
Expand Down Expand Up @@ -208,7 +209,6 @@ require (
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/runc v1.1.7 // indirect
github.com/opencontainers/runtime-spec v1.1.0-rc.2 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pkg/errors v0.9.1 // indirect
Expand Down
104 changes: 67 additions & 37 deletions internal/pkg/sysinfo/probes/linux/cgroup_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@ package linux
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"regexp"
"strconv"

"k8s.io/utils/pointer"

"github.com/containerd/cgroups/v3/cgroup2"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
)

type cgroupV2 struct {
Expand All @@ -44,22 +47,72 @@ func (g *cgroupV2) probeController(controllerName string) (cgroupControllerAvail
return g.controllers.probeController(g, controllerName)
}

func (g *cgroupV2) loadControllers(seen func(string, string)) error {
// The device controller has no interface files. Its availability is assumed
// based on the kernel version, as it is hard to detect it directly.
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#device-controller
if major, minor, err := parseKernelRelease(g.probeUname); err == nil {
/* devices: since 4.15 */ if major > 4 || (major == 4 && minor >= 15) {
seen("devices", "assumed")
// loadControllers runs every cgroup v2 controller detection in turn and
// reports each controller it finds through the seen callback.
//
// All detections are executed even if an earlier one fails; their errors are
// combined with errors.Join, so the result is nil only if every detection
// succeeded.
func (g *cgroupV2) loadControllers(seen func(string, string)) error {
	// Evaluate in a fixed order so that seen is invoked deterministically.
	devicesErr := g.detectDevicesController(seen)
	freezerErr := g.detectFreezerController(seen)
	listedErr := g.detectListedRootControllers(seen)

	return errors.Join(devicesErr, freezerErr, listedErr)
}

// detectDevicesController detects the device controller by trying to attach a
// dummy program of type BPF_CGROUP_DEVICE to a cgroup below the cgroup v2
// mount point. Since the controller has no interface files and is implemented
// purely on top of BPF, this is the only reliable way to detect it. A
// best-guess detection via the kernel version has the major drawback of not
// working with kernels that have a lot of backported features, such as RHEL
// and friends.
//
// Outcomes:
//   - The filter could be attached: "devices" is reported via seen.
//   - The attempt failed with a permission error and the process does not run
//     with an effective UID of 0: the controller is assumed to be present and
//     reported via seen with a hint to retry with elevated permissions.
//   - Any other failure is returned to the caller and nothing is reported.
//
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#device-controller
func (g *cgroupV2) detectDevicesController(seen func(string, string)) error {
	err := attachDummyDeviceFilter(g.mountPoint)
	if err == nil {
		seen("devices", "device filters attachable")
		return nil
	}

	// Without root privileges a permission error says nothing about the
	// kernel's capabilities, so give the controller the benefit of the doubt.
	if errors.Is(err, os.ErrPermission) && os.Geteuid() != 0 {
		seen("devices", "assumed, insufficient permissions, try with elevated permissions")
		return nil
	}

	return err
}

// attachDummyDeviceFilter attaches a dummy program of type BPF_CGROUP_DEVICE
// to a randomly named cgroup created below mountPoint and removes the program
// and the cgroup again.
//
// The named result err is modified by the deferred cleanup steps: failures to
// remove the temporary cgroup or to close its directory descriptor are joined
// into the returned error.
func attachDummyDeviceFilter(mountPoint string) (err error) {
// Build the filter program from a single allow rule using type "a",
// major/minor -1 and "rwm" access.
insts, license, err := cgroup2.DeviceFilter([]specs.LinuxDeviceCgroup{{
Allow: true,
Type: "a",
Major: pointer.Int64(-1),
Minor: pointer.Int64(-1),
Access: "rwm",
}})
if err != nil {
return err
}

// Create the temporary cgroup as a uniquely named directory below the
// mount point.
tmpCgroupPath, err := os.MkdirTemp(mountPoint, "k0s-devices-detection-*")
if err != nil {
return err
}
// Remove the temporary cgroup on return and fold a removal failure into err.
defer func() { err = errors.Join(err, os.Remove(tmpCgroupPath)) }()

// Open the cgroup directory; the resulting descriptor is what the filter
// gets attached to.
dirFD, err := unix.Open(tmpCgroupPath, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return &fs.PathError{Op: "open", Path: tmpCgroupPath, Err: err}
}
// Close the descriptor on return; a close failure is wrapped in a
// *fs.PathError and joined into err.
defer func() {
if closeErr := unix.Close(dirFD); closeErr != nil {
err = errors.Join(err, &fs.PathError{Op: "close", Path: tmpCgroupPath, Err: closeErr})
}
} else {
}()

// Load the program and attach it to the cgroup referenced by dirFD. The
// returned function is invoked below to remove the program again.
close, err := cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD)
if err != nil {
return err
}

return errors.Join(
g.detectFreezerController(seen),
g.detectListedRootControllers(seen),
)
return close()
}

// Detect the freezer controller. It doesn't appear in the cgroup.controllers
Expand Down Expand Up @@ -134,26 +187,3 @@ func (g *cgroupV2) detectListedRootControllers(seen func(string, string)) (err e

return nil
}

// kernelReleaseRegexp matches the leading "<major>.<minor>" part of a kernel
// release string, which must be followed by a dot or the end of the string.
// It is compiled once at package initialization instead of on every call.
var kernelReleaseRegexp = regexp.MustCompile(`^(\d+)\.(\d+)(\.|$)`)

// parseKernelRelease obtains the kernel release via probeUname and extracts
// its major and minor version numbers.
//
// An error from probeUname is returned as is. If the release string doesn't
// start with "<major>.<minor>", or if one of the numbers doesn't fit into 16
// bits, an error wrapping the cause is returned. The numeric results are only
// meaningful when the returned error is nil.
func parseKernelRelease(probeUname unameProber) (int64, int64, error) {
	uname, err := probeUname()
	if err != nil {
		return 0, 0, err
	}

	var major, minor int64
	matches := kernelReleaseRegexp.FindStringSubmatch(uname.osRelease.value)
	switch {
	case matches == nil:
		err = errors.New("unsupported format")
	default:
		// Only attempt to parse the minor version if the major one is valid.
		if major, err = strconv.ParseInt(matches[1], 10, 16); err == nil {
			minor, err = strconv.ParseInt(matches[2], 10, 16)
		}
	}

	if err != nil {
		// NOTE(review): this formats uname.osRelease, whereas the match above
		// uses uname.osRelease.value -- confirm that %q renders as intended.
		err = fmt.Errorf("failed to parse kernel release %q: %w", uname.osRelease, err)
	}

	return major, minor, err
}

0 comments on commit b252379

Please sign in to comment.