Skip to content

Commit

Permalink
Merge pull request #3077 from twz123/cgroupv2-devices-freezer-detection
Browse files Browse the repository at this point in the history
Revisit devices and freezer controller detection for cgroup v2
  • Loading branch information
twz123 authored Oct 16, 2023
2 parents 5d9c87d + fba2a21 commit ebcb172
Show file tree
Hide file tree
Showing 6 changed files with 194 additions and 51 deletions.
4 changes: 2 additions & 2 deletions docs/raspberry-pi4.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ Operating system: Linux (pass)
cgroup controller "cpuacct": available (via cpu in version 2) (pass)
cgroup controller "cpuset": available (pass)
cgroup controller "memory": available (pass)
cgroup controller "devices": available (assumed) (pass)
cgroup controller "freezer": available (assumed) (pass)
cgroup controller "devices": unknown (warning: insufficient permissions, try with elevated permissions)
cgroup controller "freezer": available (cgroup.freeze exists) (pass)
cgroup controller "pids": available (pass)
cgroup controller "hugetlb": available (pass)
cgroup controller "blkio": available (via io in version 2) (pass)
Expand Down
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ require (
github.com/bombsimon/logrusr/v4 v4.0.0
github.com/carlmjohnson/requests v0.23.5
github.com/cavaliergopher/grab/v3 v3.0.1
github.com/cilium/ebpf v0.11.0
github.com/cloudflare/cfssl v1.6.4
github.com/containerd/cgroups/v3 v3.0.2
github.com/containerd/containerd v1.7.6
github.com/denisbrodbeck/machineid v1.0.1
github.com/estesp/manifest-tool/v2 v2.0.8
Expand All @@ -33,6 +35,7 @@ require (
github.com/mitchellh/go-homedir v1.1.0
github.com/olekukonko/tablewriter v0.0.5
github.com/opencontainers/image-spec v1.1.0-rc5
github.com/opencontainers/runtime-spec v1.1.0-rc.2
github.com/otiai10/copy v1.14.0
github.com/pelletier/go-toml v1.9.5
github.com/robfig/cron v1.2.0
Expand All @@ -54,7 +57,7 @@ require (
go.uber.org/multierr v1.11.0
go.uber.org/zap v1.26.0
golang.org/x/crypto v0.14.0
golang.org/x/exp v0.0.0-20220827204233-334a2380cb91
golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb
golang.org/x/mod v0.13.0
golang.org/x/sync v0.4.0
golang.org/x/sys v0.13.0
Expand Down Expand Up @@ -106,9 +109,7 @@ require (
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/chai2010/gettext-go v1.0.2 // indirect
github.com/cilium/ebpf v0.9.1 // indirect
github.com/containerd/cgroups v1.1.0 // indirect
github.com/containerd/cgroups/v3 v3.0.2 // indirect
github.com/containerd/console v1.0.3 // indirect
github.com/containerd/continuity v0.4.2 // indirect
github.com/containerd/fifo v1.1.0 // indirect
Expand Down Expand Up @@ -211,7 +212,6 @@ require (
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/runc v1.1.9 // indirect
github.com/opencontainers/runtime-spec v1.1.0-rc.2 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pkg/errors v0.9.1 // indirect
Expand Down
10 changes: 5 additions & 5 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHe
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4=
github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY=
github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y=
github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cloudflare/cfssl v1.6.4 h1:NMOvfrEjFfC63K3SGXgAnFdsgkmiq4kATme5BfcqrO8=
github.com/cloudflare/cfssl v1.6.4/go.mod h1:8b3CQMxfWPAeom3zBnGJ6sd+G1NkL5TXqmDXacb+1J0=
Expand Down Expand Up @@ -255,8 +255,8 @@ github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYF
github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/foxcpp/go-mockdns v1.0.0 h1:7jBqxd3WDWwi/6WhDvacvH1XsN3rOLXyHM1uhvIx6FI=
github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE=
github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps=
github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
Expand Down Expand Up @@ -959,8 +959,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 h1:tnebWN09GYg9OLPss1KXj8txwZc6X6uMr6VFdcGNbHw=
golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb h1:xIApU0ow1zwMa2uL1VDNeQlNVFTWMQxZUZCMDy0Q4Us=
golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
Expand Down
10 changes: 9 additions & 1 deletion internal/pkg/sysinfo/probes/linux/cgroup_controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ func (c *cgroupControllerProbe) Probe(reporter probes.Reporter) error {
} else if available, err := sys.probeController(c.name); err != nil {
return reporter.Error(desc, err)
} else if available.available {
if available.warning != "" {
return reporter.Warn(desc, available, available.warning)
}
return reporter.Pass(desc, available)
} else if c.require {
return reporter.Reject(desc, available, "")
Expand All @@ -71,10 +74,15 @@ func (c *cgroupControllerProbe) Probe(reporter probes.Reporter) error {
// cgroupControllerAvailable is the outcome of probing a single cgroup
// controller. It is built with positional literals elsewhere in this package,
// so the field order below must not change.
type cgroupControllerAvailable struct {
// available tells whether the controller is treated as usable. Probe only
// looks at warning when this is true.
available bool
// msg is free-form detail text about the detection result. String returns it
// verbatim when available is true and warning is non-empty.
msg string
// warning, when non-empty on an available result, makes Probe report a
// warning carrying this text instead of a plain pass.
warning string
}

func (a cgroupControllerAvailable) String() (msg string) {
if a.available {
if a.warning != "" {
return a.msg
}

msg = "available"
} else {
msg = "unavailable"
Expand All @@ -97,7 +105,7 @@ func (p *cgroupControllerProber) probeController(s cgroupSystem, controllerName
p.once.Do(func() {
p.controllers = make(map[string]cgroupControllerAvailable)
p.err = s.loadControllers(func(name, msg string) {
p.controllers[name] = cgroupControllerAvailable{true, msg}
p.controllers[name] = cgroupControllerAvailable{true, msg, ""}
})
})
return p.controllers[controllerName], p.err
Expand Down
211 changes: 173 additions & 38 deletions internal/pkg/sysinfo/probes/linux/cgroup_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,17 @@ package linux
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"

"k8s.io/utils/pointer"

"github.com/cilium/ebpf/rlimit"
"github.com/containerd/cgroups/v3/cgroup2"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
)

type cgroupV2 struct {
Expand All @@ -39,63 +45,192 @@ func (*cgroupV2) String() string {
}

// probeController determines the availability of the named controller.
// The "devices" and "freezer" controllers are routed to their dedicated
// detection methods; any other name is delegated to g.controllers.
func (g *cgroupV2) probeController(controllerName string) (cgroupControllerAvailable, error) {
	if controllerName == "devices" {
		return g.detectDevicesController()
	}
	if controllerName == "freezer" {
		return g.detectFreezerController()
	}

	// Everything else is looked up through the controllers prober.
	return g.controllers.probeController(g, controllerName)
}

func (g *cgroupV2) loadControllers(seen func(string, string)) error {
// Some controllers are implicitly enabled by the kernel. Those controllers
// do not appear in /sys/fs/cgroup/cgroup.controllers. Their availability is
// assumed based on the kernel version, as it is hard to detect them
// directly.
// https://github.com/torvalds/linux/blob/v5.3/kernel/cgroup/cgroup.c#L433-L434
if major, minor, err := parseKernelRelease(g.probeUname); err == nil {
/* devices: since 4.15 */ if major > 4 || (major == 4 && minor >= 15) {
seen("devices", "assumed")
}
/* freezer: since 5.2 */ if major > 5 || (major == 5 && minor >= 2) {
seen("freezer", "assumed")
}
} else {
return err
return g.detectListedRootControllers(seen)
}

// Detects the device controller by trying to attach a dummy program of type
// BPF_CGROUP_DEVICE to a cgroup. Since the controller has no interface files
// and is implemented purely on top of BPF, this is the only reliable way to
// detect it. A best-guess detection via the kernel version has the major
// drawback of not working with kernels that have a lot of backported features,
// such as RHEL and friends.
//
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#device-controller
func (g *cgroupV2) detectDevicesController() (cgroupControllerAvailable, error) {
err := attachDummyDeviceFilter(g.mountPoint)
switch {
case err == nil:
return cgroupControllerAvailable{true, "device filters attachable", ""}, nil

// EACCES occurs when not allowed to create cgroups.
// EPERM occurs when not allowed to load eBPF programs.
case errors.Is(err, os.ErrPermission) && os.Geteuid() != 0:
return cgroupControllerAvailable{true, "unknown", "insufficient permissions, try with elevated permissions"}, nil
case errors.Is(err, unix.EROFS):
return cgroupControllerAvailable{true, "unknown", fmt.Sprintf("read-only file system: %s", g.mountPoint)}, nil

case eBPFProgramUnsupported(err):
return cgroupControllerAvailable{false, err.Error(), ""}, nil
}

controllerData, err := os.ReadFile(filepath.Join(g.mountPoint, "cgroup.controllers"))
return cgroupControllerAvailable{}, err
}

// Attaches a dummy program of type BPF_CGROUP_DEVICE to a randomly created
// cgroup and removes the program and cgroup again.
func attachDummyDeviceFilter(mountPoint string) (err error) {
insts, license, err := cgroup2.DeviceFilter([]specs.LinuxDeviceCgroup{{
Allow: true,
Type: "a",
Major: pointer.Int64(-1),
Minor: pointer.Int64(-1),
Access: "rwm",
}})
if err != nil {
return err
return fmt.Errorf("failed to create eBPF device filter program: %w", err)
}

for _, controllerName := range strings.Fields(string(controllerData)) {
seen(controllerName, "")
switch controllerName {
case "cpu": // This is the successor to the version 1 cpu and cpuacct controllers.
seen("cpuacct", "via cpu in "+g.String())
case "io": // This is the successor of the version 1 blkio controller.
seen("blkio", "via io in "+g.String())
tmpCgroupPath, err := os.MkdirTemp(mountPoint, "k0s-devices-detection-*")
if err != nil {
return fmt.Errorf("failed to create temporary cgroup: %w", err)
}
defer func() { err = errors.Join(err, os.Remove(tmpCgroupPath)) }()

dirFD, err := unix.Open(tmpCgroupPath, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return fmt.Errorf("failed to open temporary cgroup: %w", &fs.PathError{Op: "open", Path: tmpCgroupPath, Err: err})
}
defer func() {
if closeErr := unix.Close(dirFD); closeErr != nil {
err = errors.Join(err, &fs.PathError{Op: "close", Path: tmpCgroupPath, Err: closeErr})
}
}()

close, err := cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD)
if err != nil {
// RemoveMemlock may be required on kernels < 5.11
// observed on debian 11: 5.10.0-21-armmp-lpae #1 SMP Debian 5.10.162-1 (2023-01-21) armv7l
// https://github.com/cilium/ebpf/blob/v0.11.0/prog.go#L356-L360
if errors.Is(err, unix.EPERM) && strings.Contains(err.Error(), "RemoveMemlock") {
if err2 := rlimit.RemoveMemlock(); err2 != nil {
err = errors.Join(err, err2)
} else {
// Try again, MEMLOCK should be removed by now.
close, err2 = cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD)
if err2 != nil {
err = errors.Join(err, err2)
} else {
err = nil
}
}
}
}
if err != nil {
if eBPFProgramUnsupported(err) {
return err
}
return fmt.Errorf("failed to load/attach eBPF device filter program: %w", err)
}

return nil
return close()
}

// eBPFProgramUnsupported reports whether err signals that the running kernel
// does not support an eBPF program. Wrapped errors are matched as well, since
// the checks go through errors.Is.
//
// https://github.com/cilium/ebpf/blob/v0.11.0/features/prog.go#L43-L49
func eBPFProgramUnsupported(err error) bool {
	// EINVAL occurs when attempting to create a program with an unknown type.
	if errors.Is(err, unix.EINVAL) {
		return true
	}

	// E2BIG occurs when ProgLoadAttr contains non-zero bytes past the end of
	// the struct known by the running kernel, meaning the kernel is too old to
	// support the given prog type.
	return errors.Is(err, unix.E2BIG)
}

func parseKernelRelease(probeUname unameProber) (int64, int64, error) {
uname, err := probeUname()
// Detect the freezer controller. It doesn't appear in the cgroup.controllers
// file. Check for the existence of the cgroup.freeze file in the k0s cgroup
// instead, or try to create a dummy cgroup if k0s runs in the root cgroup.
//
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#core-interface-files
func (g *cgroupV2) detectFreezerController() (cgroupControllerAvailable, error) {

// Detect the freezer controller by checking k0s's cgroup for the existence
// of the cgroup.freeze file.
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#processes
cgroupPath, err := cgroup2.NestedGroupPath("")
if err != nil {
return 0, 0, err
return cgroupControllerAvailable{}, fmt.Errorf("failed to get k0s cgroup: %w", err)
}

var major, minor int64
r := regexp.MustCompile(`^(\d+)\.(\d+)(\.|$)`)
if matches := r.FindStringSubmatch(uname.osRelease.value); matches == nil {
err = errors.New("unsupported format")
} else {
if major, err = strconv.ParseInt(matches[1], 10, 16); err == nil {
minor, err = strconv.ParseInt(matches[2], 10, 16)
if cgroupPath != "/" {
cgroupPath = filepath.Join(g.mountPoint, cgroupPath)
} else { // The root cgroup cannot be frozen. Try to create a dummy cgroup.
tmpCgroupPath, err := os.MkdirTemp(g.mountPoint, "k0s-freezer-detection-*")
if err != nil {
if errors.Is(err, os.ErrPermission) && os.Geteuid() != 0 {
return cgroupControllerAvailable{true, "unknown", "insufficient permissions, try with elevated permissions"}, nil
}
if errors.Is(err, unix.EROFS) && os.Geteuid() != 0 {
return cgroupControllerAvailable{true, "unknown", fmt.Sprintf("read-only file system: %s", g.mountPoint)}, nil
}

return cgroupControllerAvailable{}, fmt.Errorf("failed to create temporary cgroup: %w", err)
}
defer func() { err = errors.Join(err, os.Remove(tmpCgroupPath)) }()
cgroupPath = tmpCgroupPath
}

// Check if the cgroup.freeze exists
if stat, err := os.Stat(filepath.Join(cgroupPath, "cgroup.freeze")); (err == nil && stat.IsDir()) || os.IsNotExist(err) {
return cgroupControllerAvailable{false, "cgroup.freeze doesn't exist", ""}, nil
} else if err != nil {
return cgroupControllerAvailable{}, err
}
return cgroupControllerAvailable{true, "cgroup.freeze exists", ""}, nil
}

// Detects all the listed root controllers.
//
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#core-interface-files
func (g *cgroupV2) detectListedRootControllers(seen func(string, string)) (err error) {
root, err := cgroup2.Load("/", cgroup2.WithMountpoint(g.mountPoint))
if err != nil {
err = fmt.Errorf("failed to parse kernel release %q: %w", uname.osRelease, err)
return fmt.Errorf("failed to load root cgroup: %w", err)
}

return major, minor, err
controllerNames, err := root.RootControllers() // This reads cgroup.controllers
if err != nil {
return fmt.Errorf("failed to list cgroup root controllers: %w", err)
}

for _, controllerName := range controllerNames {
seen(controllerName, "is a listed root controller")
switch controllerName {
case "cpu": // This is the successor to the version 1 cpu and cpuacct controllers.
seen("cpuacct", "via cpu in "+g.String())
case "io": // This is the successor of the version 1 blkio controller.
seen("blkio", "via io in "+g.String())
}
}

return nil
}
2 changes: 1 addition & 1 deletion internal/pkg/sysinfo/probes/linux/cgroups_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func TestCgroupsProbes_Probe(t *testing.T) {
t.Run("Pass", func(t *testing.T) {
init()

available := cgroupControllerAvailable{true, ""}
available := cgroupControllerAvailable{true, "", ""}

reporter.On("Pass", mock.Anything, mockSys).Return(nil)
mockSys.On("probeController", "foo").Return(available, nil)
Expand Down

0 comments on commit ebcb172

Please sign in to comment.