diff --git a/docs/raspberry-pi4.md b/docs/raspberry-pi4.md index f3962ad2bdbe..d5b044d5ff9a 100644 --- a/docs/raspberry-pi4.md +++ b/docs/raspberry-pi4.md @@ -165,8 +165,8 @@ Operating system: Linux (pass) cgroup controller "cpuacct": available (via cpu in version 2) (pass) cgroup controller "cpuset": available (pass) cgroup controller "memory": available (pass) - cgroup controller "devices": available (assumed) (pass) - cgroup controller "freezer": available (assumed) (pass) + cgroup controller "devices": unknown (warning: insufficient permissions, try with elevated permissions) + cgroup controller "freezer": available (cgroup.freeze exists) (pass) cgroup controller "pids": available (pass) cgroup controller "hugetlb": available (pass) cgroup controller "blkio": available (via io in version 2) (pass) diff --git a/go.mod b/go.mod index b8db55a7f56c..b0fbe25fdf28 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,9 @@ require ( github.com/bombsimon/logrusr/v4 v4.0.0 github.com/carlmjohnson/requests v0.23.5 github.com/cavaliergopher/grab/v3 v3.0.1 + github.com/cilium/ebpf v0.11.0 github.com/cloudflare/cfssl v1.6.4 + github.com/containerd/cgroups/v3 v3.0.2 github.com/containerd/containerd v1.7.6 github.com/denisbrodbeck/machineid v1.0.1 github.com/estesp/manifest-tool/v2 v2.0.8 @@ -33,6 +35,7 @@ require ( github.com/mitchellh/go-homedir v1.1.0 github.com/olekukonko/tablewriter v0.0.5 github.com/opencontainers/image-spec v1.1.0-rc5 + github.com/opencontainers/runtime-spec v1.1.0-rc.2 github.com/otiai10/copy v1.14.0 github.com/pelletier/go-toml v1.9.5 github.com/robfig/cron v1.2.0 @@ -54,7 +57,7 @@ require ( go.uber.org/multierr v1.11.0 go.uber.org/zap v1.26.0 golang.org/x/crypto v0.14.0 - golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 + golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb golang.org/x/mod v0.13.0 golang.org/x/sync v0.4.0 golang.org/x/sys v0.13.0 @@ -106,9 +109,7 @@ require ( github.com/cenkalti/backoff/v4 v4.2.1 // indirect github.com/cespare/xxhash/v2 
v2.2.0 // indirect github.com/chai2010/gettext-go v1.0.2 // indirect - github.com/cilium/ebpf v0.9.1 // indirect github.com/containerd/cgroups v1.1.0 // indirect - github.com/containerd/cgroups/v3 v3.0.2 // indirect github.com/containerd/console v1.0.3 // indirect github.com/containerd/continuity v0.4.2 // indirect github.com/containerd/fifo v1.1.0 // indirect @@ -211,7 +212,6 @@ require ( github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/runc v1.1.9 // indirect - github.com/opencontainers/runtime-spec v1.1.0-rc.2 // indirect github.com/opencontainers/selinux v1.11.0 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pkg/errors v0.9.1 // indirect diff --git a/go.sum b/go.sum index 68711b1b7e5d..ad317338b374 100644 --- a/go.sum +++ b/go.sum @@ -133,8 +133,8 @@ github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHe github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4= -github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY= +github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y= +github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cloudflare/cfssl v1.6.4 h1:NMOvfrEjFfC63K3SGXgAnFdsgkmiq4kATme5BfcqrO8= github.com/cloudflare/cfssl v1.6.4/go.mod h1:8b3CQMxfWPAeom3zBnGJ6sd+G1NkL5TXqmDXacb+1J0= @@ -255,8 +255,8 @@ github.com/fatih/color v1.13.0/go.mod 
h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYF github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/foxcpp/go-mockdns v1.0.0 h1:7jBqxd3WDWwi/6WhDvacvH1XsN3rOLXyHM1uhvIx6FI= -github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE= github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= +github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= @@ -959,8 +959,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 h1:tnebWN09GYg9OLPss1KXj8txwZc6X6uMr6VFdcGNbHw= -golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= +golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb h1:xIApU0ow1zwMa2uL1VDNeQlNVFTWMQxZUZCMDy0Q4Us= +golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod 
h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= diff --git a/internal/pkg/sysinfo/probes/linux/cgroup_controllers.go b/internal/pkg/sysinfo/probes/linux/cgroup_controllers.go index 362a8059bd32..6df7902b5650 100644 --- a/internal/pkg/sysinfo/probes/linux/cgroup_controllers.go +++ b/internal/pkg/sysinfo/probes/linux/cgroup_controllers.go @@ -60,6 +60,9 @@ func (c *cgroupControllerProbe) Probe(reporter probes.Reporter) error { } else if available, err := sys.probeController(c.name); err != nil { return reporter.Error(desc, err) } else if available.available { + if available.warning != "" { + return reporter.Warn(desc, available, available.warning) + } return reporter.Pass(desc, available) } else if c.require { return reporter.Reject(desc, available, "") @@ -71,10 +74,15 @@ func (c *cgroupControllerProbe) Probe(reporter probes.Reporter) error { type cgroupControllerAvailable struct { available bool msg string + warning string } func (a cgroupControllerAvailable) String() (msg string) { if a.available { + if a.warning != "" { + return a.msg + } + msg = "available" } else { msg = "unavailable" @@ -97,7 +105,7 @@ func (p *cgroupControllerProber) probeController(s cgroupSystem, controllerName p.once.Do(func() { p.controllers = make(map[string]cgroupControllerAvailable) p.err = s.loadControllers(func(name, msg string) { - p.controllers[name] = cgroupControllerAvailable{true, msg} + p.controllers[name] = cgroupControllerAvailable{true, msg, ""} }) }) return p.controllers[controllerName], p.err diff --git a/internal/pkg/sysinfo/probes/linux/cgroup_v2.go b/internal/pkg/sysinfo/probes/linux/cgroup_v2.go index b1979404f17f..9ce4627f70c7 100644 --- a/internal/pkg/sysinfo/probes/linux/cgroup_v2.go +++ b/internal/pkg/sysinfo/probes/linux/cgroup_v2.go @@ -21,11 +21,17 @@ package linux import ( "errors" "fmt" + "io/fs" "os" "path/filepath" - "regexp" - "strconv" "strings" + + "k8s.io/utils/pointer" + + "github.com/cilium/ebpf/rlimit" + "github.com/containerd/cgroups/v3/cgroup2" + 
"github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" ) type cgroupV2 struct { @@ -39,63 +45,192 @@ func (*cgroupV2) String() string { } func (g *cgroupV2) probeController(controllerName string) (cgroupControllerAvailable, error) { + switch controllerName { + case "devices": + return g.detectDevicesController() + case "freezer": + return g.detectFreezerController() + } return g.controllers.probeController(g, controllerName) } func (g *cgroupV2) loadControllers(seen func(string, string)) error { - // Some controllers are implicitly enabled by the kernel. Those controllers - // do not appear in /sys/fs/cgroup/cgroup.controllers. Their availability is - // assumed based on the kernel version, as it is hard to detect them - // directly. - // https://github.com/torvalds/linux/blob/v5.3/kernel/cgroup/cgroup.c#L433-L434 - if major, minor, err := parseKernelRelease(g.probeUname); err == nil { - /* devices: since 4.15 */ if major > 4 || (major == 4 && minor >= 15) { - seen("devices", "assumed") - } - /* freezer: since 5.2 */ if major > 5 || (major == 5 && minor >= 2) { - seen("freezer", "assumed") - } - } else { - return err + return g.detectListedRootControllers(seen) +} + +// Detects the device controller by trying to attach a dummy program of type +// BPF_CGROUP_DEVICE to a cgroup. Since the controller has no interface files +// and is implemented purely on top of BPF, this is the only reliable way to +// detect it. A best-guess detection via the kernel version has the major +// drawback of not working with kernels that have a lot of backported features, +// such as RHEL and friends. 
+// +// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#device-controller +func (g *cgroupV2) detectDevicesController() (cgroupControllerAvailable, error) { + err := attachDummyDeviceFilter(g.mountPoint) + switch { + case err == nil: + return cgroupControllerAvailable{true, "device filters attachable", ""}, nil + + // EACCES occurs when not allowed to create cgroups. + // EPERM occurs when not allowed to load eBPF programs. + case errors.Is(err, os.ErrPermission) && os.Geteuid() != 0: + return cgroupControllerAvailable{true, "unknown", "insufficient permissions, try with elevated permissions"}, nil + case errors.Is(err, unix.EROFS): + return cgroupControllerAvailable{true, "unknown", fmt.Sprintf("read-only file system: %s", g.mountPoint)}, nil + + case eBPFProgramUnsupported(err): + return cgroupControllerAvailable{false, err.Error(), ""}, nil } - controllerData, err := os.ReadFile(filepath.Join(g.mountPoint, "cgroup.controllers")) + return cgroupControllerAvailable{}, err +} + +// Attaches a dummy program of type BPF_CGROUP_DEVICE to a randomly created +// cgroup and removes the program and cgroup again. +func attachDummyDeviceFilter(mountPoint string) (err error) { + insts, license, err := cgroup2.DeviceFilter([]specs.LinuxDeviceCgroup{{ + Allow: true, + Type: "a", + Major: pointer.Int64(-1), + Minor: pointer.Int64(-1), + Access: "rwm", + }}) if err != nil { - return err + return fmt.Errorf("failed to create eBPF device filter program: %w", err) } - for _, controllerName := range strings.Fields(string(controllerData)) { - seen(controllerName, "") - switch controllerName { - case "cpu": // This is the successor to the version 1 cpu and cpuacct controllers. - seen("cpuacct", "via cpu in "+g.String()) - case "io": // This is the successor of the version 1 blkio controller. 
- seen("blkio", "via io in "+g.String()) + tmpCgroupPath, err := os.MkdirTemp(mountPoint, "k0s-devices-detection-*") + if err != nil { + return fmt.Errorf("failed to create temporary cgroup: %w", err) + } + defer func() { err = errors.Join(err, os.Remove(tmpCgroupPath)) }() + + dirFD, err := unix.Open(tmpCgroupPath, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0) + if err != nil { + return fmt.Errorf("failed to open temporary cgroup: %w", &fs.PathError{Op: "open", Path: tmpCgroupPath, Err: err}) + } + defer func() { + if closeErr := unix.Close(dirFD); closeErr != nil { + err = errors.Join(err, &fs.PathError{Op: "close", Path: tmpCgroupPath, Err: closeErr}) + } + }() + + close, err := cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD) + if err != nil { + // RemoveMemlock may be required on kernels < 5.11 + // observed on debian 11: 5.10.0-21-armmp-lpae #1 SMP Debian 5.10.162-1 (2023-01-21) armv7l + // https://github.com/cilium/ebpf/blob/v0.11.0/prog.go#L356-L360 + if errors.Is(err, unix.EPERM) && strings.Contains(err.Error(), "RemoveMemlock") { + if err2 := rlimit.RemoveMemlock(); err2 != nil { + err = errors.Join(err, err2) + } else { + // Try again, MEMLOCK should be removed by now. + close, err2 = cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD) + if err2 != nil { + err = errors.Join(err, err2) + } else { + err = nil + } + } } } + if err != nil { + if eBPFProgramUnsupported(err) { + return err + } + return fmt.Errorf("failed to load/attach eBPF device filter program: %w", err) + } - return nil + return close() +} + +// Returns true if the given error indicates that an eBPF program is unsupported +// by the kernel. +func eBPFProgramUnsupported(err error) bool { + // https://github.com/cilium/ebpf/blob/v0.11.0/features/prog.go#L43-L49 + + switch { + // EINVAL occurs when attempting to create a program with an unknown type. 
+ case errors.Is(err, unix.EINVAL): + return true + + // E2BIG occurs when ProgLoadAttr contains non-zero bytes past the end of + // the struct known by the running kernel, meaning the kernel is too old to + // support the given prog type. + case errors.Is(err, unix.E2BIG): + return true + + default: + return false + } } -func parseKernelRelease(probeUname unameProber) (int64, int64, error) { - uname, err := probeUname() +// Detect the freezer controller. It doesn't appear in the cgroup.controllers +// file. Check for the existence of the cgroup.freeze file in the k0s cgroup +// instead, or try to create a dummy cgroup if k0s runs in the root cgroup. A failure to remove that dummy cgroup again is reported via the returned error. +// +// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#core-interface-files +func (g *cgroupV2) detectFreezerController() (_ cgroupControllerAvailable, err error) { + + // Detect the freezer controller by checking k0s's cgroup for the existence + // of the cgroup.freeze file. + // https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#processes + cgroupPath, err := cgroup2.NestedGroupPath("") if err != nil { - return 0, 0, err + return cgroupControllerAvailable{}, fmt.Errorf("failed to get k0s cgroup: %w", err) } - var major, minor int64 - r := regexp.MustCompile(`^(\d+)\.(\d+)(\.|$)`) - if matches := r.FindStringSubmatch(uname.osRelease.value); matches == nil { - err = errors.New("unsupported format") - } else { - if major, err = strconv.ParseInt(matches[1], 10, 16); err == nil { - minor, err = strconv.ParseInt(matches[2], 10, 16) + if cgroupPath != "/" { + cgroupPath = filepath.Join(g.mountPoint, cgroupPath) + } else { // The root cgroup cannot be frozen. Try to create a dummy cgroup. 
+ tmpCgroupPath, mkdirErr := os.MkdirTemp(g.mountPoint, "k0s-freezer-detection-*") + if mkdirErr != nil { + if errors.Is(mkdirErr, os.ErrPermission) && os.Geteuid() != 0 { + return cgroupControllerAvailable{true, "unknown", "insufficient permissions, try with elevated permissions"}, nil + } + if errors.Is(mkdirErr, unix.EROFS) { // A read-only mount affects root as well, same as in the devices probe. + return cgroupControllerAvailable{true, "unknown", fmt.Sprintf("read-only file system: %s", g.mountPoint)}, nil + } + + return cgroupControllerAvailable{}, fmt.Errorf("failed to create temporary cgroup: %w", mkdirErr) } + defer func() { err = errors.Join(err, os.Remove(tmpCgroupPath)) }() // err is the named result, so cleanup failures are returned. + cgroupPath = tmpCgroupPath + } + + // Check if the cgroup.freeze file exists + if stat, err := os.Stat(filepath.Join(cgroupPath, "cgroup.freeze")); (err == nil && stat.IsDir()) || os.IsNotExist(err) { + return cgroupControllerAvailable{false, "cgroup.freeze doesn't exist", ""}, nil + } else if err != nil { + return cgroupControllerAvailable{}, err } + return cgroupControllerAvailable{true, "cgroup.freeze exists", ""}, nil +} +// Detects all the listed root controllers. +// +// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#core-interface-files +func (g *cgroupV2) detectListedRootControllers(seen func(string, string)) (err error) { + root, err := cgroup2.Load("/", cgroup2.WithMountpoint(g.mountPoint)) if err != nil { - err = fmt.Errorf("failed to parse kernel release %q: %w", uname.osRelease, err) + return fmt.Errorf("failed to load root cgroup: %w", err) } - return major, minor, err + controllerNames, err := root.RootControllers() // This reads cgroup.controllers + if err != nil { + return fmt.Errorf("failed to list cgroup root controllers: %w", err) + } + + for _, controllerName := range controllerNames { + seen(controllerName, "is a listed root controller") + switch controllerName { + case "cpu": // This is the successor to the version 1 cpu and cpuacct controllers. 
+ seen("cpuacct", "via cpu in "+g.String()) + case "io": // This is the successor of the version 1 blkio controller. + seen("blkio", "via io in "+g.String()) + } + } + + return nil } diff --git a/internal/pkg/sysinfo/probes/linux/cgroups_test.go b/internal/pkg/sysinfo/probes/linux/cgroups_test.go index 7112f05e2c2e..29bdeb3fe5d7 100644 --- a/internal/pkg/sysinfo/probes/linux/cgroups_test.go +++ b/internal/pkg/sysinfo/probes/linux/cgroups_test.go @@ -59,7 +59,7 @@ func TestCgroupsProbes_Probe(t *testing.T) { t.Run("Pass", func(t *testing.T) { init() - available := cgroupControllerAvailable{true, ""} + available := cgroupControllerAvailable{true, "", ""} reporter.On("Pass", mock.Anything, mockSys).Return(nil) mockSys.On("probeController", "foo").Return(available, nil)