From 3826b1493402bc567c904e441fe13e08245d137c Mon Sep 17 00:00:00 2001 From: Ilya Hanov Date: Tue, 18 Oct 2022 20:06:43 +0800 Subject: [PATCH] IMA Namespace initial support Added new "ima" namespace type for config.json configuration file for runc, e.g.: ... "namespaces": [ ... { "type": "ima" }, ... ], ... IMA namespace will be created only if USER namespace is, too, specified, otherwise runc returns error, so the only allowed scheme for enabling IMA namespace is: ... "namespaces": [ ... { "type": "user" }, { "type": "ima" }, ... ], ... otherwise runc returns an error. Signed-off-by: Ilya Hanov --- libcontainer/configs/namespaces_linux.go | 22 +++++ libcontainer/configs/namespaces_syscall.go | 26 +++++- .../configs/namespaces_syscall_unsupported.go | 7 ++ libcontainer/configs/validate/validator.go | 7 +- libcontainer/container_linux.go | 21 +++-- libcontainer/message_linux.go | 23 +++--- libcontainer/nsenter/namespace.h | 8 ++ libcontainer/nsenter/nsexec.c | 81 +++++++++++++++++++ libcontainer/specconv/spec_linux.go | 1 + 9 files changed, 176 insertions(+), 20 deletions(-) diff --git a/libcontainer/configs/namespaces_linux.go b/libcontainer/configs/namespaces_linux.go index d52d6fcd147..d493c659a70 100644 --- a/libcontainer/configs/namespaces_linux.go +++ b/libcontainer/configs/namespaces_linux.go @@ -14,6 +14,7 @@ const ( NEWIPC NamespaceType = "NEWIPC" NEWUSER NamespaceType = "NEWUSER" NEWCGROUP NamespaceType = "NEWCGROUP" + NEWIMA NamespaceType = "NEWIMA" ) var ( @@ -38,6 +39,8 @@ func NsName(ns NamespaceType) string { return "uts" case NEWCGROUP: return "cgroup" + case NEWIMA: + return "ima" } return "" } @@ -56,6 +59,11 @@ func IsNamespaceSupported(ns NamespaceType) bool { if nsFile == "" { return false } + // ima namespace is a part of userns and + // has no procfs entry, so redirect the check to userns + if ns == NEWIMA { + nsFile = NsName(NEWUSER) + } _, err := os.Stat("/proc/self/ns/" + nsFile) // a namespace is supported if it exists and we have 
permissions to read it supported = err == nil @@ -72,6 +80,7 @@ func NamespaceTypes() []NamespaceType { NEWPID, NEWNS, NEWCGROUP, + NEWIMA, } } @@ -82,10 +91,23 @@ type Namespace struct { Path string `json:"path"` } +// GetPath() concatenates given pid and namespace type +// into procfs path. A few namespaces may not have procfs entry, +// call HasProcfs() first to make sure this namespace has one. func (n *Namespace) GetPath(pid int) string { return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type)) } +// HasProcfs() checks if the given namespace has procfs entry. +func (n *Namespace) HasProcfs() bool { + switch n.Type { + case NEWIMA: + return false + default: + return true + } +} + func (n *Namespaces) Remove(t NamespaceType) bool { i := n.index(t) if i == -1 { diff --git a/libcontainer/configs/namespaces_syscall.go b/libcontainer/configs/namespaces_syscall.go index 0516dba8d09..509b5c3bb3b 100644 --- a/libcontainer/configs/namespaces_syscall.go +++ b/libcontainer/configs/namespaces_syscall.go @@ -6,10 +6,10 @@ package configs import "golang.org/x/sys/unix" func (n *Namespace) Syscall() int { - return namespaceInfo[n.Type] + return namespaceCloneInfo[n.Type] } -var namespaceInfo = map[NamespaceType]int{ +var namespaceCloneInfo = map[NamespaceType]int{ NEWNET: unix.CLONE_NEWNET, NEWNS: unix.CLONE_NEWNS, NEWUSER: unix.CLONE_NEWUSER, @@ -19,6 +19,12 @@ var namespaceInfo = map[NamespaceType]int{ NEWCGROUP: unix.CLONE_NEWCGROUP, } +// These flags must be the same according to +// libcontainer/nsenter/namespace.h +var namespaceNonCloneInfo = map[NamespaceType]int{ + NEWIMA: 0x80000000, +} + // CloneFlags parses the container's Namespaces options to set the correct // flags on clone, unshare. This function returns flags only for new namespaces. 
func (n *Namespaces) CloneFlags() uintptr { @@ -27,7 +33,21 @@ func (n *Namespaces) CloneFlags() uintptr { if v.Path != "" { continue } - flag |= namespaceInfo[v.Type] + flag |= namespaceCloneInfo[v.Type] + } + return uintptr(flag) +} + +// NonCloneFlags parses the container's Namespaces options that are not +// related to clone() or unshare() system calls. This function returns +// flags only for new namespaces. +func (n *Namespaces) NonCloneFlags() uintptr { + var flag int + for _, v := range *n { + if v.Path != "" { + continue + } + flag |= namespaceNonCloneInfo[v.Type] } return uintptr(flag) } diff --git a/libcontainer/configs/namespaces_syscall_unsupported.go b/libcontainer/configs/namespaces_syscall_unsupported.go index fbb0d49071e..90c5d57e25c 100644 --- a/libcontainer/configs/namespaces_syscall_unsupported.go +++ b/libcontainer/configs/namespaces_syscall_unsupported.go @@ -12,3 +12,10 @@ func (n *Namespace) Syscall() int { func (n *Namespaces) CloneFlags() uintptr { panic("No namespace syscall support") } + +// NonCloneFlags parses the container's Namespaces options that are not +// related to clone() or unshare() system calls. This function returns +// flags only for new namespaces. 
+func (n *Namespaces) NonCloneFlags() uintptr { + panic("No namespace syscall support") +} diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go index 2027a37203e..f52a6df15ea 100644 --- a/libcontainer/configs/validate/validator.go +++ b/libcontainer/configs/validate/validator.go @@ -96,7 +96,12 @@ func security(config *configs.Config) error { } func namespaces(config *configs.Config) error { - if config.Namespaces.Contains(configs.NEWUSER) { + // ima namespace is a part of user namespace but still is a separate namespace + if config.Namespaces.Contains(configs.NEWIMA) && !config.Namespaces.Contains(configs.NEWUSER) { + return errors.New("IMA namespace cannot be created without USER namespace") + } + + if config.Namespaces.Contains(configs.NEWUSER) || config.Namespaces.Contains(configs.NEWIMA) { if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { return errors.New("USER namespaces aren't enabled in the kernel") } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 34f38f34e0c..162f1582a97 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -537,7 +537,8 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, l } } _, sharePidns := nsMaps[configs.NEWPID] - data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard) + data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), + c.config.Namespaces.NonCloneFlags(), nsMaps, initStandard) if err != nil { return nil, err } @@ -595,7 +596,7 @@ func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockPair, } // for setns process, we don't have to set cloneflags as the process namespaces // will only be set via setns syscall - data, err := c.bootstrapData(0, state.NamespacePaths, initSetns) + data, err := c.bootstrapData(0, 0, state.NamespacePaths, initSetns) if err != nil { return nil, err } @@ -2028,7 +2029,9 @@ func (c 
*Container) currentState() (*State, error) { } if pid > 0 { for _, ns := range c.config.Namespaces { - state.NamespacePaths[ns.Type] = ns.GetPath(pid) + if ns.HasProcfs() { + state.NamespacePaths[ns.Type] = ns.GetPath(pid) + } } for _, nsType := range configs.NamespaceTypes() { if !configs.IsNamespaceSupported(nsType) { @@ -2036,7 +2039,9 @@ func (c *Container) currentState() (*State, error) { } if _, ok := state.NamespacePaths[nsType]; !ok { ns := configs.Namespace{Type: nsType} - state.NamespacePaths[ns.Type] = ns.GetPath(pid) + if ns.HasProcfs() { + state.NamespacePaths[ns.Type] = ns.GetPath(pid) + } } } } @@ -2119,7 +2124,7 @@ type netlinkError struct{ error } // such as one that uses nsenter package to bootstrap the container's // init process correctly, i.e. with correct namespaces, uid/gid // mapping etc. -func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, it initType) (_ io.Reader, Err error) { +func (c *Container) bootstrapData(cloneFlags uintptr, nonCloneFlags uintptr, nsMaps map[configs.NamespaceType]string, it initType) (_ io.Reader, Err error) { // create the netlink message r := nl.NewNetlinkRequest(int(InitMsg), 0) @@ -2142,6 +2147,12 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa Value: uint32(cloneFlags), }) + // write nonCloneFlags + r.AddData(&Int32msg{ + Type: NonCloneFlagsAttr, + Value: uint32(nonCloneFlags), + }) + // write custom namespace paths if len(nsMaps) > 0 { nsPaths, err := c.orderNamespacePaths(nsMaps) diff --git a/libcontainer/message_linux.go b/libcontainer/message_linux.go index 6d1107e875d..a224a991894 100644 --- a/libcontainer/message_linux.go +++ b/libcontainer/message_linux.go @@ -11,17 +11,18 @@ import ( // list of known message types we want to send to bootstrap program // The number is randomly chosen to not conflict with known netlink types const ( - InitMsg uint16 = 62000 - CloneFlagsAttr uint16 = 27281 - NsPathsAttr uint16 = 27282 - 
UidmapAttr uint16 = 27283 - GidmapAttr uint16 = 27284 - SetgroupAttr uint16 = 27285 - OomScoreAdjAttr uint16 = 27286 - RootlessEUIDAttr uint16 = 27287 - UidmapPathAttr uint16 = 27288 - GidmapPathAttr uint16 = 27289 - MountSourcesAttr uint16 = 27290 + InitMsg uint16 = 62000 + CloneFlagsAttr uint16 = 27281 + NsPathsAttr uint16 = 27282 + UidmapAttr uint16 = 27283 + GidmapAttr uint16 = 27284 + SetgroupAttr uint16 = 27285 + OomScoreAdjAttr uint16 = 27286 + RootlessEUIDAttr uint16 = 27287 + UidmapPathAttr uint16 = 27288 + GidmapPathAttr uint16 = 27289 + MountSourcesAttr uint16 = 27290 + NonCloneFlagsAttr uint16 = 27291 ) type Int32msg struct { diff --git a/libcontainer/nsenter/namespace.h b/libcontainer/nsenter/namespace.h index 9e9bdca05e1..5bb461353ed 100644 --- a/libcontainer/nsenter/namespace.h +++ b/libcontainer/nsenter/namespace.h @@ -28,5 +28,13 @@ #ifndef CLONE_NEWNET # define CLONE_NEWNET 0x40000000 /* New network namespace */ #endif +/* + * IMA Namespace has no CLONE_* flag, it is activated + * via securityfs, so clone()/unshare() system calls are not aware + * of IMA Namespace. + */ +#ifndef NCLONE_NEWIMA +# define NCLONE_NEWIMA 0x80000000 /* New ima namespace */ +#endif #endif /* NSENTER_NAMESPACE_H */ diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 9ecf791e93f..acdeefb8446 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -72,6 +73,7 @@ struct nlconfig_t { /* Process settings. 
*/ uint32_t cloneflags; + uint32_t noncloneflags; char *oom_score_adj; size_t oom_score_adj_len; @@ -127,6 +129,7 @@ static int loglevel = DEBUG; #define UIDMAPPATH_ATTR 27288 #define GIDMAPPATH_ATTR 27289 #define MOUNT_SOURCES_ATTR 27290 +#define NON_CLONE_FLAGS_ATTR 27291 /* * Use the raw syscall for versions of glibc which don't include a function for @@ -518,6 +521,9 @@ static void nl_parse(int fd, struct nlconfig_t *config) case CLONE_FLAGS_ATTR: config->cloneflags = readint32(current); break; + case NON_CLONE_FLAGS_ATTR: + config->noncloneflags = readint32(current); + break; case ROOTLESS_EUID_ATTR: config->is_rootless_euid = readint8(current); /* boolean */ break; @@ -632,6 +638,76 @@ void join_namespaces(char *nslist) free(namespaces); } +/* + * enable_ima_ns - sets up IMA Namespace for the process + * + * In order to set it up correctly, the calling process + * should have new userns first, because if it is in the first userns then + * new imans won't be created (imans has no CLONE_* flags). The function does the following: + * + * 1. It mounts temporary securityfs, in order to not bother host's + * filesystem, we've chosen container's rootfs as a location for this, + * (to be more accurate we've chosen /mnt inside the rootfs). Make + * sure the calling process already created new userns. + * + * 2. It writes '1' to rootfs/mnt/integrity/ima/active. This is + * the key stage of enabling imans. After that imans will be actually created. + * + * 3. Unmount temporary securityfs.
+ * + */ +#define IMA_ACTIVE_PATH "integrity/ima/active" +/* IMA securityfs relative mount point */ +#define IMA_SECURITYFS_REL_MNT "/mnt/" + +static void enable_ima_ns(void) +{ + int err, mntflags, ima_active_path_len; + char data, ima_active[PATH_MAX + 1]; + + ima_active_path_len = strlen(IMA_ACTIVE_PATH); + /* + * Now current working directory is our rootfs, + * so in order to avoid some conflicts with Host filesystem, + * we should mount securityfs inside the rootfs of the currently + * starting container. The mount point for this securityfs is + * CWD + "/mnt" + */ + if(!getcwd(ima_active, PATH_MAX)) + bail("failed to get path of the current rootfs"); + + if ((strnlen(ima_active, PATH_MAX) + strlen(IMA_SECURITYFS_REL_MNT)) > PATH_MAX) + bail("securityfs temporary path is too long"); + + strncat(ima_active, IMA_SECURITYFS_REL_MNT, PATH_MAX); + mntflags = MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME; + err = mount("securityfs", ima_active, "securityfs", mntflags , NULL); + if (err != 0) + bail("failed to mount temporary securityfs"); + + /* + * According to security/integrity/ima/ima_fs.c:ima_write_active() + * the length of the buffer <= 2 and it is only allowed to write '1', '1\0' or '1\n' + */ + if ((strnlen(ima_active, PATH_MAX) + ima_active_path_len) > PATH_MAX) + bail("ima active path is too long"); + + data = '1'; + strncat(ima_active, IMA_ACTIVE_PATH, PATH_MAX); + err = write_file(&data, 1, ima_active); + if (err < 0) + bail("cannot set up IMA namespace"); + + /* + * cut ima_active path to be able to unmount securityfs + * 1 is to remove the last unnecessary slash '/.../path/' -> '/.../path' + */ + ima_active[strnlen(ima_active, PATH_MAX) - ima_active_path_len - 1] = '\0'; + err = umount(ima_active); + if (err != 0) + bail("failed to unmount temporary securityfs"); +} + /* Defined in cloned_binary.c. 
*/ extern int ensure_cloned_binary(void); @@ -1226,6 +1302,11 @@ void nsexec(void) if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0) bail("failed to unshare remaining namespaces (except cgroupns)"); + if (config.noncloneflags & NCLONE_NEWIMA) { + write_log(DEBUG, "enable ima namespace"); + enable_ima_ns(); + } + /* Ask our parent to send the mount sources fds. */ if (config.mountsources) { s = SYNC_MOUNTSOURCES_PLS; diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index d62e34be713..a996c48b3fd 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -49,6 +49,7 @@ func initMaps() { specs.IPCNamespace: configs.NEWIPC, specs.UTSNamespace: configs.NEWUTS, specs.CgroupNamespace: configs.NEWCGROUP, + specs.ImaNamespace: configs.NEWIMA, } mountPropagationMapping = map[string]int{