Compare commits
No commits in common. 'c9' and 'c8-stream-2.0' have entirely different histories.
c9
...
c8-stream-
@ -1 +1 @@
|
||||
SOURCES/v1.1.12.tar.gz
|
||||
SOURCES/runc-dc9208a.tar.gz
|
||||
|
@ -1 +1 @@
|
||||
3fac650358578b8694012a44b1d5b156523c3402 SOURCES/v1.1.12.tar.gz
|
||||
32859590dea35b77eed012c388d97fc12fdfdb93 SOURCES/runc-dc9208a.tar.gz
|
||||
|
@ -0,0 +1,540 @@
|
||||
From 2dd156b190c02476191fc2522f9b0e0a1a098608 Mon Sep 17 00:00:00 2001
|
||||
From: Kir Kolyshkin <kolyshkin@gmail.com>
|
||||
Date: Mon, 17 May 2021 16:11:35 -0700
|
||||
Subject: [PATCH] rootfs: add mount destination validation
|
||||
|
||||
This is a manual backport of fix for CVE-2021-30465 to runc-1.0.0-rc10
|
||||
(aka -rc90), upstream commit 84c14b43fa703db7 by Aleksa Sarai.
|
||||
|
||||
Original description follows.
|
||||
|
||||
----
|
||||
|
||||
Because the target of a mount is inside a container (which may be a
|
||||
volume that is shared with another container), there exists a race
|
||||
condition where the target of the mount may change to a path containing
|
||||
a symlink after we have sanitised the path -- resulting in us
|
||||
inadvertently mounting the path outside of the container.
|
||||
|
||||
This is not immediately useful because we are in a mount namespace with
|
||||
MS_SLAVE mount propagation applied to "/", so we cannot mount on top of
|
||||
host paths in the host namespace. However, if any subsequent mountpoints
|
||||
in the configuration use a subdirectory of that host path as a source,
|
||||
those subsequent mounts will use an attacker-controlled source path
|
||||
(resolved within the host rootfs) -- allowing the bind-mounting of "/"
|
||||
into the container.
|
||||
|
||||
While arguably configuration issues like this are not entirely within
|
||||
runc's threat model, within the context of Kubernetes (and possibly
|
||||
other container managers that provide semi-arbitrary container creation
|
||||
privileges to untrusted users) this is a legitimate issue. Since we
|
||||
cannot block mounting from the host into the container, we need to block
|
||||
the first stage of this attack (mounting onto a path outside the
|
||||
container).
|
||||
|
||||
The long-term plan to solve this would be to migrate to libpathrs, but
|
||||
as a stop-gap we implement libpathrs-like path verification through
|
||||
readlink(/proc/self/fd/$n) and then do mount operations through the
|
||||
procfd once it's been verified to be inside the container. The target
|
||||
could move after we've checked it, but if it is inside the container
|
||||
then we can assume that it is safe for the same reason that libpathrs
|
||||
operations would be safe.
|
||||
|
||||
A slight wrinkle is the "copyup" functionality we provide for tmpfs,
|
||||
which is the only case where we want to do a mount on the host
|
||||
filesystem. To facilitate this, I split out the copy-up functionality
|
||||
entirely so that the logic isn't interspersed with the regular tmpfs
|
||||
logic. In addition, all dependencies on m.Destination being overwritten
|
||||
have been removed since that pattern was just begging to be a source of
|
||||
more mount-target bugs (we do still have to modify m.Destination for
|
||||
tmpfs-copyup but we only do it temporarily).
|
||||
|
||||
Fixes: CVE-2021-30465
|
||||
Reported-by: Etienne Champetier <champetier.etienne@gmail.com>
|
||||
Co-authored-by: Noah Meyerhans <nmeyerha@amazon.com>
|
||||
Reviewed-by: Samuel Karp <skarp@amazon.com>
|
||||
Reviewed-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
|
||||
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
|
||||
|
||||
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
|
||||
---
|
||||
libcontainer/rootfs_linux.go | 225 ++++++++++++++++---------------
|
||||
libcontainer/utils/utils.go | 54 ++++++++
|
||||
libcontainer/utils/utils_test.go | 35 +++++
|
||||
3 files changed, 204 insertions(+), 110 deletions(-)
|
||||
|
||||
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
|
||||
index 106c4c2b..fe9afe48 100644
|
||||
--- a/libcontainer/rootfs_linux.go
|
||||
+++ b/libcontainer/rootfs_linux.go
|
||||
@@ -19,8 +19,9 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/mount"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
- libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
+ "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
+ "github.com/sirupsen/logrus"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
@@ -30,7 +31,7 @@ const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
||||
// needsSetupDev returns true if /dev needs to be set up.
|
||||
func needsSetupDev(config *configs.Config) bool {
|
||||
for _, m := range config.Mounts {
|
||||
- if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" {
|
||||
+ if m.Device == "bind" && utils.CleanPath(m.Destination) == "/dev" {
|
||||
return false
|
||||
}
|
||||
}
|
||||
@@ -131,7 +132,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||||
func finalizeRootfs(config *configs.Config) (err error) {
|
||||
// remount dev as ro if specified
|
||||
for _, m := range config.Mounts {
|
||||
- if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
|
||||
+ if utils.CleanPath(m.Destination) == "/dev" {
|
||||
if m.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
|
||||
if err := remountReadonly(m); err != nil {
|
||||
return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
|
||||
@@ -200,8 +201,6 @@ func prepareBindMount(m *configs.Mount, rootfs string) error {
|
||||
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
|
||||
return err
|
||||
}
|
||||
- // update the mount with the correct dest after symlinks are resolved.
|
||||
- m.Destination = dest
|
||||
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -238,18 +237,21 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||
if err := os.MkdirAll(subsystemPath, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
- flags := defaultMountFlags
|
||||
- if m.Flags&unix.MS_RDONLY != 0 {
|
||||
- flags = flags | unix.MS_RDONLY
|
||||
- }
|
||||
- cgroupmount := &configs.Mount{
|
||||
- Source: "cgroup",
|
||||
- Device: "cgroup",
|
||||
- Destination: subsystemPath,
|
||||
- Flags: flags,
|
||||
- Data: filepath.Base(subsystemPath),
|
||||
- }
|
||||
- if err := mountNewCgroup(cgroupmount); err != nil {
|
||||
+ if err := utils.WithProcfd(rootfs, b.Destination, func(procfd string) error {
|
||||
+ flags := defaultMountFlags
|
||||
+ if m.Flags&unix.MS_RDONLY != 0 {
|
||||
+ flags = flags | unix.MS_RDONLY
|
||||
+ }
|
||||
+ var (
|
||||
+ source = "cgroup"
|
||||
+ data = filepath.Base(subsystemPath)
|
||||
+ )
|
||||
+ if data == "systemd" {
|
||||
+ data = cgroups.CgroupNamePrefix + data
|
||||
+ source = "systemd"
|
||||
+ }
|
||||
+ return unix.Mount(source, procfd, "cgroup", uintptr(flags), data)
|
||||
+ }); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
@@ -279,22 +281,67 @@ func mountCgroupV2(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||
if err := os.MkdirAll(cgroupPath, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
- if err := unix.Mount(m.Source, cgroupPath, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
||||
- // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
||||
- if err == unix.EPERM || err == unix.EBUSY {
|
||||
- return unix.Mount("/sys/fs/cgroup", cgroupPath, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
||||
+ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||
+ if err := unix.Mount(m.Source, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
||||
+ // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
||||
+ if err == unix.EPERM || err == unix.EBUSY {
|
||||
+ return unix.Mount("/sys/fs/cgroup", procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
||||
+ }
|
||||
+ return err
|
||||
}
|
||||
+ return nil
|
||||
+ })
|
||||
+}
|
||||
+
|
||||
+func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
|
||||
+ // Set up a scratch dir for the tmpfs on the host.
|
||||
+ tmpdir, err := prepareTmp("/tmp")
|
||||
+ if err != nil {
|
||||
+ return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
|
||||
+ }
|
||||
+ defer cleanupTmp(tmpdir)
|
||||
+ tmpDir, err := ioutil.TempDir(tmpdir, "runctmpdir")
|
||||
+ if err != nil {
|
||||
+ return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
||||
+ }
|
||||
+ defer os.RemoveAll(tmpDir)
|
||||
+
|
||||
+ // Configure the *host* tmpdir as if it's the container mount. We change
|
||||
+ // m.Destination since we are going to mount *on the host*.
|
||||
+ oldDest := m.Destination
|
||||
+ m.Destination = tmpDir
|
||||
+ err = mountPropagate(m, "/", mountLabel)
|
||||
+ m.Destination = oldDest
|
||||
+ if err != nil {
|
||||
return err
|
||||
}
|
||||
- return nil
|
||||
+ defer func() {
|
||||
+ if Err != nil {
|
||||
+ if err := unix.Unmount(tmpDir, unix.MNT_DETACH); err != nil {
|
||||
+ logrus.Warnf("tmpcopyup: failed to unmount tmpdir on error: %v", err)
|
||||
+ }
|
||||
+ }
|
||||
+ }()
|
||||
+
|
||||
+ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) (Err error) {
|
||||
+ // Copy the container data to the host tmpdir. We append "/" to force
|
||||
+ // CopyDirectory to resolve the symlink rather than trying to copy the
|
||||
+ // symlink itself.
|
||||
+ if err := fileutils.CopyDirectory(procfd+"/", tmpDir); err != nil {
|
||||
+ return fmt.Errorf("tmpcopyup: failed to copy %s to %s (%s): %v", m.Destination, procfd, tmpDir, err)
|
||||
+ }
|
||||
+ // Now move the mount into the container.
|
||||
+ if err := unix.Mount(tmpDir, procfd, "", unix.MS_MOVE, ""); err != nil {
|
||||
+ return fmt.Errorf("tmpcopyup: failed to move mount %s to %s (%s): %v", tmpDir, procfd, m.Destination, err)
|
||||
+ }
|
||||
+ return nil
|
||||
+ })
|
||||
}
|
||||
|
||||
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
|
||||
- var (
|
||||
- dest = m.Destination
|
||||
- )
|
||||
- if !strings.HasPrefix(dest, rootfs) {
|
||||
- dest = filepath.Join(rootfs, dest)
|
||||
+ dest, err := securejoin.SecureJoin(rootfs, m.Destination)
|
||||
+ if err != nil {
|
||||
+ return err
|
||||
}
|
||||
|
||||
switch m.Device {
|
||||
@@ -329,46 +376,21 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||
}
|
||||
return nil
|
||||
case "tmpfs":
|
||||
- copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
||||
- tmpDir := ""
|
||||
stat, err := os.Stat(dest)
|
||||
if err != nil {
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
- if copyUp {
|
||||
- tmpdir, err := prepareTmp("/tmp")
|
||||
- if err != nil {
|
||||
- return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
|
||||
- }
|
||||
- defer cleanupTmp(tmpdir)
|
||||
- tmpDir, err = ioutil.TempDir(tmpdir, "runctmpdir")
|
||||
- if err != nil {
|
||||
- return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
||||
- }
|
||||
- defer os.RemoveAll(tmpDir)
|
||||
- m.Destination = tmpDir
|
||||
+
|
||||
+ if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP {
|
||||
+ err = doTmpfsCopyUp(m, rootfs, mountLabel)
|
||||
+ } else {
|
||||
+ err = mountPropagate(m, rootfs, mountLabel)
|
||||
}
|
||||
- if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||
+ if err != nil {
|
||||
return err
|
||||
}
|
||||
- if copyUp {
|
||||
- if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
|
||||
- errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
|
||||
- if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
||||
- return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
||||
- }
|
||||
- return errMsg
|
||||
- }
|
||||
- if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil {
|
||||
- errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
|
||||
- if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
||||
- return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
||||
- }
|
||||
- return errMsg
|
||||
- }
|
||||
- }
|
||||
if stat != nil {
|
||||
if err = os.Chmod(dest, stat.Mode()); err != nil {
|
||||
return err
|
||||
@@ -424,19 +446,9 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||
}
|
||||
}
|
||||
default:
|
||||
- // ensure that the destination of the mount is resolved of symlinks at mount time because
|
||||
- // any previous mounts can invalidate the next mount's destination.
|
||||
- // this can happen when a user specifies mounts within other mounts to cause breakouts or other
|
||||
- // evil stuff to try to escape the container's rootfs.
|
||||
- var err error
|
||||
- if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
|
||||
- return err
|
||||
- }
|
||||
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
|
||||
return err
|
||||
}
|
||||
- // update the mount with the correct dest after symlinks are resolved.
|
||||
- m.Destination = dest
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -611,7 +623,7 @@ func createDevices(config *configs.Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
-func bindMountDeviceNode(dest string, node *configs.Device) error {
|
||||
+func bindMountDeviceNode(rootfs, dest string, node *configs.Device) error {
|
||||
f, err := os.Create(dest)
|
||||
if err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
@@ -619,24 +631,29 @@ func bindMountDeviceNode(dest string, node *configs.Device) error {
|
||||
if f != nil {
|
||||
f.Close()
|
||||
}
|
||||
- return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
|
||||
+ return utils.WithProcfd(rootfs, dest, func(procfd string) error {
|
||||
+ return unix.Mount(node.Path, procfd, "bind", unix.MS_BIND, "")
|
||||
+ })
|
||||
}
|
||||
|
||||
// Creates the device node in the rootfs of the container.
|
||||
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
|
||||
- dest := filepath.Join(rootfs, node.Path)
|
||||
+ dest, err := securejoin.SecureJoin(rootfs, node.Path)
|
||||
+ if err != nil {
|
||||
+ return err
|
||||
+ }
|
||||
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if bind {
|
||||
- return bindMountDeviceNode(dest, node)
|
||||
+ return bindMountDeviceNode(rootfs, dest, node)
|
||||
}
|
||||
if err := mknodDevice(dest, node); err != nil {
|
||||
if os.IsExist(err) {
|
||||
return nil
|
||||
} else if os.IsPermission(err) {
|
||||
- return bindMountDeviceNode(dest, node)
|
||||
+ return bindMountDeviceNode(rootfs, dest, node)
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -955,55 +972,43 @@ func writeSystemProperty(key, value string) error {
|
||||
}
|
||||
|
||||
func remount(m *configs.Mount, rootfs string) error {
|
||||
- var (
|
||||
- dest = m.Destination
|
||||
- )
|
||||
- if !strings.HasPrefix(dest, rootfs) {
|
||||
- dest = filepath.Join(rootfs, dest)
|
||||
- }
|
||||
- return unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
|
||||
+ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||
+ return unix.Mount(m.Source, procfd, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
|
||||
+ })
|
||||
}
|
||||
|
||||
// Do the mount operation followed by additional mounts required to take care
|
||||
-// of propagation flags.
|
||||
+// of propagation flags. This will always be scoped inside the container rootfs.
|
||||
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
||||
var (
|
||||
- dest = m.Destination
|
||||
data = label.FormatMountLabel(m.Data, mountLabel)
|
||||
flags = m.Flags
|
||||
)
|
||||
- if libcontainerUtils.CleanPath(dest) == "/dev" {
|
||||
+ if utils.CleanPath(m.Destination) == "/dev" {
|
||||
flags &= ^unix.MS_RDONLY
|
||||
}
|
||||
|
||||
- copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
||||
- if !(copyUp || strings.HasPrefix(dest, rootfs)) {
|
||||
- dest = filepath.Join(rootfs, dest)
|
||||
- }
|
||||
-
|
||||
- if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
|
||||
- return err
|
||||
- }
|
||||
-
|
||||
- for _, pflag := range m.PropagationFlags {
|
||||
- if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil {
|
||||
- return err
|
||||
+ // Because the destination is inside a container path which might be
|
||||
+ // mutating underneath us, we verify that we are actually going to mount
|
||||
+ // inside the container with WithProcfd() -- mounting through a procfd
|
||||
+ // mounts on the target.
|
||||
+ if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||
+ return unix.Mount(m.Source, procfd, m.Device, uintptr(flags), data)
|
||||
+ }); err != nil {
|
||||
+ return fmt.Errorf("mount through procfd: %v", err)
|
||||
+ }
|
||||
+ // We have to apply mount propagation flags in a separate WithProcfd() call
|
||||
+ // because the previous call invalidates the passed procfd -- the mount
|
||||
+ // target needs to be re-opened.
|
||||
+ if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||
+ for _, pflag := range m.PropagationFlags {
|
||||
+ if err := unix.Mount("", procfd, "", uintptr(pflag), ""); err != nil {
|
||||
+ return err
|
||||
+ }
|
||||
}
|
||||
- }
|
||||
- return nil
|
||||
-}
|
||||
-
|
||||
-func mountNewCgroup(m *configs.Mount) error {
|
||||
- var (
|
||||
- data = m.Data
|
||||
- source = m.Source
|
||||
- )
|
||||
- if data == "systemd" {
|
||||
- data = cgroups.CgroupNamePrefix + data
|
||||
- source = "systemd"
|
||||
- }
|
||||
- if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), data); err != nil {
|
||||
- return err
|
||||
+ return nil
|
||||
+ }); err != nil {
|
||||
+ return fmt.Errorf("change mount propagation through procfd: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
diff --git a/libcontainer/utils/utils.go b/libcontainer/utils/utils.go
|
||||
index 40ccfaa1..c1418ef9 100644
|
||||
--- a/libcontainer/utils/utils.go
|
||||
+++ b/libcontainer/utils/utils.go
|
||||
@@ -2,12 +2,15 @@ package utils
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
+ "fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
+ "strconv"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
+ securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
@@ -73,6 +76,57 @@ func CleanPath(path string) string {
|
||||
return filepath.Clean(path)
|
||||
}
|
||||
|
||||
+// stripRoot returns the passed path, stripping the root path if it was
|
||||
+// (lexicially) inside it. Note that both passed paths will always be treated
|
||||
+// as absolute, and the returned path will also always be absolute. In
|
||||
+// addition, the paths are cleaned before stripping the root.
|
||||
+func stripRoot(root, path string) string {
|
||||
+ // Make the paths clean and absolute.
|
||||
+ root, path = CleanPath("/"+root), CleanPath("/"+path)
|
||||
+ switch {
|
||||
+ case path == root:
|
||||
+ path = "/"
|
||||
+ case root == "/":
|
||||
+ // do nothing
|
||||
+ case strings.HasPrefix(path, root+"/"):
|
||||
+ path = strings.TrimPrefix(path, root+"/")
|
||||
+ }
|
||||
+ return CleanPath("/" + path)
|
||||
+}
|
||||
+
|
||||
+// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
|
||||
+// corresponding to the unsafePath resolved within the root. Before passing the
|
||||
+// fd, this path is verified to have been inside the root -- so operating on it
|
||||
+// through the passed fdpath should be safe. Do not access this path through
|
||||
+// the original path strings, and do not attempt to use the pathname outside of
|
||||
+// the passed closure (the file handle will be freed once the closure returns).
|
||||
+func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
||||
+ // Remove the root then forcefully resolve inside the root.
|
||||
+ unsafePath = stripRoot(root, unsafePath)
|
||||
+ path, err := securejoin.SecureJoin(root, unsafePath)
|
||||
+ if err != nil {
|
||||
+ return fmt.Errorf("resolving path inside rootfs failed: %v", err)
|
||||
+ }
|
||||
+
|
||||
+ // Open the target path.
|
||||
+ fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
|
||||
+ if err != nil {
|
||||
+ return fmt.Errorf("open o_path procfd: %v", err)
|
||||
+ }
|
||||
+ defer fh.Close()
|
||||
+
|
||||
+ // Double-check the path is the one we expected.
|
||||
+ procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
|
||||
+ if realpath, err := os.Readlink(procfd); err != nil {
|
||||
+ return fmt.Errorf("procfd verification failed: %v", err)
|
||||
+ } else if realpath != path {
|
||||
+ return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
||||
+ }
|
||||
+
|
||||
+ // Run the closure.
|
||||
+ return fn(procfd)
|
||||
+}
|
||||
+
|
||||
// SearchLabels searches a list of key-value pairs for the provided key and
|
||||
// returns the corresponding value. The pairs must be separated with '='.
|
||||
func SearchLabels(labels []string, query string) string {
|
||||
diff --git a/libcontainer/utils/utils_test.go b/libcontainer/utils/utils_test.go
|
||||
index 395eedcf..5b80cac6 100644
|
||||
--- a/libcontainer/utils/utils_test.go
|
||||
+++ b/libcontainer/utils/utils_test.go
|
||||
@@ -140,3 +140,38 @@ func TestCleanPath(t *testing.T) {
|
||||
t.Errorf("expected to receive '/foo' and received %s", path)
|
||||
}
|
||||
}
|
||||
+
|
||||
+func TestStripRoot(t *testing.T) {
|
||||
+ for _, test := range []struct {
|
||||
+ root, path, out string
|
||||
+ }{
|
||||
+ // Works with multiple components.
|
||||
+ {"/a/b", "/a/b/c", "/c"},
|
||||
+ {"/hello/world", "/hello/world/the/quick-brown/fox", "/the/quick-brown/fox"},
|
||||
+ // '/' must be a no-op.
|
||||
+ {"/", "/a/b/c", "/a/b/c"},
|
||||
+ // Must be the correct order.
|
||||
+ {"/a/b", "/a/c/b", "/a/c/b"},
|
||||
+ // Must be at start.
|
||||
+ {"/abc/def", "/foo/abc/def/bar", "/foo/abc/def/bar"},
|
||||
+ // Must be a lexical parent.
|
||||
+ {"/foo/bar", "/foo/barSAMECOMPONENT", "/foo/barSAMECOMPONENT"},
|
||||
+ // Must only strip the root once.
|
||||
+ {"/foo/bar", "/foo/bar/foo/bar/baz", "/foo/bar/baz"},
|
||||
+ // Deal with .. in a fairly sane way.
|
||||
+ {"/foo/bar", "/foo/bar/../baz", "/foo/baz"},
|
||||
+ {"/foo/bar", "../../../../../../foo/bar/baz", "/baz"},
|
||||
+ {"/foo/bar", "/../../../../../../foo/bar/baz", "/baz"},
|
||||
+ {"/foo/bar/../baz", "/foo/baz/bar", "/bar"},
|
||||
+ {"/foo/bar/../baz", "/foo/baz/../bar/../baz/./foo", "/foo"},
|
||||
+ // All paths are made absolute before stripping.
|
||||
+ {"foo/bar", "/foo/bar/baz/bee", "/baz/bee"},
|
||||
+ {"/foo/bar", "foo/bar/baz/beef", "/baz/beef"},
|
||||
+ {"foo/bar", "foo/bar/baz/beets", "/baz/beets"},
|
||||
+ } {
|
||||
+ got := stripRoot(test.root, test.path)
|
||||
+ if got != test.out {
|
||||
+ t.Errorf("stripRoot(%q, %q) -- got %q, expected %q", test.root, test.path, got, test.out)
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,278 @@
|
||||
From 3d99c51e1b38a440804a55c9f314f62cc50b8902 Mon Sep 17 00:00:00 2001
|
||||
From: Giuseppe Scrivano <gscrivan@redhat.com>
|
||||
Date: Fri, 25 May 2018 18:04:06 +0200
|
||||
Subject: [PATCH] sd-notify: do not hang when NOTIFY_SOCKET is used with create
|
||||
|
||||
if NOTIFY_SOCKET is used, do not block the main runc process waiting
|
||||
for events on the notify socket. Bind mount the parent directory of
|
||||
the notify socket, so that "start" can create the socket and it is
|
||||
still accessible from the container.
|
||||
|
||||
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
|
||||
---
|
||||
notify_socket.go | 112 ++++++++++++++++++++++++++++++++++-------------
|
||||
signals.go | 4 +-
|
||||
start.go | 13 +++++-
|
||||
utils_linux.go | 12 ++++-
|
||||
4 files changed, 105 insertions(+), 36 deletions(-)
|
||||
|
||||
diff --git a/notify_socket.go b/notify_socket.go
|
||||
index e7453c62..d961453a 100644
|
||||
--- a/notify_socket.go
|
||||
+++ b/notify_socket.go
|
||||
@@ -7,11 +7,13 @@ import (
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
+ "path"
|
||||
"path/filepath"
|
||||
+ "strconv"
|
||||
+ "time"
|
||||
|
||||
+ "github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
-
|
||||
- "github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
@@ -27,12 +29,12 @@ func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *
|
||||
}
|
||||
|
||||
root := filepath.Join(context.GlobalString("root"), id)
|
||||
- path := filepath.Join(root, "notify.sock")
|
||||
+ socketPath := filepath.Join(root, "notify", "notify.sock")
|
||||
|
||||
notifySocket := ¬ifySocket{
|
||||
socket: nil,
|
||||
host: notifySocketHost,
|
||||
- socketPath: path,
|
||||
+ socketPath: socketPath,
|
||||
}
|
||||
|
||||
return notifySocket
|
||||
@@ -44,13 +46,19 @@ func (s *notifySocket) Close() error {
|
||||
|
||||
// If systemd is supporting sd_notify protocol, this function will add support
|
||||
// for sd_notify protocol from within the container.
|
||||
-func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) {
|
||||
- mount := specs.Mount{Destination: s.host, Source: s.socketPath, Options: []string{"bind"}}
|
||||
+func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) error {
|
||||
+ pathInContainer := filepath.Join("/run/notify", path.Base(s.socketPath))
|
||||
+ mount := specs.Mount{
|
||||
+ Destination: path.Dir(pathInContainer),
|
||||
+ Source: path.Dir(s.socketPath),
|
||||
+ Options: []string{"bind", "nosuid", "noexec", "nodev", "ro"},
|
||||
+ }
|
||||
spec.Mounts = append(spec.Mounts, mount)
|
||||
- spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", s.host))
|
||||
+ spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", pathInContainer))
|
||||
+ return nil
|
||||
}
|
||||
|
||||
-func (s *notifySocket) setupSocket() error {
|
||||
+func (s *notifySocket) bindSocket() error {
|
||||
addr := net.UnixAddr{
|
||||
Name: s.socketPath,
|
||||
Net: "unixgram",
|
||||
@@ -71,45 +79,89 @@ func (s *notifySocket) setupSocket() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
-// pid1 must be set only with -d, as it is used to set the new process as the main process
|
||||
-// for the service in systemd
|
||||
-func (s *notifySocket) run(pid1 int) {
|
||||
- buf := make([]byte, 512)
|
||||
- notifySocketHostAddr := net.UnixAddr{Name: s.host, Net: "unixgram"}
|
||||
+func (s *notifySocket) setupSocketDirectory() error {
|
||||
+ return os.Mkdir(path.Dir(s.socketPath), 0755)
|
||||
+}
|
||||
+
|
||||
+func notifySocketStart(context *cli.Context, notifySocketHost, id string) (*notifySocket, error) {
|
||||
+ notifySocket := newNotifySocket(context, notifySocketHost, id)
|
||||
+ if notifySocket == nil {
|
||||
+ return nil, nil
|
||||
+ }
|
||||
+
|
||||
+ if err := notifySocket.bindSocket(); err != nil {
|
||||
+ return nil, err
|
||||
+ }
|
||||
+ return notifySocket, nil
|
||||
+}
|
||||
+
|
||||
+func (n *notifySocket) waitForContainer(container libcontainer.Container) error {
|
||||
+ s, err := container.State()
|
||||
+ if err != nil {
|
||||
+ return err
|
||||
+ }
|
||||
+ return n.run(s.InitProcessPid)
|
||||
+}
|
||||
+
|
||||
+func (n *notifySocket) run(pid1 int) error {
|
||||
+ if n.socket == nil {
|
||||
+ return nil
|
||||
+ }
|
||||
+ notifySocketHostAddr := net.UnixAddr{Name: n.host, Net: "unixgram"}
|
||||
client, err := net.DialUnix("unixgram", nil, ¬ifySocketHostAddr)
|
||||
if err != nil {
|
||||
- logrus.Error(err)
|
||||
- return
|
||||
+ return err
|
||||
}
|
||||
- for {
|
||||
- r, err := s.socket.Read(buf)
|
||||
- if err != nil {
|
||||
- break
|
||||
+
|
||||
+ ticker := time.NewTicker(time.Millisecond * 100)
|
||||
+ defer ticker.Stop()
|
||||
+
|
||||
+ fileChan := make(chan []byte)
|
||||
+ go func() {
|
||||
+ for {
|
||||
+ buf := make([]byte, 512)
|
||||
+ r, err := n.socket.Read(buf)
|
||||
+ if err != nil {
|
||||
+ return
|
||||
+ }
|
||||
+ got := buf[0:r]
|
||||
+ if !bytes.HasPrefix(got, []byte("READY=")) {
|
||||
+ continue
|
||||
+ }
|
||||
+ fileChan <- got
|
||||
+ return
|
||||
}
|
||||
- var out bytes.Buffer
|
||||
- for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) {
|
||||
- if bytes.HasPrefix(line, []byte("READY=")) {
|
||||
+ }()
|
||||
+
|
||||
+ for {
|
||||
+ select {
|
||||
+ case <-ticker.C:
|
||||
+ _, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid1)))
|
||||
+ if err != nil {
|
||||
+ return nil
|
||||
+ }
|
||||
+ case b := <-fileChan:
|
||||
+ for _, line := range bytes.Split(b, []byte{'\n'}) {
|
||||
+ var out bytes.Buffer
|
||||
_, err = out.Write(line)
|
||||
if err != nil {
|
||||
- return
|
||||
+ return err
|
||||
}
|
||||
|
||||
_, err = out.Write([]byte{'\n'})
|
||||
if err != nil {
|
||||
- return
|
||||
+ return err
|
||||
}
|
||||
|
||||
_, err = client.Write(out.Bytes())
|
||||
if err != nil {
|
||||
- return
|
||||
+ return err
|
||||
}
|
||||
|
||||
// now we can inform systemd to use pid1 as the pid to monitor
|
||||
- if pid1 > 0 {
|
||||
- newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
|
||||
- client.Write([]byte(newPid))
|
||||
- }
|
||||
- return
|
||||
+ newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
|
||||
+ client.Write([]byte(newPid))
|
||||
+ return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
diff --git a/signals.go b/signals.go
|
||||
index b67f65a0..dd25e094 100644
|
||||
--- a/signals.go
|
||||
+++ b/signals.go
|
||||
@@ -70,6 +70,7 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
|
||||
h.notifySocket.run(pid1)
|
||||
return 0, nil
|
||||
}
|
||||
+ h.notifySocket.run(os.Getpid())
|
||||
go h.notifySocket.run(0)
|
||||
}
|
||||
|
||||
@@ -97,9 +98,6 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
|
||||
// status because we must ensure that any of the go specific process
|
||||
// fun such as flushing pipes are complete before we return.
|
||||
process.Wait()
|
||||
- if h.notifySocket != nil {
|
||||
- h.notifySocket.Close()
|
||||
- }
|
||||
return e.status, nil
|
||||
}
|
||||
}
|
||||
diff --git a/start.go b/start.go
|
||||
index 2bb698b2..3a1769a4 100644
|
||||
--- a/start.go
|
||||
+++ b/start.go
|
||||
@@ -3,6 +3,7 @@ package main
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
+ "os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/urfave/cli"
|
||||
@@ -31,7 +32,17 @@ your host.`,
|
||||
}
|
||||
switch status {
|
||||
case libcontainer.Created:
|
||||
- return container.Exec()
|
||||
+ notifySocket, err := notifySocketStart(context, os.Getenv("NOTIFY_SOCKET"), container.ID())
|
||||
+ if err != nil {
|
||||
+ return err
|
||||
+ }
|
||||
+ if err := container.Exec(); err != nil {
|
||||
+ return err
|
||||
+ }
|
||||
+ if notifySocket != nil {
|
||||
+ return notifySocket.waitForContainer(container)
|
||||
+ }
|
||||
+ return nil
|
||||
case libcontainer.Stopped:
|
||||
return errors.New("cannot start a container that has stopped")
|
||||
case libcontainer.Running:
|
||||
diff --git a/utils_linux.go b/utils_linux.go
|
||||
index 984e6b0f..46c26246 100644
|
||||
--- a/utils_linux.go
|
||||
+++ b/utils_linux.go
|
||||
@@ -408,7 +408,9 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
|
||||
|
||||
notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
|
||||
if notifySocket != nil {
|
||||
- notifySocket.setupSpec(context, spec)
|
||||
+ if err := notifySocket.setupSpec(context, spec); err != nil {
|
||||
+ return -1, err
|
||||
+ }
|
||||
}
|
||||
|
||||
container, err := createContainer(context, id, spec)
|
||||
@@ -417,10 +419,16 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
|
||||
}
|
||||
|
||||
if notifySocket != nil {
|
||||
- err := notifySocket.setupSocket()
|
||||
+ err := notifySocket.setupSocketDirectory()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
+ if action == CT_ACT_RUN {
|
||||
+ err := notifySocket.bindSocket()
|
||||
+ if err != nil {
|
||||
+ return -1, err
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
// Support on-demand socket activation by passing file descriptors into the container init process.
|
||||
--
|
||||
2.21.0
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue