init: close internal fds before execve · opencontainers/runc@284ba30 (original) (raw)
`@@ -7,8 +7,11 @@ import (
`
7
7
`"fmt"
`
8
8
`"os"
`
9
9
`"strconv"
`
``
10
`+
_ "unsafe" // for go:linkname
`
10
11
``
11
12
`"golang.org/x/sys/unix"
`
``
13
+
``
14
`+
"github.com/opencontainers/runc/libcontainer/logs"
`
12
15
`)
`
13
16
``
14
17
`// EnsureProcHandle returns whether or not the given file handle is on procfs.
`
`@@ -23,9 +26,11 @@ func EnsureProcHandle(fh *os.File) error {
`
23
26
`return nil
`
24
27
`}
`
25
28
``
26
``
`-
// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for
`
27
``
`-
// the process (except for those below the given fd value).
`
28
``
`-
func CloseExecFrom(minFd int) error {
`
``
29
`+
type fdFunc func(fd int)
`
``
30
+
``
31
`+
// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in
`
``
32
`+
// the current process and whose number is at least minFd.
`
``
33
`+
func fdRangeFrom(minFd int, fn fdFunc) error {
`
29
34
`fdDir, err := os.Open("/proc/self/fd")
`
30
35
`if err != nil {
`
31
36
`return err
`
`@@ -50,15 +55,66 @@ func CloseExecFrom(minFd int) error {
`
50
55
`if fd < minFd {
`
51
56
`continue
`
52
57
` }
`
53
``
`-
// Intentionally ignore errors from unix.CloseOnExec -- the cases where
`
54
``
`-
// this might fail are basically file descriptors that have already
`
55
``
`-
// been closed (including and especially the one that was created when
`
56
``
`-
// os.ReadDir did the "opendir" syscall).
`
57
``
`-
unix.CloseOnExec(fd)
`
``
58
`+
// Ignore the file descriptor we used for readdir, as it will be closed
`
``
59
`+
// when we return.
`
``
60
`+
if uintptr(fd) == fdDir.Fd() {
`
``
61
`+
continue
`
``
62
`+
}
`
``
63
`+
// Run the closure.
`
``
64
`+
fn(fd)
`
58
65
` }
`
59
66
`return nil
`
60
67
`}
`
61
68
``
``
69
`+
// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or
`
``
70
`+
// equal to minFd in the current process.
`
``
71
`+
func CloseExecFrom(minFd int) error {
`
``
72
`+
return fdRangeFrom(minFd, unix.CloseOnExec)
`
``
73
`+
}
`
``
74
+
``
75
`+
//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor

// runtime_IsPollDescriptor is a body-less declaration that the go:linkname
// directive above binds to "internal/poll".IsPollDescriptor (which is why this
// file blank-imports "unsafe").
//
// In order to make sure we do not close the internal epoll descriptors the Go
// runtime uses, we need to ensure that we skip descriptors that match
// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing;
// unfortunately, there's no other way to be sure we're only keeping the file
// descriptors the Go runtime needs. Hopefully nothing blows up doing this...
func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive
`
``
83
+
``
84
`+
// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the
`
``
85
`+
// current process, except for those critical to Go's runtime (such as the
`
``
86
`+
// netpoll management descriptors).
`
``
87
`+
//
`
``
88
`+
// NOTE: That this function is incredibly dangerous to use in most Go code, as
`
``
89
`+
// closing file descriptors from underneath *os.File handles can lead to very
`
``
90
`+
// bad behaviour (the closed file descriptor can be re-used and then any
`
``
91
`+
// *os.File operations would apply to the wrong file). This function is only
`
``
92
`+
// intended to be called from the last stage of runc init.
`
``
93
`+
func UnsafeCloseFrom(minFd int) error {
`
``
94
`+
// We must not close some file descriptors.
`
``
95
`+
return fdRangeFrom(minFd, func(fd int) {
`
``
96
`+
if runtime_IsPollDescriptor(uintptr(fd)) {
`
``
97
`+
// These are the Go runtimes internal netpoll file descriptors.
`
``
98
`+
// These file descriptors are operated on deep in the Go scheduler,
`
``
99
`+
// and closing those files from underneath Go can result in panics.
`
``
100
`+
// There is no issue with keeping them because they are not
`
``
101
`+
// executable and are not useful to an attacker anyway. Also we
`
``
102
`+
// don't have any choice.
`
``
103
`+
return
`
``
104
`+
}
`
``
105
`+
if logs.IsLogrusFd(uintptr(fd)) {
`
``
106
`+
// Do not close the logrus output fd. We cannot exec a pipe, and
`
``
107
`+
// the contents are quite limited (very little attacker control,
`
``
108
`+
// JSON-encoded) making shellcode attacks unlikely.
`
``
109
`+
return
`
``
110
`+
}
`
``
111
`+
// There's nothing we can do about errors from close(2), and the
`
``
112
`+
// only likely error to be seen is EBADF which indicates the fd was
`
``
113
`+
// already closed (in which case, we got what we wanted).
`
``
114
`+
_ = unix.Close(fd)
`
``
115
`+
})
`
``
116
`+
}
`
``
117
+
62
118
`// NewSockPair returns a new unix socket pair
`
63
119
`func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
`
64
120
`fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
`