github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/dmz/cloned_binary_linux.go (about) 1 package dmz 2 3 import ( 4 "errors" 5 "fmt" 6 "io" 7 "os" 8 "strconv" 9 10 "github.com/sirupsen/logrus" 11 "golang.org/x/sys/unix" 12 13 "github.com/opencontainers/runc/libcontainer/system" 14 ) 15 16 type SealFunc func(**os.File) error 17 18 var ( 19 _ SealFunc = sealMemfd 20 _ SealFunc = sealFile 21 ) 22 23 func isExecutable(f *os.File) bool { 24 if err := unix.Faccessat(int(f.Fd()), "", unix.X_OK, unix.AT_EACCESS|unix.AT_EMPTY_PATH); err == nil { 25 return true 26 } else if err == unix.EACCES { 27 return false 28 } 29 path := "/proc/self/fd/" + strconv.Itoa(int(f.Fd())) 30 if err := unix.Access(path, unix.X_OK); err == nil { 31 return true 32 } else if err == unix.EACCES { 33 return false 34 } 35 // Cannot check -- assume it's executable (if not, exec will fail). 36 logrus.Debugf("cannot do X_OK check on binary %s -- assuming it's executable", f.Name()) 37 return true 38 } 39 40 const baseMemfdSeals = unix.F_SEAL_SEAL | unix.F_SEAL_SHRINK | unix.F_SEAL_GROW | unix.F_SEAL_WRITE 41 42 func sealMemfd(f **os.File) error { 43 if err := (*f).Chmod(0o511); err != nil { 44 return err 45 } 46 // Try to set the newer memfd sealing flags, but we ignore 47 // errors because they are not needed and we want to continue 48 // to work on older kernels. 49 fd := (*f).Fd() 50 // F_SEAL_FUTURE_WRITE -- Linux 5.1 51 _, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, unix.F_SEAL_FUTURE_WRITE) 52 // F_SEAL_EXEC -- Linux 6.3 53 const F_SEAL_EXEC = 0x20 //nolint:revive // this matches the unix.* name 54 _, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, F_SEAL_EXEC) 55 // Apply all original memfd seals. 56 _, err := unix.FcntlInt(fd, unix.F_ADD_SEALS, baseMemfdSeals) 57 return os.NewSyscallError("fcntl(F_ADD_SEALS)", err) 58 } 59 60 // Memfd creates a sealable executable memfd (supported since Linux 3.17). 61 func Memfd(comment string) (*os.File, SealFunc, error) { 62 file, err := system.ExecutableMemfd("runc_cloned:"+comment, unix.MFD_ALLOW_SEALING|unix.MFD_CLOEXEC) 63 return file, sealMemfd, err 64 } 65 66 func sealFile(f **os.File) error { 67 if err := (*f).Chmod(0o511); err != nil { 68 return err 69 } 70 // When sealing an O_TMPFILE-style descriptor we need to 71 // re-open the path as O_PATH to clear the existing write 72 // handle we have. 73 opath, err := os.OpenFile(fmt.Sprintf("/proc/self/fd/%d", (*f).Fd()), unix.O_PATH|unix.O_CLOEXEC, 0) 74 if err != nil { 75 return fmt.Errorf("reopen tmpfile: %w", err) 76 } 77 _ = (*f).Close() 78 *f = opath 79 return nil 80 } 81 82 // otmpfile creates an open(O_TMPFILE) file in the given directory (supported 83 // since Linux 3.11). 84 func otmpfile(dir string) (*os.File, SealFunc, error) { 85 file, err := os.OpenFile(dir, unix.O_TMPFILE|unix.O_RDWR|unix.O_EXCL|unix.O_CLOEXEC, 0o700) 86 if err != nil { 87 return nil, nil, fmt.Errorf("O_TMPFILE creation failed: %w", err) 88 } 89 // Make sure we actually got an unlinked O_TMPFILE descriptor. 90 var stat unix.Stat_t 91 if err := unix.Fstat(int(file.Fd()), &stat); err != nil { 92 file.Close() 93 return nil, nil, fmt.Errorf("cannot fstat O_TMPFILE fd: %w", err) 94 } else if stat.Nlink != 0 { 95 file.Close() 96 return nil, nil, errors.New("O_TMPFILE has non-zero nlink") 97 } 98 return file, sealFile, err 99 } 100 101 // mktemp creates a classic unlinked file in the given directory. 102 func mktemp(dir string) (*os.File, SealFunc, error) { 103 file, err := os.CreateTemp(dir, "runc.") 104 if err != nil { 105 return nil, nil, err 106 } 107 // Unlink the file and verify it was unlinked. 108 if err := os.Remove(file.Name()); err != nil { 109 return nil, nil, fmt.Errorf("unlinking classic tmpfile: %w", err) 110 } 111 var stat unix.Stat_t 112 if err := unix.Fstat(int(file.Fd()), &stat); err != nil { 113 return nil, nil, fmt.Errorf("cannot fstat classic tmpfile: %w", err) 114 } else if stat.Nlink != 0 { 115 return nil, nil, fmt.Errorf("classic tmpfile %s has non-zero nlink after unlink", file.Name()) 116 } 117 return file, sealFile, err 118 } 119 120 func getSealableFile(comment, tmpDir string) (file *os.File, sealFn SealFunc, err error) { 121 // First, try an executable memfd (supported since Linux 3.17). 122 file, sealFn, err = Memfd(comment) 123 if err == nil { 124 return 125 } 126 logrus.Debugf("memfd cloned binary failed, falling back to O_TMPFILE: %v", err) 127 128 // The tmpDir here (c.root) might be mounted noexec, so we need a couple of 129 // fallbacks to try. It's possible that none of these are writable and 130 // executable, in which case there's nothing we can practically do (other 131 // than mounting our own executable tmpfs, which would have its own 132 // issues). 133 tmpDirs := []string{ 134 tmpDir, 135 os.TempDir(), 136 "/tmp", 137 ".", 138 "/bin", 139 "/", 140 } 141 142 // Try to fallback to O_TMPFILE (supported since Linux 3.11). 143 for _, dir := range tmpDirs { 144 file, sealFn, err = otmpfile(dir) 145 if err != nil { 146 continue 147 } 148 if !isExecutable(file) { 149 logrus.Debugf("tmpdir %s is noexec -- trying a different tmpdir", dir) 150 file.Close() 151 continue 152 } 153 return 154 } 155 logrus.Debugf("O_TMPFILE cloned binary failed, falling back to mktemp(): %v", err) 156 // Finally, try a classic unlinked temporary file. 157 for _, dir := range tmpDirs { 158 file, sealFn, err = mktemp(dir) 159 if err != nil { 160 continue 161 } 162 if !isExecutable(file) { 163 logrus.Debugf("tmpdir %s is noexec -- trying a different tmpdir", dir) 164 file.Close() 165 continue 166 } 167 return 168 } 169 return nil, nil, fmt.Errorf("could not create sealable file for cloned binary: %w", err) 170 } 171 172 // CloneBinary creates a "sealed" clone of a given binary, which can be used to 173 // thwart attempts by the container process to gain access to host binaries 174 // through procfs magic-link shenanigans. For more details on why this is 175 // necessary, see CVE-2019-5736. 176 func CloneBinary(src io.Reader, size int64, name, tmpDir string) (*os.File, error) { 177 logrus.Debugf("cloning %s binary (%d bytes)", name, size) 178 file, sealFn, err := getSealableFile(name, tmpDir) 179 if err != nil { 180 return nil, err 181 } 182 copied, err := system.Copy(file, src) 183 if err != nil { 184 file.Close() 185 return nil, fmt.Errorf("copy binary: %w", err) 186 } else if copied != size { 187 file.Close() 188 return nil, fmt.Errorf("copied binary size mismatch: %d != %d", copied, size) 189 } 190 if err := sealFn(&file); err != nil { 191 file.Close() 192 return nil, fmt.Errorf("could not seal fd: %w", err) 193 } 194 return file, nil 195 } 196 197 // IsCloned returns whether the given file can be guaranteed to be a safe exe. 198 func IsCloned(exe *os.File) bool { 199 seals, err := unix.FcntlInt(exe.Fd(), unix.F_GET_SEALS, 0) 200 if err != nil { 201 // /proc/self/exe is probably not a memfd 202 logrus.Debugf("F_GET_SEALS on %s failed: %v", exe.Name(), err) 203 return false 204 } 205 // The memfd must have all of the base seals applied. 206 logrus.Debugf("checking %s memfd seals: 0x%x", exe.Name(), seals) 207 return seals&baseMemfdSeals == baseMemfdSeals 208 } 209 210 // CloneSelfExe makes a clone of the current process's binary (through 211 // /proc/self/exe). This binary can then be used for "runc init" in order to 212 // make sure the container process can never resolve the original runc binary. 213 // For more details on why this is necessary, see CVE-2019-5736. 214 func CloneSelfExe(tmpDir string) (*os.File, error) { 215 selfExe, err := os.Open("/proc/self/exe") 216 if err != nil { 217 return nil, fmt.Errorf("opening current binary: %w", err) 218 } 219 defer selfExe.Close() 220 221 stat, err := selfExe.Stat() 222 if err != nil { 223 return nil, fmt.Errorf("checking /proc/self/exe size: %w", err) 224 } 225 size := stat.Size() 226 227 return CloneBinary(selfExe, size, "/proc/self/exe", tmpDir) 228 } 229 230 // IsSelfExeCloned returns whether /proc/self/exe is a cloned binary that can 231 // be guaranteed to be safe. This means that it must be a sealed memfd. Other 232 // types of clones cannot be completely verified as safe. 233 func IsSelfExeCloned() bool { 234 selfExe, err := os.Open("/proc/self/exe") 235 if err != nil { 236 logrus.Debugf("open /proc/self/exe failed: %v", err) 237 return false 238 } 239 defer selfExe.Close() 240 return IsCloned(selfExe) 241 }