github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/contrib/cmd/memfd-bind/memfd-bind.go (about) 1 /* 2 * Copyright (c) 2023 SUSE LLC 3 * Copyright (c) 2023 Aleksa Sarai <cyphar@cyphar.com> 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package main 19 20 import ( 21 "errors" 22 "fmt" 23 "io" 24 "os" 25 "os/signal" 26 "runtime" 27 "strings" 28 "time" 29 30 "github.com/opencontainers/runc/libcontainer/dmz" 31 32 "github.com/sirupsen/logrus" 33 "github.com/urfave/cli" 34 "golang.org/x/sys/unix" 35 ) 36 37 // version will be populated by the Makefile, read from 38 // VERSION file of the source code. 39 var version = "" 40 41 // gitCommit will be the hash that the binary was built from 42 // and will be populated by the Makefile. 43 var gitCommit = "" 44 45 const ( 46 usage = `Open Container Initiative contrib/cmd/memfd-bind 47 48 In order to protect against certain container attacks, every runc invocation 49 that involves creating or joining a container will cause runc to make a copy of 50 the runc binary in memory (usually to a memfd). While "runc init" is very 51 short-lived, this extra memory usage can cause problems for containers with 52 very small memory limits (or containers that have many "runc exec" invocations 53 applied to them at the same time). 54 55 memfd-bind is a tool to create a persistent memfd-sealed-copy of the runc binary, 56 which will cause runc to not make its own copy. This means you can get the 57 benefits of using a sealed memfd as runc's binary (even in a container breakout 58 attack to get write access to the runc binary, neither the underlying binary 59 nor the memfd copy can be changed). 60 61 To use memfd-bind, just specify which path you want to create a socket path at 62 which you want to receive terminals: 63 64 $ sudo memfd-bind /usr/bin/runc 65 66 Note that (due to kernel restrictions on bind-mounts), this program must remain 67 running on the host in order for the binary to be readable (it is recommended 68 you use a systemd unit to keep this process around). 69 70 If this program dies, there will be a leftover mountpoint that always returns 71 -EINVAL when attempting to access it. You need to use memfd-bind --cleanup on the 72 path in order to unmount the path (regular umount(8) will not work): 73 74 $ sudo memfd-bind --cleanup /usr/bin/runc 75 76 Note that (due to restrictions on /proc/$pid/fd/$fd magic-link resolution), 77 only privileged users (specifically, those that have ptrace privileges over the 78 memfd-bind daemon) can access the memfd bind-mount. This means that using this 79 tool to harden your /usr/bin/runc binary would result in unprivileged users 80 being unable to execute the binary. If this is an issue, you could make all 81 privileged process use a different copy of runc (by making a copy in somewhere 82 like /usr/sbin/runc) and only using memfd-bind for the version used by 83 privileged users. 84 ` 85 ) 86 87 func cleanup(path string) error { 88 file, err := os.OpenFile(path, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) 89 if err != nil { 90 return fmt.Errorf("cleanup: failed to open runc binary path: %w", err) 91 } 92 defer file.Close() 93 fdPath := fmt.Sprintf("/proc/self/fd/%d", file.Fd()) 94 95 // Keep umounting until we hit a umount error. 96 for unix.Unmount(fdPath, unix.MNT_DETACH) == nil { 97 // loop... 98 logrus.Debugf("memfd-bind: path %q unmount succeeded...", path) 99 } 100 logrus.Infof("memfd-bind: path %q has been cleared of all old bind-mounts", path) 101 return nil 102 } 103 104 // memfdClone is a memfd-only implementation of dmz.CloneBinary. 105 func memfdClone(path string) (*os.File, error) { 106 binFile, err := os.Open(path) 107 if err != nil { 108 return nil, fmt.Errorf("failed to open runc binary path: %w", err) 109 } 110 defer binFile.Close() 111 stat, err := binFile.Stat() 112 if err != nil { 113 return nil, fmt.Errorf("checking %s size: %w", path, err) 114 } 115 size := stat.Size() 116 memfd, sealFn, err := dmz.Memfd("/proc/self/exe") 117 if err != nil { 118 return nil, fmt.Errorf("creating memfd failed: %w", err) 119 } 120 copied, err := io.Copy(memfd, binFile) 121 if err != nil { 122 return nil, fmt.Errorf("copy binary: %w", err) 123 } else if copied != size { 124 return nil, fmt.Errorf("copied binary size mismatch: %d != %d", copied, size) 125 } 126 if err := sealFn(&memfd); err != nil { 127 return nil, fmt.Errorf("could not seal fd: %w", err) 128 } 129 if !dmz.IsCloned(memfd) { 130 return nil, fmt.Errorf("cloned memfd is not properly sealed") 131 } 132 return memfd, nil 133 } 134 135 func mount(path string) error { 136 memfdFile, err := memfdClone(path) 137 if err != nil { 138 return fmt.Errorf("memfd clone: %w", err) 139 } 140 defer memfdFile.Close() 141 memfdPath := fmt.Sprintf("/proc/self/fd/%d", memfdFile.Fd()) 142 143 // We have to open an O_NOFOLLOW|O_PATH to the memfd magic-link because we 144 // cannot bind-mount the memfd itself (it's in the internal kernel mount 145 // namespace and cross-mount-namespace bind-mounts are not allowed). This 146 // also requires that this program stay alive continuously for the 147 // magic-link to stay alive... 148 memfdLink, err := os.OpenFile(memfdPath, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) 149 if err != nil { 150 return fmt.Errorf("mount: failed to /proc/self/fd magic-link for memfd: %w", err) 151 } 152 defer memfdLink.Close() 153 memfdLinkFdPath := fmt.Sprintf("/proc/self/fd/%d", memfdLink.Fd()) 154 155 exeFile, err := os.OpenFile(path, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) 156 if err != nil { 157 return fmt.Errorf("mount: failed to open target runc binary path: %w", err) 158 } 159 defer exeFile.Close() 160 exeFdPath := fmt.Sprintf("/proc/self/fd/%d", exeFile.Fd()) 161 162 err = unix.Mount(memfdLinkFdPath, exeFdPath, "", unix.MS_BIND, "") 163 if err != nil { 164 return fmt.Errorf("mount: failed to mount memfd on top of runc binary path target: %w", err) 165 } 166 167 // If there is a signal we want to do cleanup. 168 sigCh := make(chan os.Signal, 1) 169 signal.Notify(sigCh, os.Interrupt, unix.SIGTERM, unix.SIGINT) 170 go func() { 171 <-sigCh 172 logrus.Infof("memfd-bind: exit signal caught! cleaning up the bind-mount on %q...", path) 173 _ = cleanup(path) 174 os.Exit(0) 175 }() 176 177 // Clean up things we don't need... 178 _ = exeFile.Close() 179 _ = memfdLink.Close() 180 181 // We now have to stay alive to keep the magic-link alive... 182 logrus.Infof("memfd-bind: bind-mount of memfd over %q created -- looping forever!", path) 183 for { 184 // loop forever... 185 time.Sleep(time.Duration(1<<63 - 1)) 186 // make sure the memfd isn't gc'd 187 runtime.KeepAlive(memfdFile) 188 } 189 } 190 191 func main() { 192 app := cli.NewApp() 193 app.Name = "memfd-bind" 194 app.Usage = usage 195 196 // Set version to be the same as runC. 197 var v []string 198 if version != "" { 199 v = append(v, version) 200 } 201 if gitCommit != "" { 202 v = append(v, "commit: "+gitCommit) 203 } 204 app.Version = strings.Join(v, "\n") 205 206 // Set the flags. 207 app.Flags = []cli.Flag{ 208 cli.BoolFlag{ 209 Name: "cleanup", 210 Usage: "Do not create a new memfd-sealed file, only clean up an existing one at <path>.", 211 }, 212 cli.BoolFlag{ 213 Name: "debug", 214 Usage: "Enable debug logging.", 215 }, 216 } 217 218 app.Action = func(ctx *cli.Context) error { 219 args := ctx.Args() 220 if len(args) != 1 { 221 return errors.New("need to specify a single path to the runc binary") 222 } 223 path := ctx.Args()[0] 224 225 if ctx.Bool("debug") { 226 logrus.SetLevel(logrus.DebugLevel) 227 } 228 229 err := cleanup(path) 230 // We only care about cleanup errors when doing --cleanup. 231 if ctx.Bool("cleanup") { 232 return err 233 } 234 return mount(path) 235 } 236 if err := app.Run(os.Args); err != nil { 237 fmt.Fprintf(os.Stderr, "memfd-bind: %v\n", err) 238 os.Exit(1) 239 } 240 }