github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/runsc/cmd/boot.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package cmd 16 17 import ( 18 "context" 19 "os" 20 "runtime/debug" 21 "strings" 22 23 "github.com/google/subcommands" 24 specs "github.com/opencontainers/runtime-spec/specs-go" 25 "golang.org/x/sys/unix" 26 "github.com/SagerNet/gvisor/pkg/log" 27 "github.com/SagerNet/gvisor/pkg/sentry/platform" 28 "github.com/SagerNet/gvisor/runsc/boot" 29 "github.com/SagerNet/gvisor/runsc/config" 30 "github.com/SagerNet/gvisor/runsc/flag" 31 "github.com/SagerNet/gvisor/runsc/specutils" 32 ) 33 34 // Boot implements subcommands.Command for the "boot" command which starts a 35 // new sandbox. It should not be called directly. 36 type Boot struct { 37 // bundleDir is the directory containing the OCI spec. 38 bundleDir string 39 40 // specFD is the file descriptor that the spec will be read from. 41 specFD int 42 43 // controllerFD is the file descriptor of a stream socket for the 44 // control server that is donated to this process. 45 controllerFD int 46 47 // deviceFD is the file descriptor for the platform device file. 48 deviceFD int 49 50 // ioFDs is the list of FDs used to connect to FS gofers. 51 ioFDs intFlags 52 53 // stdioFDs are the fds for stdin, stdout, and stderr. They must be 54 // provided in that order. 55 stdioFDs intFlags 56 57 // applyCaps determines if capabilities defined in the spec should be applied 58 // to the process. 59 applyCaps bool 60 61 // setUpChroot is set to true if the sandbox is started in an empty root. 62 setUpRoot bool 63 64 // cpuNum number of CPUs to create inside the sandbox. 65 cpuNum int 66 67 // totalMem sets the initial amount of total memory to report back to the 68 // container. 69 totalMem uint64 70 71 // userLogFD is the file descriptor to write user logs to. 72 userLogFD int 73 74 // startSyncFD is the file descriptor to synchronize runsc and sandbox. 75 startSyncFD int 76 77 // mountsFD is the file descriptor to read list of mounts after they have 78 // been resolved (direct paths, no symlinks). They are resolved outside the 79 // sandbox (e.g. gofer) and sent through this FD. 80 mountsFD int 81 82 // pidns is set if the sandbox is in its own pid namespace. 83 pidns bool 84 85 // attached is set to true to kill the sandbox process when the parent process 86 // terminates. This flag is set when the command execve's itself because 87 // parent death signal doesn't propagate through execve when uid/gid changes. 88 attached bool 89 } 90 91 // Name implements subcommands.Command.Name. 92 func (*Boot) Name() string { 93 return "boot" 94 } 95 96 // Synopsis implements subcommands.Command.Synopsis. 97 func (*Boot) Synopsis() string { 98 return "launch a sandbox process (internal use only)" 99 } 100 101 // Usage implements subcommands.Command.Usage. 102 func (*Boot) Usage() string { 103 return `boot [flags] <container id>` 104 } 105 106 // SetFlags implements subcommands.Command.SetFlags. 107 func (b *Boot) SetFlags(f *flag.FlagSet) { 108 f.StringVar(&b.bundleDir, "bundle", "", "required path to the root of the bundle directory") 109 f.IntVar(&b.specFD, "spec-fd", -1, "required fd with the container spec") 110 f.IntVar(&b.controllerFD, "controller-fd", -1, "required FD of a stream socket for the control server that must be donated to this process") 111 f.IntVar(&b.deviceFD, "device-fd", -1, "FD for the platform device file") 112 f.Var(&b.ioFDs, "io-fds", "list of FDs to connect 9P clients. They must follow this order: root first, then mounts as defined in the spec") 113 f.Var(&b.stdioFDs, "stdio-fds", "list of FDs containing sandbox stdin, stdout, and stderr in that order") 114 f.BoolVar(&b.applyCaps, "apply-caps", false, "if true, apply capabilities defined in the spec to the process") 115 f.BoolVar(&b.setUpRoot, "setup-root", false, "if true, set up an empty root for the process") 116 f.BoolVar(&b.pidns, "pidns", false, "if true, the sandbox is in its own PID namespace") 117 f.IntVar(&b.cpuNum, "cpu-num", 0, "number of CPUs to create inside the sandbox") 118 f.Uint64Var(&b.totalMem, "total-memory", 0, "sets the initial amount of total memory to report back to the container") 119 f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.") 120 f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup") 121 f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).") 122 f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates") 123 } 124 125 // Execute implements subcommands.Command.Execute. It starts a sandbox in a 126 // waiting state. 127 func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { 128 if b.specFD == -1 || b.controllerFD == -1 || b.startSyncFD == -1 || f.NArg() != 1 { 129 f.Usage() 130 return subcommands.ExitUsageError 131 } 132 133 conf := args[0].(*config.Config) 134 135 // Set traceback level 136 debug.SetTraceback(conf.Traceback) 137 138 if b.attached { 139 // Ensure this process is killed after parent process terminates when 140 // attached mode is enabled. In the unfortunate event that the parent 141 // terminates before this point, this process leaks. 142 if err := unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0); err != nil { 143 Fatalf("error setting parent death signal: %v", err) 144 } 145 } 146 147 if b.setUpRoot { 148 if err := setUpChroot(b.pidns); err != nil { 149 Fatalf("error setting up chroot: %v", err) 150 } 151 152 if !b.applyCaps && !conf.Rootless { 153 // Remove --apply-caps arg to call myself. It has already been done. 154 args := prepareArgs(b.attached, "setup-root") 155 156 // Note that we've already read the spec from the spec FD, and 157 // we will read it again after the exec call. This works 158 // because the ReadSpecFromFile function seeks to the beginning 159 // of the file before reading. 160 Fatalf("callSelfAsNobody(%v): %v", args, callSelfAsNobody(args)) 161 panic("unreachable") 162 } 163 } 164 165 // Get the spec from the specFD. 166 specFile := os.NewFile(uintptr(b.specFD), "spec file") 167 defer specFile.Close() 168 spec, err := specutils.ReadSpecFromFile(b.bundleDir, specFile, conf) 169 if err != nil { 170 Fatalf("reading spec: %v", err) 171 } 172 specutils.LogSpec(spec) 173 174 if b.applyCaps { 175 caps := spec.Process.Capabilities 176 if caps == nil { 177 caps = &specs.LinuxCapabilities{} 178 } 179 180 gPlatform, err := platform.Lookup(conf.Platform) 181 if err != nil { 182 Fatalf("loading platform: %v", err) 183 } 184 if gPlatform.Requirements().RequiresCapSysPtrace { 185 // Ptrace platform requires extra capabilities. 186 const c = "CAP_SYS_PTRACE" 187 caps.Bounding = append(caps.Bounding, c) 188 caps.Effective = append(caps.Effective, c) 189 caps.Permitted = append(caps.Permitted, c) 190 } 191 192 // Remove --apply-caps and --setup-root arg to call myself. Both have 193 // already been done. 194 args := prepareArgs(b.attached, "setup-root", "apply-caps") 195 196 // Note that we've already read the spec from the spec FD, and 197 // we will read it again after the exec call. This works 198 // because the ReadSpecFromFile function seeks to the beginning 199 // of the file before reading. 200 Fatalf("setCapsAndCallSelf(%v, %v): %v", args, caps, setCapsAndCallSelf(args, caps)) 201 panic("unreachable") 202 } 203 204 // Read resolved mount list and replace the original one from the spec. 205 mountsFile := os.NewFile(uintptr(b.mountsFD), "mounts file") 206 cleanMounts, err := specutils.ReadMounts(mountsFile) 207 if err != nil { 208 mountsFile.Close() 209 Fatalf("Error reading mounts file: %v", err) 210 } 211 mountsFile.Close() 212 spec.Mounts = cleanMounts 213 214 // Create the loader. 215 bootArgs := boot.Args{ 216 ID: f.Arg(0), 217 Spec: spec, 218 Conf: conf, 219 ControllerFD: b.controllerFD, 220 Device: os.NewFile(uintptr(b.deviceFD), "platform device"), 221 GoferFDs: b.ioFDs.GetArray(), 222 StdioFDs: b.stdioFDs.GetArray(), 223 NumCPU: b.cpuNum, 224 TotalMem: b.totalMem, 225 UserLogFD: b.userLogFD, 226 } 227 l, err := boot.New(bootArgs) 228 if err != nil { 229 Fatalf("creating loader: %v", err) 230 } 231 232 // Fatalf exits the process and doesn't run defers. 233 // 'l' must be destroyed explicitly after this point! 234 235 // Notify the parent process the sandbox has booted (and that the controller 236 // is up). 237 startSyncFile := os.NewFile(uintptr(b.startSyncFD), "start-sync file") 238 buf := make([]byte, 1) 239 if w, err := startSyncFile.Write(buf); err != nil || w != 1 { 240 l.Destroy() 241 Fatalf("unable to write into the start-sync descriptor: %v", err) 242 } 243 // Closes startSyncFile because 'l.Run()' only returns when the sandbox exits. 244 startSyncFile.Close() 245 246 // Wait for the start signal from runsc. 247 l.WaitForStartSignal() 248 249 // Run the application and wait for it to finish. 250 if err := l.Run(); err != nil { 251 l.Destroy() 252 Fatalf("running sandbox: %v", err) 253 } 254 255 ws := l.WaitExit() 256 log.Infof("application exiting with %+v", ws) 257 waitStatus := args[1].(*unix.WaitStatus) 258 *waitStatus = unix.WaitStatus(ws.Status()) 259 l.Destroy() 260 return subcommands.ExitSuccess 261 } 262 263 func prepareArgs(attached bool, exclude ...string) []string { 264 var args []string 265 for _, arg := range os.Args { 266 for _, excl := range exclude { 267 if strings.Contains(arg, excl) { 268 goto skip 269 } 270 } 271 args = append(args, arg) 272 if attached && arg == "boot" { 273 // Strategicaly place "--attached" after the command. This is needed 274 // to ensure the new process is killed when the parent process terminates. 275 args = append(args, "--attached") 276 } 277 skip: 278 } 279 return args 280 }