github.com/containers/libpod@v1.9.4-0.20220419124438-4284fd425507/libpod/container_internal.go (about) 1 package libpod 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "io" 8 "io/ioutil" 9 "os" 10 "path/filepath" 11 "strconv" 12 "strings" 13 "time" 14 15 "github.com/containers/libpod/libpod/define" 16 "github.com/containers/libpod/libpod/events" 17 "github.com/containers/libpod/pkg/cgroups" 18 "github.com/containers/libpod/pkg/ctime" 19 "github.com/containers/libpod/pkg/hooks" 20 "github.com/containers/libpod/pkg/hooks/exec" 21 "github.com/containers/libpod/pkg/rootless" 22 "github.com/containers/storage" 23 "github.com/containers/storage/pkg/archive" 24 "github.com/containers/storage/pkg/mount" 25 securejoin "github.com/cyphar/filepath-securejoin" 26 spec "github.com/opencontainers/runtime-spec/specs-go" 27 "github.com/opencontainers/runtime-tools/generate" 28 "github.com/opencontainers/selinux/go-selinux/label" 29 "github.com/opentracing/opentracing-go" 30 "github.com/pkg/errors" 31 "github.com/sirupsen/logrus" 32 ) 33 34 const ( 35 // name of the directory holding the artifacts 36 artifactsDir = "artifacts" 37 execDirPermission = 0755 38 ) 39 40 // rootFsSize gets the size of the container's root filesystem 41 // A container FS is split into two parts. The first is the top layer, a 42 // mutable layer, and the rest is the RootFS: the set of immutable layers 43 // that make up the image on which the container is based. 44 func (c *Container) rootFsSize() (int64, error) { 45 if c.config.Rootfs != "" { 46 return 0, nil 47 } 48 if c.runtime.store == nil { 49 return 0, nil 50 } 51 52 container, err := c.runtime.store.Container(c.ID()) 53 if err != nil { 54 return 0, err 55 } 56 57 // Ignore the size of the top layer. The top layer is a mutable RW layer 58 // and is not considered a part of the rootfs 59 rwLayer, err := c.runtime.store.Layer(container.LayerID) 60 if err != nil { 61 return 0, err 62 } 63 layer, err := c.runtime.store.Layer(rwLayer.Parent) 64 if err != nil { 65 return 0, err 66 } 67 68 size := int64(0) 69 for layer.Parent != "" { 70 layerSize, err := c.runtime.store.DiffSize(layer.Parent, layer.ID) 71 if err != nil { 72 return size, errors.Wrapf(err, "getting diffsize of layer %q and its parent %q", layer.ID, layer.Parent) 73 } 74 size += layerSize 75 layer, err = c.runtime.store.Layer(layer.Parent) 76 if err != nil { 77 return 0, err 78 } 79 } 80 // Get the size of the last layer. Has to be outside of the loop 81 // because the parent of the last layer is "", and lstore.Get("") 82 // will return an error. 83 layerSize, err := c.runtime.store.DiffSize(layer.Parent, layer.ID) 84 return size + layerSize, err 85 } 86 87 // rwSize gets the size of the mutable top layer of the container. 88 func (c *Container) rwSize() (int64, error) { 89 if c.config.Rootfs != "" { 90 var size int64 91 err := filepath.Walk(c.config.Rootfs, func(path string, info os.FileInfo, err error) error { 92 if err != nil { 93 return err 94 } 95 size += info.Size() 96 return nil 97 }) 98 return size, err 99 } 100 101 container, err := c.runtime.store.Container(c.ID()) 102 if err != nil { 103 return 0, err 104 } 105 106 // The top layer of a container is 107 // the only readable/writeable layer, all others are immutable. 108 rwLayer, err := c.runtime.store.Layer(container.LayerID) 109 if err != nil { 110 return 0, err 111 } 112 113 // Get the size of the top layer by calculating the size of the diff 114 // between the layer and its parent. 
115 return c.runtime.store.DiffSize(rwLayer.Parent, rwLayer.ID) 116 } 117 118 // bundlePath returns the path to the container's root filesystem - where the OCI spec will be 119 // placed, amongst other things 120 func (c *Container) bundlePath() string { 121 return c.config.StaticDir 122 } 123 124 // ControlSocketPath returns the path to the container's control socket for things like tty 125 // resizing 126 func (c *Container) ControlSocketPath() string { 127 return filepath.Join(c.bundlePath(), "ctl") 128 } 129 130 // CheckpointPath returns the path to the directory containing the checkpoint 131 func (c *Container) CheckpointPath() string { 132 return filepath.Join(c.bundlePath(), "checkpoint") 133 } 134 135 // AttachSocketPath retrieves the path of the container's attach socket 136 func (c *Container) AttachSocketPath() (string, error) { 137 return c.ociRuntime.AttachSocketPath(c) 138 } 139 140 // exitFilePath gets the path to the container's exit file 141 func (c *Container) exitFilePath() (string, error) { 142 return c.ociRuntime.ExitFilePath(c) 143 } 144 145 // Wait for the container's exit file to appear. 146 // When it does, update our state based on it. 147 func (c *Container) waitForExitFileAndSync() error { 148 exitFile, err := c.exitFilePath() 149 if err != nil { 150 return err 151 } 152 153 chWait := make(chan error) 154 defer close(chWait) 155 156 _, err = WaitForFile(exitFile, chWait, time.Second*5) 157 if err != nil { 158 // Exit file did not appear 159 // Reset our state 160 c.state.ExitCode = -1 161 c.state.FinishedTime = time.Now() 162 c.state.State = define.ContainerStateStopped 163 164 if err2 := c.save(); err2 != nil { 165 logrus.Errorf("Error saving container %s state: %v", c.ID(), err2) 166 } 167 168 return err 169 } 170 171 if err := c.checkExitFile(); err != nil { 172 return err 173 } 174 175 return c.save() 176 } 177 178 // Handle the container exit file. 179 // The exit file is used to supply container exit time and exit code. 180 // This assumes the exit file already exists. 181 func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error { 182 c.state.FinishedTime = ctime.Created(fi) 183 statusCodeStr, err := ioutil.ReadFile(exitFile) 184 if err != nil { 185 return errors.Wrapf(err, "failed to read exit file for container %s", c.ID()) 186 } 187 statusCode, err := strconv.Atoi(string(statusCodeStr)) 188 if err != nil { 189 return errors.Wrapf(err, "error converting exit status code (%q) for container %s to int", 190 statusCodeStr, c.ID()) 191 } 192 c.state.ExitCode = int32(statusCode) 193 194 oomFilePath := filepath.Join(c.bundlePath(), "oom") 195 if _, err = os.Stat(oomFilePath); err == nil { 196 c.state.OOMKilled = true 197 } 198 199 c.state.Exited = true 200 201 // Write an event for the container's death 202 c.newContainerExitedEvent(c.state.ExitCode) 203 204 return nil 205 } 206 207 // Handle container restart policy. 208 // This is called when a container has exited, and was not explicitly stopped by 209 // an API call to stop the container or pod it is in. 210 func (c *Container) handleRestartPolicy(ctx context.Context) (restarted bool, err error) { 211 // If we did not get a restart policy match, exit immediately. 212 // Do the same if our restart policy does not call for a restart. 213 if !c.state.RestartPolicyMatch || 214 c.config.RestartPolicy == RestartPolicyNo || 215 c.config.RestartPolicy == RestartPolicyNone { 216 return false, nil 217 } 218 219 // If we're RestartPolicyOnFailure, we need to check retries and exit 220 // code.
221 if c.config.RestartPolicy == RestartPolicyOnFailure { 222 if c.state.ExitCode == 0 { 223 return false, nil 224 } 225 226 // If a maximum retry count is set, make sure we have not exhausted it 227 if c.config.RestartRetries > 0 { 228 if c.state.RestartCount < c.config.RestartRetries { 229 logrus.Debugf("Container %s restart policy trigger: on retry %d (of %d)", 230 c.ID(), c.state.RestartCount, c.config.RestartRetries) 231 } else { 232 logrus.Debugf("Container %s restart policy trigger: retries exhausted", c.ID()) 233 return false, nil 234 } 235 } 236 } 237 238 logrus.Debugf("Restarting container %s due to restart policy %s", c.ID(), c.config.RestartPolicy) 239 240 // Need to check if dependencies are alive. 241 if err = c.checkDependenciesAndHandleError(ctx); err != nil { 242 return false, err 243 } 244 245 // Is the container running again? 246 // If so, we don't have to do anything 247 if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) { 248 return false, nil 249 } else if c.state.State == define.ContainerStateUnknown { 250 return false, errors.Wrapf(define.ErrInternal, "invalid container state encountered in restart attempt!") 251 } 252 253 c.newContainerEvent(events.Restart) 254 255 // Increment restart count 256 c.state.RestartCount++ 257 logrus.Debugf("Container %s now on retry %d", c.ID(), c.state.RestartCount) 258 if err := c.save(); err != nil { 259 return false, err 260 } 261 262 defer func() { 263 if err != nil { 264 if err2 := c.cleanup(ctx); err2 != nil { 265 logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2) 266 } 267 } 268 }() 269 if err := c.prepare(); err != nil { 270 return false, err 271 } 272 273 if c.state.State == define.ContainerStateStopped { 274 // Reinitialize the container if we need to 275 if err := c.reinit(ctx, true); err != nil { 276 return false, err 277 } 278 } else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) { 279 // Initialize the container 280 if err := c.init(ctx, true); err != nil { 281 return false, err 282 } 283 } 284 if err := c.start(); err != nil { 285 return false, err 286 } 287 return true, nil 288 } 289 290 // Ensure that the container is in one of a given set of states. 291 // Returns true if the container is in one of the given states, 292 // or false otherwise. 293 func (c *Container) ensureState(states ...define.ContainerStatus) bool { 294 for _, state := range states { 295 if state == c.state.State { 296 return true 297 } 298 } 299 return false 300 } 301 302 // Sync this container with on-disk state and runtime status 303 // Should only be called with container lock held 304 // This function should suffice to ensure a container's state is accurate and 305 // it is valid for use.
306 func (c *Container) syncContainer() error { 307 if err := c.runtime.state.UpdateContainer(c); err != nil { 308 return err 309 } 310 // If runtime knows about the container, update its status in runtime 311 // And then save back to disk 312 if c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStatePaused) { 313 oldState := c.state.State 314 315 if err := c.checkExitFile(); err != nil { 316 return err 317 } 318 319 // Only save back to DB if state changed 320 if c.state.State != oldState { 321 // Check for a restart policy match 322 if c.config.RestartPolicy != RestartPolicyNone && c.config.RestartPolicy != RestartPolicyNo && 323 (oldState == define.ContainerStateRunning || oldState == define.ContainerStatePaused) && 324 (c.state.State == define.ContainerStateStopped || c.state.State == define.ContainerStateExited) && 325 !c.state.StoppedByUser { 326 c.state.RestartPolicyMatch = true 327 } 328 329 if err := c.save(); err != nil { 330 return err 331 } 332 } 333 } 334 335 if !c.valid { 336 return errors.Wrapf(define.ErrCtrRemoved, "container %s is not valid", c.ID()) 337 } 338 339 return nil 340 } 341 342 func (c *Container) setupStorageMapping(dest, from *storage.IDMappingOptions) { 343 if c.config.Rootfs != "" { 344 return 345 } 346 *dest = *from 347 if dest.AutoUserNs { 348 overrides := c.getUserOverrides() 349 dest.AutoUserNsOpts.PasswdFile = overrides.ContainerEtcPasswdPath 350 dest.AutoUserNsOpts.GroupFile = overrides.ContainerEtcGroupPath 351 if c.config.User != "" { 352 initialSize := uint32(0) 353 parts := strings.Split(c.config.User, ":") 354 for _, p := range parts { 355 s, err := strconv.ParseUint(p, 10, 32) 356 if err == nil && uint32(s) > initialSize { 357 initialSize = uint32(s) 358 } 359 } 360 dest.AutoUserNsOpts.InitialSize = initialSize + 1 361 } 362 } 363 } 364 365 // Create container root filesystem for use 366 func (c *Container) setupStorage(ctx context.Context) error { 367 span, _ := opentracing.StartSpanFromContext(ctx, "setupStorage") 368 span.SetTag("type", "container") 369 defer span.Finish() 370 371 if !c.valid { 372 return errors.Wrapf(define.ErrCtrRemoved, "container %s is not valid", c.ID()) 373 } 374 375 if c.state.State != define.ContainerStateConfigured { 376 return errors.Wrapf(define.ErrCtrStateInvalid, "container %s must be in Configured state to have storage set up", c.ID()) 377 } 378 379 // Need both an image ID and image name, plus a bool telling us whether to use the image configuration 380 if c.config.Rootfs == "" && (c.config.RootfsImageID == "" || c.config.RootfsImageName == "") { 381 return errors.Wrapf(define.ErrInvalidArg, "must provide image ID and image name to use an image") 382 } 383 384 options := storage.ContainerOptions{ 385 IDMappingOptions: storage.IDMappingOptions{ 386 HostUIDMapping: true, 387 HostGIDMapping: true, 388 }, 389 LabelOpts: c.config.LabelOpts, 390 } 391 if c.restoreFromCheckpoint { 392 // If restoring from a checkpoint, the root file-system 393 // needs to be mounted with the same SELinux labels as 394 // it was mounted previously. 
395 if options.Flags == nil { 396 options.Flags = make(map[string]interface{}) 397 } 398 options.Flags["ProcessLabel"] = c.config.ProcessLabel 399 options.Flags["MountLabel"] = c.config.MountLabel 400 } 401 if c.config.Privileged { 402 privOpt := func(opt string) bool { 403 for _, privopt := range []string{"nodev", "nosuid", "noexec"} { 404 if opt == privopt { 405 return true 406 } 407 } 408 return false 409 } 410 411 defOptions, err := storage.GetMountOptions(c.runtime.store.GraphDriverName(), c.runtime.store.GraphOptions()) 412 if err != nil { 413 return errors.Wrapf(err, "error getting default mount options") 414 } 415 var newOptions []string 416 for _, opt := range defOptions { 417 if !privOpt(opt) { 418 newOptions = append(newOptions, opt) 419 } 420 } 421 options.MountOpts = newOptions 422 } 423 424 c.setupStorageMapping(&options.IDMappingOptions, &c.config.IDMappings) 425 426 containerInfo, err := c.runtime.storageService.CreateContainerStorage(ctx, c.runtime.imageContext, c.config.RootfsImageName, c.config.RootfsImageID, c.config.Name, c.config.ID, options) 427 if err != nil { 428 return errors.Wrapf(err, "error creating container storage") 429 } 430 431 c.config.IDMappings.UIDMap = containerInfo.UIDMap 432 c.config.IDMappings.GIDMap = containerInfo.GIDMap 433 c.config.ProcessLabel = containerInfo.ProcessLabel 434 c.config.MountLabel = containerInfo.MountLabel 435 c.config.StaticDir = containerInfo.Dir 436 c.state.RunDir = containerInfo.RunDir 437 438 if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 { 439 if err := os.Chown(containerInfo.RunDir, c.RootUID(), c.RootGID()); err != nil { 440 return err 441 } 442 443 if err := os.Chown(containerInfo.Dir, c.RootUID(), c.RootGID()); err != nil { 444 return err 445 } 446 } 447 448 // Set the default Entrypoint and Command 449 if containerInfo.Config != nil { 450 if c.config.Entrypoint == nil { 451 c.config.Entrypoint = containerInfo.Config.Config.Entrypoint 452 } 453 if c.config.Command == nil { 454 c.config.Command = containerInfo.Config.Config.Cmd 455 } 456 } 457 458 artifacts := filepath.Join(c.config.StaticDir, artifactsDir) 459 if err := os.MkdirAll(artifacts, 0755); err != nil { 460 return errors.Wrapf(err, "error creating artifacts directory %q", artifacts) 461 } 462 463 return nil 464 } 465 466 // Tear down a container's storage prior to removal 467 func (c *Container) teardownStorage() error { 468 if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) { 469 return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove storage for container %s as it is running or paused", c.ID()) 470 } 471 472 artifacts := filepath.Join(c.config.StaticDir, artifactsDir) 473 if err := os.RemoveAll(artifacts); err != nil { 474 return errors.Wrapf(err, "error removing container %s artifacts %q", c.ID(), artifacts) 475 } 476 477 if err := c.cleanupStorage(); err != nil { 478 return errors.Wrapf(err, "failed to cleanup container %s storage", c.ID()) 479 } 480 481 if err := c.runtime.storageService.DeleteContainer(c.ID()); err != nil { 482 // If the container has already been removed, warn but do not 483 // error - we wanted it gone, it is already gone. 484 // Potentially another tool using containers/storage already 485 // removed it? 
486 if errors.Cause(err) == storage.ErrNotAContainer || errors.Cause(err) == storage.ErrContainerUnknown { 487 logrus.Warnf("Storage for container %s already removed", c.ID()) 488 return nil 489 } 490 491 return errors.Wrapf(err, "error removing container %s root filesystem", c.ID()) 492 } 493 494 return nil 495 } 496 497 // Reset resets state fields to default values. 498 // It is performed before a refresh and clears the state after a reboot. 499 // It does not save the results - assumes the database will do that for us. 500 func resetState(state *ContainerState) error { 501 state.PID = 0 502 state.ConmonPID = 0 503 state.Mountpoint = "" 504 state.Mounted = false 505 if state.State != define.ContainerStateExited { 506 state.State = define.ContainerStateConfigured 507 } 508 state.ExecSessions = make(map[string]*ExecSession) 509 state.LegacyExecSessions = nil 510 state.BindMounts = make(map[string]string) 511 state.StoppedByUser = false 512 state.RestartPolicyMatch = false 513 state.RestartCount = 0 514 515 return nil 516 } 517 518 // Refresh refreshes the container's state after a restart. 519 // Refresh cannot perform any operations that would lock another container. 520 // We cannot guarantee any other container has a valid lock at the time it is 521 // running. 522 func (c *Container) refresh() error { 523 // Don't need a full sync, but we do need to update from the database to 524 // pick up potentially-missing container state 525 if err := c.runtime.state.UpdateContainer(c); err != nil { 526 return err 527 } 528 529 if !c.valid { 530 return errors.Wrapf(define.ErrCtrRemoved, "container %s is not valid - may have been removed", c.ID()) 531 } 532 533 // We need to get the container's temporary directory from c/storage 534 // It was lost in the reboot and must be recreated 535 dir, err := c.runtime.storageService.GetRunDir(c.ID()) 536 if err != nil { 537 return errors.Wrapf(err, "error retrieving temporary directory for container %s", c.ID()) 538 } 539 c.state.RunDir = dir 540 541 if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 { 542 info, err := os.Stat(c.runtime.config.Engine.TmpDir) 543 if err != nil { 544 return errors.Wrapf(err, "cannot stat `%s`", c.runtime.config.Engine.TmpDir) 545 } 546 if err := os.Chmod(c.runtime.config.Engine.TmpDir, info.Mode()|0111); err != nil { 547 return errors.Wrapf(err, "cannot chmod `%s`", c.runtime.config.Engine.TmpDir) 548 } 549 root := filepath.Join(c.runtime.config.Engine.TmpDir, "containers-root", c.ID()) 550 if err := os.MkdirAll(root, 0755); err != nil { 551 return errors.Wrapf(err, "error creating userNS tmpdir for container %s", c.ID()) 552 } 553 if err := os.Chown(root, c.RootUID(), c.RootGID()); err != nil { 554 return err 555 } 556 } 557 558 // We need to pick up a new lock 559 lock, err := c.runtime.lockManager.AllocateAndRetrieveLock(c.config.LockID) 560 if err != nil { 561 return errors.Wrapf(err, "error acquiring lock %d for container %s", c.config.LockID, c.ID()) 562 } 563 c.lock = lock 564 565 // Try to delete any lingering IP allocations. 566 // If this fails, just log and ignore. 567 // I'm a little concerned that this is so far down in refresh() and we 568 // could fail before getting to it - but the worst that would happen is 569 // that Inspect() would return info on IPs we no longer own. 
570 if len(c.state.NetworkStatus) > 0 { 571 if err := c.removeIPv4Allocations(); err != nil { 572 logrus.Errorf("Error removing IP allocations for container %s: %v", c.ID(), err) 573 } 574 } 575 c.state.NetworkStatus = nil 576 577 if err := c.save(); err != nil { 578 return errors.Wrapf(err, "error refreshing state for container %s", c.ID()) 579 } 580 581 // Remove ctl and attach files, which may persist across reboot 582 if err := c.removeConmonFiles(); err != nil { 583 return err 584 } 585 586 return nil 587 } 588 589 // Try and remove IP address allocations. Presently IPv4 only. 590 // Should be safe as rootless because NetworkStatus should only be populated if 591 // CNI is running. 592 func (c *Container) removeIPv4Allocations() error { 593 cniNetworksDir, err := getCNINetworksDir() 594 if err != nil { 595 return err 596 } 597 598 if len(c.state.NetworkStatus) == 0 { 599 return nil 600 } 601 602 cniDefaultNetwork := "" 603 if c.runtime.netPlugin != nil { 604 cniDefaultNetwork = c.runtime.netPlugin.GetDefaultNetworkName() 605 } 606 607 switch { 608 case len(c.config.Networks) > 0 && len(c.config.Networks) != len(c.state.NetworkStatus): 609 return errors.Wrapf(define.ErrInternal, "network mismatch: asked to join %d CNI networks but got %d CNI results", len(c.config.Networks), len(c.state.NetworkStatus)) 610 case len(c.config.Networks) == 0 && len(c.state.NetworkStatus) != 1: 611 return errors.Wrapf(define.ErrInternal, "network mismatch: did not specify CNI networks but joined more than one (%d)", len(c.state.NetworkStatus)) 612 case len(c.config.Networks) == 0 && cniDefaultNetwork == "": 613 return errors.Wrapf(define.ErrInternal, "could not retrieve name of CNI default network") 614 } 615 616 for index, result := range c.state.NetworkStatus { 617 for _, ctrIP := range result.IPs { 618 if ctrIP.Version != "4" { 619 continue 620 } 621 candidate := "" 622 if len(c.config.Networks) > 0 { 623 // CNI returns networks in order we passed them. 624 // So our index into results should be our index 625 // into networks. 
626 candidate = filepath.Join(cniNetworksDir, c.config.Networks[index], ctrIP.Address.IP.String()) 627 } else { 628 candidate = filepath.Join(cniNetworksDir, cniDefaultNetwork, ctrIP.Address.IP.String()) 629 } 630 logrus.Debugf("Going to try removing IP address reservation file %q for container %s", candidate, c.ID()) 631 if err := os.Remove(candidate); err != nil && !os.IsNotExist(err) { 632 return errors.Wrapf(err, "error removing CNI IP reservation file %q for container %s", candidate, c.ID()) 633 } 634 } 635 } 636 637 return nil 638 } 639 640 // Remove conmon attach socket and terminal resize FIFO 641 // This is necessary for restarting containers 642 func (c *Container) removeConmonFiles() error { 643 // Files are allowed to not exist, so ignore ENOENT 644 attachFile := filepath.Join(c.bundlePath(), "attach") 645 if err := os.Remove(attachFile); err != nil && !os.IsNotExist(err) { 646 return errors.Wrapf(err, "error removing container %s attach file", c.ID()) 647 } 648 649 ctlFile := filepath.Join(c.bundlePath(), "ctl") 650 if err := os.Remove(ctlFile); err != nil && !os.IsNotExist(err) { 651 return errors.Wrapf(err, "error removing container %s ctl file", c.ID()) 652 } 653 654 winszFile := filepath.Join(c.bundlePath(), "winsz") 655 if err := os.Remove(winszFile); err != nil && !os.IsNotExist(err) { 656 return errors.Wrapf(err, "error removing container %s winsz file", c.ID()) 657 } 658 659 oomFile := filepath.Join(c.bundlePath(), "oom") 660 if err := os.Remove(oomFile); err != nil && !os.IsNotExist(err) { 661 return errors.Wrapf(err, "error removing container %s OOM file", c.ID()) 662 } 663 664 // Remove the exit file so we don't leak memory in tmpfs 665 exitFile, err := c.exitFilePath() 666 if err != nil { 667 return err 668 } 669 if err := os.Remove(exitFile); err != nil && !os.IsNotExist(err) { 670 return errors.Wrapf(err, "error removing container %s exit file", c.ID()) 671 } 672 673 return nil 674 } 675 676 func (c *Container) export(path string) error { 677 mountPoint := c.state.Mountpoint 678 if !c.state.Mounted { 679 containerMount, err := c.runtime.store.Mount(c.ID(), c.config.MountLabel) 680 if err != nil { 681 return errors.Wrapf(err, "error mounting container %q", c.ID()) 682 } 683 mountPoint = containerMount 684 defer func() { 685 if _, err := c.runtime.store.Unmount(c.ID(), false); err != nil { 686 logrus.Errorf("error unmounting container %q: %v", c.ID(), err) 687 } 688 }() 689 } 690 691 input, err := archive.Tar(mountPoint, archive.Uncompressed) 692 if err != nil { 693 return errors.Wrapf(err, "error reading container directory %q", c.ID()) 694 } 695 696 outFile, err := os.Create(path) 697 if err != nil { 698 return errors.Wrapf(err, "error creating file %q", path) 699 } 700 defer outFile.Close() 701 702 _, err = io.Copy(outFile, input) 703 return err 704 } 705 706 // Get path of artifact with a given name for this container 707 func (c *Container) getArtifactPath(name string) string { 708 return filepath.Join(c.config.StaticDir, artifactsDir, name) 709 } 710 711 // Used with Wait() to determine if a container has exited 712 func (c *Container) isStopped() (bool, error) { 713 if !c.batched { 714 c.lock.Lock() 715 defer c.lock.Unlock() 716 } 717 err := c.syncContainer() 718 if err != nil { 719 return true, err 720 } 721 722 return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused), nil 723 } 724 725 // save container state to the database 726 func (c *Container) save() error { 727 if err := c.runtime.state.SaveContainer(c); err != nil { 728 
return errors.Wrapf(err, "error saving container %s state", c.ID()) 729 } 730 return nil 731 } 732 733 // Checks the container is in the right state, then initializes the container in preparation to start the container. 734 // If recursive is true, each of the containers dependencies will be started. 735 // Otherwise, this function will return with error if there are dependencies of this container that aren't running. 736 func (c *Container) prepareToStart(ctx context.Context, recursive bool) (err error) { 737 // Container must be created or stopped to be started 738 if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) { 739 return errors.Wrapf(define.ErrCtrStateInvalid, "container %s must be in Created or Stopped state to be started", c.ID()) 740 } 741 742 if !recursive { 743 if err := c.checkDependenciesAndHandleError(ctx); err != nil { 744 return err 745 } 746 } else { 747 if err := c.startDependencies(ctx); err != nil { 748 return err 749 } 750 } 751 752 defer func() { 753 if err != nil { 754 if err2 := c.cleanup(ctx); err2 != nil { 755 logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2) 756 } 757 } 758 }() 759 760 if err := c.prepare(); err != nil { 761 return err 762 } 763 764 if c.state.State == define.ContainerStateStopped { 765 // Reinitialize the container if we need to 766 if err := c.reinit(ctx, false); err != nil { 767 return err 768 } 769 } else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) { 770 // Or initialize it if necessary 771 if err := c.init(ctx, false); err != nil { 772 return err 773 } 774 } 775 return nil 776 } 777 778 // checks dependencies are running and prints a helpful message 779 func (c *Container) checkDependenciesAndHandleError(ctx context.Context) error { 780 notRunning, err := c.checkDependenciesRunning() 781 if err != nil { 782 return errors.Wrapf(err, "error checking dependencies for container %s", c.ID()) 783 } 784 if len(notRunning) > 0 { 785 depString := strings.Join(notRunning, ",") 786 return errors.Wrapf(define.ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString) 787 } 788 789 return nil 790 } 791 792 // Recursively start all dependencies of a container so the container can be started. 793 func (c *Container) startDependencies(ctx context.Context) error { 794 depCtrIDs := c.Dependencies() 795 if len(depCtrIDs) == 0 { 796 return nil 797 } 798 799 depVisitedCtrs := make(map[string]*Container) 800 if err := c.getAllDependencies(depVisitedCtrs); err != nil { 801 return errors.Wrapf(err, "error starting dependency for container %s", c.ID()) 802 } 803 804 // Because of how Go handles passing slices through functions, a slice cannot grow between function calls 805 // without clunky syntax. 
Work around this by translating the map to a slice for BuildContainerGraph 806 depCtrs := make([]*Container, 0) 807 for _, ctr := range depVisitedCtrs { 808 depCtrs = append(depCtrs, ctr) 809 } 810 811 // Build a dependency graph of containers 812 graph, err := BuildContainerGraph(depCtrs) 813 if err != nil { 814 return errors.Wrapf(err, "error generating dependency graph for container %s", c.ID()) 815 } 816 817 // If there are no containers without dependencies, we can't start 818 // Error out 819 if len(graph.noDepNodes) == 0 { 820 // we have no dependencies that need starting, go ahead and return 821 if len(graph.nodes) == 0 { 822 return nil 823 } 824 return errors.Wrapf(define.ErrNoSuchCtr, "All dependencies have dependencies of %s", c.ID()) 825 } 826 827 ctrErrors := make(map[string]error) 828 ctrsVisited := make(map[string]bool) 829 830 // Traverse the graph beginning at nodes with no dependencies 831 for _, node := range graph.noDepNodes { 832 startNode(ctx, node, false, ctrErrors, ctrsVisited, true) 833 } 834 835 if len(ctrErrors) > 0 { 836 logrus.Errorf("error starting some container dependencies") 837 for _, e := range ctrErrors { 838 logrus.Errorf("%q", e) 839 } 840 return errors.Wrapf(define.ErrInternal, "error starting some containers") 841 } 842 return nil 843 } 844 845 // getAllDependencies is a precursor to starting dependencies. 846 // To start a container with all of its dependencies, we need to recursively find all dependencies 847 // a container has, as well as each of those containers' dependencies, and so on 848 // To do so, keep track of containers already visited (so there aren't redundant state lookups), 849 // and recursively search until we have reached the leaves of every dependency node. 850 // Since we need to start all dependencies for our original container to successfully start, we propagate any errors 851 // in looking up dependencies. 852 // Note: this function is currently meant as a robust solution to a narrow problem: start an infra-container when 853 // a container in the pod is run. It has not been tested for performance past one level, so expansion of recursive start 854 // must be tested first.
855 func (c *Container) getAllDependencies(visited map[string]*Container) error { 856 depIDs := c.Dependencies() 857 if len(depIDs) == 0 { 858 return nil 859 } 860 for _, depID := range depIDs { 861 if _, ok := visited[depID]; !ok { 862 dep, err := c.runtime.state.Container(depID) 863 if err != nil { 864 return err 865 } 866 status, err := dep.State() 867 if err != nil { 868 return err 869 } 870 // if the dependency is already running, we can assume its dependencies are also running 871 // so no need to add them to those we need to start 872 if status != define.ContainerStateRunning { 873 visited[depID] = dep 874 if err := dep.getAllDependencies(visited); err != nil { 875 return err 876 } 877 } 878 } 879 } 880 return nil 881 } 882 883 // Check if a container's dependencies are running 884 // Returns a []string containing the IDs of dependencies that are not running 885 func (c *Container) checkDependenciesRunning() ([]string, error) { 886 deps := c.Dependencies() 887 notRunning := []string{} 888 889 // We were not passed a set of dependency containers 890 // Make it ourselves 891 depCtrs := make(map[string]*Container, len(deps)) 892 for _, dep := range deps { 893 // Get the dependency container 894 depCtr, err := c.runtime.state.Container(dep) 895 if err != nil { 896 return nil, errors.Wrapf(err, "error retrieving dependency %s of container %s from state", dep, c.ID()) 897 } 898 899 // Check the status 900 state, err := depCtr.State() 901 if err != nil { 902 return nil, errors.Wrapf(err, "error retrieving state of dependency %s of container %s", dep, c.ID()) 903 } 904 if state != define.ContainerStateRunning { 905 notRunning = append(notRunning, dep) 906 } 907 depCtrs[dep] = depCtr 908 } 909 910 return notRunning, nil 911 } 912 913 func (c *Container) completeNetworkSetup() error { 914 var outResolvConf []string 915 netDisabled, err := c.NetworkDisabled() 916 if err != nil { 917 return err 918 } 919 if !c.config.PostConfigureNetNS || netDisabled { 920 return nil 921 } 922 if err := c.syncContainer(); err != nil { 923 return err 924 } 925 if c.config.NetMode.IsSlirp4netns() { 926 return c.runtime.setupRootlessNetNS(c) 927 } 928 if err := c.runtime.setupNetNS(c); err != nil { 929 return err 930 } 931 state := c.state 932 // collect any dns servers that cni tells us to use (dnsname) 933 for _, cni := range state.NetworkStatus { 934 if cni.DNS.Nameservers != nil { 935 for _, server := range cni.DNS.Nameservers { 936 outResolvConf = append(outResolvConf, fmt.Sprintf("nameserver %s", server)) 937 } 938 } 939 } 940 // check if we have a bindmount for resolv.conf 941 resolvBindMount := state.BindMounts["/etc/resolv.conf"] 942 if len(outResolvConf) < 1 || resolvBindMount == "" || len(c.config.NetNsCtr) > 0 { 943 return nil 944 } 945 // read the existing resolv.conf 946 b, err := ioutil.ReadFile(resolvBindMount) 947 if err != nil { 948 return err 949 } 950 for _, line := range strings.Split(string(b), "\n") { 951 // only keep things that don't start with nameserver from the old 952 // resolv.conf file 953 if !strings.HasPrefix(line, "nameserver") { 954 outResolvConf = append([]string{line}, outResolvConf...) 
955 } 956 } 957 // write and return 958 return ioutil.WriteFile(resolvBindMount, []byte(strings.Join(outResolvConf, "\n")), 0644) 959 } 960 961 // Initialize a container, creating it in the runtime 962 func (c *Container) init(ctx context.Context, retainRetries bool) error { 963 span, _ := opentracing.StartSpanFromContext(ctx, "init") 964 span.SetTag("struct", "container") 965 defer span.Finish() 966 967 // Unconditionally remove conmon temporary files. 968 // We've been running into far too many issues where they block startup. 969 if err := c.removeConmonFiles(); err != nil { 970 return err 971 } 972 973 // Generate the OCI newSpec 974 newSpec, err := c.generateSpec(ctx) 975 if err != nil { 976 return err 977 } 978 979 // Save the OCI newSpec to disk 980 if err := c.saveSpec(newSpec); err != nil { 981 return err 982 } 983 984 // With the spec complete, do an OCI create 985 if err := c.ociRuntime.CreateContainer(c, nil); err != nil { 986 // Fedora 31 is carrying a patch to display improved error 987 // messages to better handle the V2 transition. This is NOT 988 // upstream in any OCI runtime. 989 // TODO: Remove once runc supports cgroupsv2 990 if strings.Contains(err.Error(), "this version of runc doesn't work on cgroups v2") { 991 logrus.Errorf("oci runtime %q does not support CGroups V2: use system migrate to mitigate", c.ociRuntime.Name()) 992 } 993 return err 994 } 995 996 logrus.Debugf("Created container %s in OCI runtime", c.ID()) 997 998 c.state.ExitCode = 0 999 c.state.Exited = false 1000 c.state.State = define.ContainerStateCreated 1001 c.state.StoppedByUser = false 1002 c.state.RestartPolicyMatch = false 1003 1004 if !retainRetries { 1005 c.state.RestartCount = 0 1006 } 1007 1008 if err := c.save(); err != nil { 1009 return err 1010 } 1011 if c.config.HealthCheckConfig != nil { 1012 if err := c.createTimer(); err != nil { 1013 logrus.Error(err) 1014 } 1015 } 1016 1017 defer c.newContainerEvent(events.Init) 1018 return c.completeNetworkSetup() 1019 } 1020 1021 // Clean up a container in the OCI runtime. 1022 // Deletes the container in the runtime, and resets its state to Exited. 1023 // The container can be restarted cleanly after this. 1024 func (c *Container) cleanupRuntime(ctx context.Context) error { 1025 span, _ := opentracing.StartSpanFromContext(ctx, "cleanupRuntime") 1026 span.SetTag("struct", "container") 1027 defer span.Finish() 1028 1029 // If the container is not ContainerStateStopped or 1030 // ContainerStateCreated, do nothing. 1031 if !c.ensureState(define.ContainerStateStopped, define.ContainerStateCreated) { 1032 return nil 1033 } 1034 1035 // If necessary, delete attach and ctl files 1036 if err := c.removeConmonFiles(); err != nil { 1037 return err 1038 } 1039 1040 if err := c.delete(ctx); err != nil { 1041 return err 1042 } 1043 1044 // If we were Stopped, we are now Exited, as we've removed ourself 1045 // from the runtime. 1046 // If we were Created, we are now Configured. 1047 if c.state.State == define.ContainerStateStopped { 1048 c.state.State = define.ContainerStateExited 1049 } else if c.state.State == define.ContainerStateCreated { 1050 c.state.State = define.ContainerStateConfigured 1051 } 1052 1053 if c.valid { 1054 if err := c.save(); err != nil { 1055 return err 1056 } 1057 } 1058 1059 logrus.Debugf("Successfully cleaned up container %s", c.ID()) 1060 1061 return nil 1062 } 1063 1064 // Reinitialize a container. 1065 // Deletes and recreates a container in the runtime. 1066 // Should only be done on ContainerStateStopped containers. 
1067 // Not necessary for ContainerStateExited - the container has already been 1068 // removed from the runtime, so init() can proceed freely. 1069 func (c *Container) reinit(ctx context.Context, retainRetries bool) error { 1070 span, _ := opentracing.StartSpanFromContext(ctx, "reinit") 1071 span.SetTag("struct", "container") 1072 defer span.Finish() 1073 1074 logrus.Debugf("Recreating container %s in OCI runtime", c.ID()) 1075 1076 if err := c.cleanupRuntime(ctx); err != nil { 1077 return err 1078 } 1079 1080 // Initialize the container again 1081 return c.init(ctx, retainRetries) 1082 } 1083 1084 // Initialize (if necessary) and start a container 1085 // Performs all necessary steps to start a container that is not running 1086 // Does not lock or check validity 1087 func (c *Container) initAndStart(ctx context.Context) (err error) { 1088 // If we are ContainerStateUnknown, throw an error 1089 if c.state.State == define.ContainerStateUnknown { 1090 return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is in an unknown state", c.ID()) 1091 } else if c.state.State == define.ContainerStateRemoving { 1092 return errors.Wrapf(define.ErrCtrStateInvalid, "cannot start container %s as it is being removed", c.ID()) 1093 } 1094 1095 // If we are running, do nothing 1096 if c.state.State == define.ContainerStateRunning { 1097 return nil 1098 } 1099 // If we are paused, throw an error 1100 if c.state.State == define.ContainerStatePaused { 1101 return errors.Wrapf(define.ErrCtrStateInvalid, "cannot start paused container %s", c.ID()) 1102 } 1103 1104 defer func() { 1105 if err != nil { 1106 if err2 := c.cleanup(ctx); err2 != nil { 1107 logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2) 1108 } 1109 } 1110 }() 1111 1112 if err := c.prepare(); err != nil { 1113 return err 1114 } 1115 1116 // If we are ContainerStateStopped we need to remove from runtime 1117 // And reset to ContainerStateConfigured 1118 if c.state.State == define.ContainerStateStopped { 1119 logrus.Debugf("Recreating container %s in OCI runtime", c.ID()) 1120 1121 if err := c.reinit(ctx, false); err != nil { 1122 return err 1123 } 1124 } else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) { 1125 if err := c.init(ctx, false); err != nil { 1126 return err 1127 } 1128 } 1129 1130 // Now start the container 1131 return c.start() 1132 } 1133 1134 // Internal, non-locking function to start a container 1135 func (c *Container) start() error { 1136 if c.config.Spec.Process != nil { 1137 logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args) 1138 } 1139 1140 if err := c.ociRuntime.StartContainer(c); err != nil { 1141 return err 1142 } 1143 logrus.Debugf("Started container %s", c.ID()) 1144 1145 c.state.State = define.ContainerStateRunning 1146 1147 if c.config.HealthCheckConfig != nil { 1148 if err := c.updateHealthStatus(HealthCheckStarting); err != nil { 1149 logrus.Error(err) 1150 } 1151 if err := c.startTimer(); err != nil { 1152 logrus.Error(err) 1153 } 1154 } 1155 1156 defer c.newContainerEvent(events.Start) 1157 1158 return c.save() 1159 } 1160 1161 // Internal, non-locking function to stop container 1162 func (c *Container) stop(timeout uint) error { 1163 logrus.Debugf("Stopping ctr %s (timeout %d)", c.ID(), timeout) 1164 1165 // If the container is running in a PID Namespace, then killing the 1166 // primary pid is enough to kill the container. 
If it is not running in 1167 // a pid namespace then the OCI Runtime needs to kill ALL processes in 1168 // the containers cgroup in order to make sure the container is stopped. 1169 all := !c.hasNamespace(spec.PIDNamespace) 1170 // We can't use --all if CGroups aren't present. 1171 // Rootless containers with CGroups v1 and NoCgroups are both cases 1172 // where this can happen. 1173 if all { 1174 if c.config.NoCgroups { 1175 all = false 1176 } else if rootless.IsRootless() { 1177 // Only do this check if we need to 1178 unified, err := cgroups.IsCgroup2UnifiedMode() 1179 if err != nil { 1180 return err 1181 } 1182 if !unified { 1183 all = false 1184 } 1185 } 1186 } 1187 1188 if err := c.ociRuntime.StopContainer(c, timeout, all); err != nil { 1189 return err 1190 } 1191 1192 c.state.PID = 0 1193 c.state.ConmonPID = 0 1194 c.state.StoppedByUser = true 1195 if err := c.save(); err != nil { 1196 return errors.Wrapf(err, "error saving container %s state after stopping", c.ID()) 1197 } 1198 1199 // Wait until we have an exit file, and sync once we do 1200 if err := c.waitForExitFileAndSync(); err != nil { 1201 return err 1202 } 1203 1204 c.newContainerEvent(events.Stop) 1205 1206 return nil 1207 } 1208 1209 // Internal, non-locking function to pause a container 1210 func (c *Container) pause() error { 1211 if c.config.NoCgroups { 1212 return errors.Wrapf(define.ErrNoCgroups, "cannot pause without using CGroups") 1213 } 1214 1215 if rootless.IsRootless() { 1216 cgroupv2, err := cgroups.IsCgroup2UnifiedMode() 1217 if err != nil { 1218 return errors.Wrap(err, "failed to determine cgroupversion") 1219 } 1220 if !cgroupv2 { 1221 return errors.Wrap(define.ErrNoCgroups, "can not pause containers on rootless containers with cgroup V1") 1222 } 1223 } 1224 1225 if err := c.ociRuntime.PauseContainer(c); err != nil { 1226 // TODO when using docker-py there is some sort of race/incompatibility here 1227 return err 1228 } 1229 1230 logrus.Debugf("Paused container %s", c.ID()) 1231 1232 c.state.State = define.ContainerStatePaused 1233 1234 return c.save() 1235 } 1236 1237 // Internal, non-locking function to unpause a container 1238 func (c *Container) unpause() error { 1239 if c.config.NoCgroups { 1240 return errors.Wrapf(define.ErrNoCgroups, "cannot unpause without using CGroups") 1241 } 1242 1243 if err := c.ociRuntime.UnpauseContainer(c); err != nil { 1244 // TODO when using docker-py there is some sort of race/incompatibility here 1245 return err 1246 } 1247 1248 logrus.Debugf("Unpaused container %s", c.ID()) 1249 1250 c.state.State = define.ContainerStateRunning 1251 1252 return c.save() 1253 } 1254 1255 // Internal, non-locking function to restart a container 1256 func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (err error) { 1257 if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStateExited) { 1258 return errors.Wrapf(define.ErrCtrStateInvalid, "unable to restart a container in a paused or unknown state") 1259 } 1260 1261 c.newContainerEvent(events.Restart) 1262 1263 if c.state.State == define.ContainerStateRunning { 1264 conmonPID := c.state.ConmonPID 1265 if err := c.stop(timeout); err != nil { 1266 return err 1267 } 1268 // Old versions of conmon have a bug where they create the exit file before 1269 // closing open file descriptors causing a race condition when restarting 1270 // containers with open ports since we cannot bind the ports as they're not 1271 // yet 
closed by conmon. 1272 // 1273 // Killing the old conmon PID is ~okay since it forces the FDs of old conmons 1274 // to be closed, while it's a NOP for newer versions which should have 1275 // exited already. 1276 if conmonPID != 0 { 1277 // Ignore errors from FindProcess() as conmon could already have exited. 1278 p, err := os.FindProcess(conmonPID) 1279 if p != nil && err == nil { 1280 if err = p.Kill(); err != nil { 1281 logrus.Debugf("error killing conmon process: %v", err) 1282 } 1283 } 1284 } 1285 // Ensure we tear down the container network so it will be 1286 // recreated - otherwise, behavior of restart differs from stop 1287 // and start 1288 if err := c.cleanupNetwork(); err != nil { 1289 return err 1290 } 1291 } 1292 defer func() { 1293 if err != nil { 1294 if err2 := c.cleanup(ctx); err2 != nil { 1295 logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2) 1296 } 1297 } 1298 }() 1299 if err := c.prepare(); err != nil { 1300 return err 1301 } 1302 1303 if c.state.State == define.ContainerStateStopped { 1304 // Reinitialize the container if we need to 1305 if err := c.reinit(ctx, false); err != nil { 1306 return err 1307 } 1308 } else if c.state.State == define.ContainerStateConfigured || 1309 c.state.State == define.ContainerStateExited { 1310 // Initialize the container 1311 if err := c.init(ctx, false); err != nil { 1312 return err 1313 } 1314 } 1315 return c.start() 1316 } 1317 1318 // mountStorage sets up the container's root filesystem 1319 // It mounts the image and any other requested mounts 1320 // TODO: Add ability to override mount label so we can use this for Mount() too 1321 // TODO: Can we use this for export? Copying SHM into the export might not be 1322 // good 1323 func (c *Container) mountStorage() (_ string, deferredErr error) { 1324 var err error 1325 // Container already mounted, nothing to do 1326 if c.state.Mounted { 1327 return c.state.Mountpoint, nil 1328 } 1329 1330 mounted, err := mount.Mounted(c.config.ShmDir) 1331 if err != nil { 1332 return "", errors.Wrapf(err, "unable to determine if %q is mounted", c.config.ShmDir) 1333 } 1334 1335 if !mounted && !MountExists(c.config.Spec.Mounts, "/dev/shm") { 1336 shmOptions := fmt.Sprintf("mode=1777,size=%d", c.config.ShmSize) 1337 if err := c.mountSHM(shmOptions); err != nil { 1338 return "", err 1339 } 1340 if err := os.Chown(c.config.ShmDir, c.RootUID(), c.RootGID()); err != nil { 1341 return "", errors.Wrapf(err, "failed to chown %s", c.config.ShmDir) 1342 } 1343 defer func() { 1344 if deferredErr != nil { 1345 if err := c.unmountSHM(c.config.ShmDir); err != nil { 1346 logrus.Errorf("Error unmounting SHM for container %s after mount error: %v", c.ID(), err) 1347 } 1348 } 1349 }() 1350 } 1351 1352 // We need to mount the container before volumes - to ensure the copyup 1353 // works properly. 
1354 mountPoint := c.config.Rootfs 1355 if mountPoint == "" { 1356 mountPoint, err = c.mount() 1357 if err != nil { 1358 return "", err 1359 } 1360 defer func() { 1361 if deferredErr != nil { 1362 if err := c.unmount(false); err != nil { 1363 logrus.Errorf("Error unmounting container %s after mount error: %v", c.ID(), err) 1364 } 1365 } 1366 }() 1367 } 1368 1369 // Request a mount of all named volumes 1370 for _, v := range c.config.NamedVolumes { 1371 vol, err := c.mountNamedVolume(v, mountPoint) 1372 if err != nil { 1373 return "", err 1374 } 1375 defer func() { 1376 if deferredErr == nil { 1377 return 1378 } 1379 vol.lock.Lock() 1380 if err := vol.unmount(false); err != nil { 1381 logrus.Errorf("Error unmounting volume %s after error mounting container %s: %v", vol.Name(), c.ID(), err) 1382 } 1383 vol.lock.Unlock() 1384 }() 1385 } 1386 1387 return mountPoint, nil 1388 } 1389 1390 // Mount a single named volume into the container. 1391 // If necessary, copy up image contents into the volume. 1392 // Does not verify that the name volume given is actually present in container 1393 // config. 1394 // Returns the volume that was mounted. 1395 func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string) (*Volume, error) { 1396 vol, err := c.runtime.state.Volume(v.Name) 1397 if err != nil { 1398 return nil, errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID()) 1399 } 1400 1401 if vol.config.LockID == c.config.LockID { 1402 return nil, errors.Wrapf(define.ErrWillDeadlock, "container %s and volume %s share lock ID %d", c.ID(), vol.Name(), c.config.LockID) 1403 } 1404 vol.lock.Lock() 1405 defer vol.lock.Unlock() 1406 if vol.needsMount() { 1407 if err := vol.mount(); err != nil { 1408 return nil, errors.Wrapf(err, "error mounting volume %s for container %s", vol.Name(), c.ID()) 1409 } 1410 } 1411 // The volume may need a copy-up. Check the state. 1412 if err := vol.update(); err != nil { 1413 return nil, err 1414 } 1415 if vol.state.NeedsCopyUp { 1416 logrus.Debugf("Copying up contents from container %s to volume %s", c.ID(), vol.Name()) 1417 1418 // Set NeedsCopyUp to false immediately, so we don't try this 1419 // again when there are already files copied. 1420 vol.state.NeedsCopyUp = false 1421 if err := vol.save(); err != nil { 1422 return nil, err 1423 } 1424 1425 // If the volume is not empty, we should not copy up. 1426 volMount := vol.MountPoint() 1427 contents, err := ioutil.ReadDir(volMount) 1428 if err != nil { 1429 return nil, errors.Wrapf(err, "error listing contents of volume %s mountpoint when copying up from container %s", vol.Name(), c.ID()) 1430 } 1431 if len(contents) > 0 { 1432 // The volume is not empty. It was likely modified 1433 // outside of Podman. For safety, let's not copy up into 1434 // it. Fixes CVE-2020-1726. 
1435 return vol, nil 1436 } 1437 1438 srcDir, err := securejoin.SecureJoin(mountpoint, v.Dest) 1439 if err != nil { 1440 return nil, errors.Wrapf(err, "error calculating destination path to copy up container %s volume %s", c.ID(), vol.Name()) 1441 } 1442 if err := c.copyWithTarFromImage(srcDir, volMount); err != nil && !os.IsNotExist(err) { 1443 return nil, errors.Wrapf(err, "error copying content from container %s into volume %s", c.ID(), vol.Name()) 1444 } 1445 } 1446 return vol, nil 1447 } 1448 1449 // cleanupStorage unmounts and cleans up the container's root filesystem 1450 func (c *Container) cleanupStorage() error { 1451 if !c.state.Mounted { 1452 // Already unmounted, do nothing 1453 logrus.Debugf("Container %s storage is already unmounted, skipping...", c.ID()) 1454 return nil 1455 } 1456 1457 var cleanupErr error 1458 1459 for _, containerMount := range c.config.Mounts { 1460 if err := c.unmountSHM(containerMount); err != nil { 1461 if cleanupErr != nil { 1462 logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr) 1463 } 1464 cleanupErr = err 1465 } 1466 } 1467 1468 if c.config.Rootfs != "" { 1469 return cleanupErr 1470 } 1471 1472 if err := c.unmount(false); err != nil { 1473 // If the container has already been removed, warn but don't 1474 // error 1475 // We still want to be able to kick the container out of the 1476 // state 1477 if errors.Cause(err) == storage.ErrNotAContainer || errors.Cause(err) == storage.ErrContainerUnknown || errors.Cause(err) == storage.ErrLayerNotMounted { 1478 logrus.Errorf("Storage for container %s has been removed", c.ID()) 1479 } else { 1480 if cleanupErr != nil { 1481 logrus.Errorf("Error cleaning up container %s storage: %v", c.ID(), cleanupErr) 1482 } 1483 cleanupErr = err 1484 } 1485 } 1486 1487 // Request an unmount of all named volumes 1488 for _, v := range c.config.NamedVolumes { 1489 vol, err := c.runtime.state.Volume(v.Name) 1490 if err != nil { 1491 if cleanupErr != nil { 1492 logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr) 1493 } 1494 cleanupErr = errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID()) 1495 1496 // We need to try and unmount every volume, so continue 1497 // if they fail. 
1498 continue 1499 } 1500 1501 if vol.needsMount() { 1502 vol.lock.Lock() 1503 if err := vol.unmount(false); err != nil { 1504 if cleanupErr != nil { 1505 logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr) 1506 } 1507 cleanupErr = errors.Wrapf(err, "error unmounting volume %s for container %s", vol.Name(), c.ID()) 1508 } 1509 vol.lock.Unlock() 1510 } 1511 } 1512 1513 c.state.Mountpoint = "" 1514 c.state.Mounted = false 1515 1516 if c.valid { 1517 if err := c.save(); err != nil { 1518 if cleanupErr != nil { 1519 logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr) 1520 } 1521 cleanupErr = err 1522 } 1523 } 1524 return cleanupErr 1525 } 1526 1527 // Unmount a container and free its resources 1528 func (c *Container) cleanup(ctx context.Context) error { 1529 var lastError error 1530 1531 span, _ := opentracing.StartSpanFromContext(ctx, "cleanup") 1532 span.SetTag("struct", "container") 1533 defer span.Finish() 1534 1535 logrus.Debugf("Cleaning up container %s", c.ID()) 1536 1537 // Remove the healthcheck unit/timer file if it exists 1538 if c.config.HealthCheckConfig != nil { 1539 if err := c.removeTimer(); err != nil { 1540 logrus.Errorf("Error removing timer for container %s healthcheck: %v", c.ID(), err) 1541 } 1542 } 1543 1544 // Clean up network namespace, if present 1545 if err := c.cleanupNetwork(); err != nil { 1546 lastError = errors.Wrapf(err, "error removing container %s network", c.ID()) 1547 } 1548 1549 // Unmount storage 1550 if err := c.cleanupStorage(); err != nil { 1551 if lastError != nil { 1552 logrus.Errorf("Error unmounting container %s storage: %v", c.ID(), err) 1553 } else { 1554 lastError = errors.Wrapf(err, "error unmounting container %s storage", c.ID()) 1555 } 1556 } 1557 1558 // Remove the container from the runtime, if necessary 1559 if err := c.cleanupRuntime(ctx); err != nil { 1560 if lastError != nil { 1561 logrus.Errorf("Error removing container %s from OCI runtime: %v", c.ID(), err) 1562 } else { 1563 lastError = err 1564 } 1565 } 1566 1567 return lastError 1568 } 1569 1570 // delete deletes the container and runs any configured poststop 1571 // hooks. 1572 func (c *Container) delete(ctx context.Context) (err error) { 1573 span, _ := opentracing.StartSpanFromContext(ctx, "delete") 1574 span.SetTag("struct", "container") 1575 defer span.Finish() 1576 1577 if err := c.ociRuntime.DeleteContainer(c); err != nil { 1578 return errors.Wrapf(err, "error removing container %s from runtime", c.ID()) 1579 } 1580 1581 if err := c.postDeleteHooks(ctx); err != nil { 1582 return errors.Wrapf(err, "container %s poststop hooks", c.ID()) 1583 } 1584 1585 return nil 1586 } 1587 1588 // postDeleteHooks runs the poststop hooks (if any) as specified by 1589 // the OCI Runtime Specification (which requires them to run 1590 // post-delete, despite the stage name).
1591 func (c *Container) postDeleteHooks(ctx context.Context) (err error) { 1592 span, _ := opentracing.StartSpanFromContext(ctx, "postDeleteHooks") 1593 span.SetTag("struct", "container") 1594 defer span.Finish() 1595 1596 if c.state.ExtensionStageHooks != nil { 1597 extensionHooks, ok := c.state.ExtensionStageHooks["poststop"] 1598 if ok { 1599 state, err := json.Marshal(spec.State{ 1600 Version: spec.Version, 1601 ID: c.ID(), 1602 Status: "stopped", 1603 Bundle: c.bundlePath(), 1604 Annotations: c.config.Spec.Annotations, 1605 }) 1606 if err != nil { 1607 return err 1608 } 1609 for i, hook := range extensionHooks { 1610 hook := hook 1611 logrus.Debugf("container %s: invoke poststop hook %d, path %s", c.ID(), i, hook.Path) 1612 var stderr, stdout bytes.Buffer 1613 hookErr, err := exec.Run(ctx, &hook, state, &stdout, &stderr, exec.DefaultPostKillTimeout) 1614 if err != nil { 1615 logrus.Warnf("container %s: poststop hook %d: %v", c.ID(), i, err) 1616 if hookErr != err { 1617 logrus.Debugf("container %s: poststop hook %d (hook error): %v", c.ID(), i, hookErr) 1618 } 1619 stdoutString := stdout.String() 1620 if stdoutString != "" { 1621 logrus.Debugf("container %s: poststop hook %d: stdout:\n%s", c.ID(), i, stdoutString) 1622 } 1623 stderrString := stderr.String() 1624 if stderrString != "" { 1625 logrus.Debugf("container %s: poststop hook %d: stderr:\n%s", c.ID(), i, stderrString) 1626 } 1627 } 1628 } 1629 } 1630 } 1631 1632 return nil 1633 } 1634 1635 // writeStringToRundir copies the provided file to the runtimedir 1636 func (c *Container) writeStringToRundir(destFile, output string) (string, error) { 1637 destFileName := filepath.Join(c.state.RunDir, destFile) 1638 1639 if err := os.Remove(destFileName); err != nil && !os.IsNotExist(err) { 1640 return "", errors.Wrapf(err, "error removing %s for container %s", destFile, c.ID()) 1641 } 1642 1643 f, err := os.Create(destFileName) 1644 if err != nil { 1645 return "", errors.Wrapf(err, "unable to create %s", destFileName) 1646 } 1647 defer f.Close() 1648 if err := f.Chown(c.RootUID(), c.RootGID()); err != nil { 1649 return "", err 1650 } 1651 1652 if _, err := f.WriteString(output); err != nil { 1653 return "", errors.Wrapf(err, "unable to write %s", destFileName) 1654 } 1655 // Relabel runDirResolv for the container 1656 if err := label.Relabel(destFileName, c.config.MountLabel, false); err != nil { 1657 return "", err 1658 } 1659 1660 return filepath.Join(c.state.RunDir, destFile), nil 1661 } 1662 1663 // appendStringToRundir appends the provided string to the runtimedir file 1664 func (c *Container) appendStringToRundir(destFile, output string) (string, error) { 1665 destFileName := filepath.Join(c.state.RunDir, destFile) 1666 1667 f, err := os.OpenFile(destFileName, os.O_APPEND|os.O_WRONLY, 0600) 1668 if err != nil { 1669 return "", errors.Wrapf(err, "unable to open %s", destFileName) 1670 } 1671 defer f.Close() 1672 1673 if _, err := f.WriteString(output); err != nil { 1674 return "", errors.Wrapf(err, "unable to write %s", destFileName) 1675 } 1676 1677 return filepath.Join(c.state.RunDir, destFile), nil 1678 } 1679 1680 // saveSpec saves the OCI spec to disk, replacing any existing specs for the container 1681 func (c *Container) saveSpec(spec *spec.Spec) error { 1682 // If the OCI spec already exists, we need to replace it 1683 // Cannot guarantee some things, e.g. 
// saveSpec saves the OCI spec to disk, replacing any existing specs for the container
func (c *Container) saveSpec(spec *spec.Spec) error {
	// If the OCI spec already exists, we need to replace it.
	// Cannot guarantee some things, e.g. network namespaces, have the same paths.
	jsonPath := filepath.Join(c.bundlePath(), "config.json")
	if _, err := os.Stat(jsonPath); err != nil {
		if !os.IsNotExist(err) {
			return errors.Wrapf(err, "error doing stat on container %s spec", c.ID())
		}
		// The spec does not exist, we're fine
	} else {
		// The spec exists, need to remove it
		if err := os.Remove(jsonPath); err != nil {
			return errors.Wrapf(err, "error replacing runtime spec for container %s", c.ID())
		}
	}

	fileJSON, err := json.Marshal(spec)
	if err != nil {
		return errors.Wrapf(err, "error exporting runtime spec for container %s to JSON", c.ID())
	}
	if err := ioutil.WriteFile(jsonPath, fileJSON, 0644); err != nil {
		return errors.Wrapf(err, "error writing runtime spec JSON for container %s to disk", c.ID())
	}

	logrus.Debugf("Created OCI spec for container %s at %s", c.ID(), jsonPath)

	c.state.ConfigPath = jsonPath

	return nil
}

// Warning: precreate hooks may alter 'config' in place.
func (c *Container) setupOCIHooks(ctx context.Context, config *spec.Spec) (extensionStageHooks map[string][]spec.Hook, err error) {
	allHooks := make(map[string][]spec.Hook)
	if c.runtime.config.Engine.HooksDir == nil {
		if rootless.IsRootless() {
			return nil, nil
		}
		for _, hDir := range []string{hooks.DefaultDir, hooks.OverrideDir} {
			manager, err := hooks.New(ctx, []string{hDir}, []string{"precreate", "poststop"})
			if err != nil {
				if os.IsNotExist(err) {
					continue
				}
				return nil, err
			}
			ociHooks, err := manager.Hooks(config, c.Spec().Annotations, len(c.config.UserVolumes) > 0)
			if err != nil {
				return nil, err
			}
			if len(ociHooks) > 0 || config.Hooks != nil {
				logrus.Warnf("implicit hook directories are deprecated; set --hooks-dir=%q explicitly to continue to load hooks from this directory", hDir)
			}
			for i, hook := range ociHooks {
				allHooks[i] = hook
			}
		}
	} else {
		manager, err := hooks.New(ctx, c.runtime.config.Engine.HooksDir, []string{"precreate", "poststop"})
		if err != nil {
			return nil, err
		}

		allHooks, err = manager.Hooks(config, c.Spec().Annotations, len(c.config.UserVolumes) > 0)
		if err != nil {
			return nil, err
		}
	}

	hookErr, err := exec.RuntimeConfigFilter(ctx, allHooks["precreate"], config, exec.DefaultPostKillTimeout)
	if err != nil {
		logrus.Warnf("container %s: precreate hook: %v", c.ID(), err)
		if hookErr != nil && hookErr != err {
			logrus.Debugf("container %s: precreate hook (hook error): %v", c.ID(), hookErr)
		}
		return nil, err
	}

	return allHooks, nil
}

// mount mounts the container's root filesystem
func (c *Container) mount() (string, error) {
	mountPoint, err := c.runtime.storageService.MountContainerImage(c.ID())
	if err != nil {
		return "", errors.Wrapf(err, "error mounting storage for container %s", c.ID())
	}
	mountPoint, err = filepath.EvalSymlinks(mountPoint)
	if err != nil {
		return "", errors.Wrapf(err, "error resolving storage path for container %s", c.ID())
	}
	if err := os.Chown(mountPoint, c.RootUID(), c.RootGID()); err != nil {
		return "", errors.Wrapf(err, "cannot chown %s to %d:%d", mountPoint, c.RootUID(), c.RootGID())
	}
	return mountPoint, nil
}

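// Illustrative sketch (not part of the libpod source): pairing mount() above
// with unmount() below when temporary access to the container's root
// filesystem is needed. The helper name and the callback are hypothetical.
func withMountedRootfs(c *Container, fn func(mountPoint string) error) error {
	mountPoint, err := c.mount()
	if err != nil {
		return err
	}
	defer func() {
		if err := c.unmount(false); err != nil {
			logrus.Errorf("Error unmounting container %s: %v", c.ID(), err)
		}
	}()
	return fn(mountPoint)
}
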
// unmount unmounts the container's root filesystem
func (c *Container) unmount(force bool) error {
	// Also unmount storage
	if _, err := c.runtime.storageService.UnmountContainerImage(c.ID(), force); err != nil {
		return errors.Wrapf(err, "error unmounting container %s root filesystem", c.ID())
	}

	return nil
}

// this should be from chrootarchive.
// Container MUST be mounted before calling.
func (c *Container) copyWithTarFromImage(source, dest string) error {
	a := archive.NewDefaultArchiver()

	if err := c.copyOwnerAndPerms(source, dest); err != nil {
		return err
	}
	return a.CopyWithTar(source, dest)
}

// checkReadyForRemoval checks whether the given container is ready to be
// removed.
// These checks are only used if force-remove is not specified.
// If it is, we'll remove the container anyway.
// Returns nil if safe to remove, or an error describing why it's unsafe if not.
func (c *Container) checkReadyForRemoval() error {
	if c.state.State == define.ContainerStateUnknown {
		return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is in invalid state", c.ID())
	}

	if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
		return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed without force", c.ID(), c.state.State.String())
	}

	// Check exec sessions
	sessions, err := c.getActiveExecSessions()
	if err != nil {
		return err
	}
	if len(sessions) != 0 {
		return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove container %s as it has active exec sessions", c.ID())
	}

	return nil
}

// writeJSONFile marshals and writes the given data to a JSON file
// in the bundle path
func (c *Container) writeJSONFile(v interface{}, file string) (err error) {
	fileJSON, err := json.MarshalIndent(v, "", " ")
	if err != nil {
		return errors.Wrapf(err, "error writing JSON to %s for container %s", file, c.ID())
	}
	file = filepath.Join(c.bundlePath(), file)
	if err := ioutil.WriteFile(file, fileJSON, 0644); err != nil {
		return err
	}

	return nil
}

// prepareCheckpointExport writes the config and spec to
// JSON files for later export
func (c *Container) prepareCheckpointExport() (err error) {
	// save live config
	if err := c.writeJSONFile(c.Config(), "config.dump"); err != nil {
		return err
	}

	// save spec
	jsonPath := filepath.Join(c.bundlePath(), "config.json")
	g, err := generate.NewFromFile(jsonPath)
	if err != nil {
		logrus.Debugf("generating spec for container %q failed with %v", c.ID(), err)
		return err
	}
	if err := c.writeJSONFile(g.Config, "spec.dump"); err != nil {
		return err
	}

	return nil
}

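// Illustrative sketch (not part of the libpod source): gating removal on
// checkReadyForRemoval() above unless force-removal was requested. The
// wrapper function and the remove callback are hypothetical.
func removeIfSafe(c *Container, force bool, remove func(*Container) error) error {
	if !force {
		if err := c.checkReadyForRemoval(); err != nil {
			return err
		}
	}
	return remove(c)
}
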
// sortUserVolumes sorts the volumes specified for a container
// between named and normal volumes
func (c *Container) sortUserVolumes(ctrSpec *spec.Spec) ([]*ContainerNamedVolume, []spec.Mount) {
	namedUserVolumes := []*ContainerNamedVolume{}
	userMounts := []spec.Mount{}

	// We need to parse all named volumes and mounts into maps, so we don't
	// end up with repeated lookups for each user volume.
	// Map destination to struct, as destination is what is stored in
	// UserVolumes.
	namedVolumes := make(map[string]*ContainerNamedVolume)
	mounts := make(map[string]spec.Mount)
	for _, namedVol := range c.config.NamedVolumes {
		namedVolumes[namedVol.Dest] = namedVol
	}
	for _, mount := range ctrSpec.Mounts {
		mounts[mount.Destination] = mount
	}

	for _, vol := range c.config.UserVolumes {
		if volume, ok := namedVolumes[vol]; ok {
			namedUserVolumes = append(namedUserVolumes, volume)
		} else if mount, ok := mounts[vol]; ok {
			userMounts = append(userMounts, mount)
		} else {
			logrus.Warnf("Could not find mount at destination %q when parsing user volumes for container %s", vol, c.ID())
		}
	}
	return namedUserVolumes, userMounts
}

// Check for an exit file, and handle one if present
func (c *Container) checkExitFile() error {
	// If the container's not running, nothing to do.
	if !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
		return nil
	}

	exitFile, err := c.exitFilePath()
	if err != nil {
		return err
	}

	// Check for the exit file
	info, err := os.Stat(exitFile)
	if err != nil {
		if os.IsNotExist(err) {
			// Container is still running, no error
			return nil
		}

		return errors.Wrapf(err, "error running stat on container %s exit file", c.ID())
	}

	// Alright, it exists. Transition to Stopped state.
	c.state.State = define.ContainerStateStopped
	c.state.PID = 0
	c.state.ConmonPID = 0

	// Read the exit file to get our stopped time and exit code.
	return c.handleExitFile(exitFile, info)
}

func (c *Container) hasNamespace(namespace spec.LinuxNamespaceType) bool {
	if c.config.Spec == nil || c.config.Spec.Linux == nil {
		return false
	}
	for _, n := range c.config.Spec.Linux.Namespaces {
		if n.Type == namespace {
			return true
		}
	}
	return false
}
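
// Illustrative sketch (not part of the libpod source): querying hasNamespace()
// above to check whether the container was given its own network namespace.
// The helper name is hypothetical; spec.NetworkNamespace is the runtime-spec
// constant for a network namespace.
func hasIsolatedNetwork(c *Container) bool {
	return c.hasNamespace(spec.NetworkNamespace)
}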