github.com/AbhinandanKurakure/podman/v3@v3.4.10/libpod/container_internal.go (about) 1 package libpod 2 3 import ( 4 "bufio" 5 "bytes" 6 "context" 7 "fmt" 8 "io" 9 "io/ioutil" 10 "os" 11 "path/filepath" 12 "strconv" 13 "strings" 14 "time" 15 16 metadata "github.com/checkpoint-restore/checkpointctl/lib" 17 "github.com/containers/buildah/copier" 18 butil "github.com/containers/buildah/util" 19 "github.com/containers/podman/v3/libpod/define" 20 "github.com/containers/podman/v3/libpod/events" 21 "github.com/containers/podman/v3/pkg/cgroups" 22 "github.com/containers/podman/v3/pkg/ctime" 23 "github.com/containers/podman/v3/pkg/hooks" 24 "github.com/containers/podman/v3/pkg/hooks/exec" 25 "github.com/containers/podman/v3/pkg/rootless" 26 "github.com/containers/podman/v3/pkg/selinux" 27 "github.com/containers/podman/v3/pkg/util" 28 "github.com/containers/storage" 29 "github.com/containers/storage/pkg/archive" 30 "github.com/containers/storage/pkg/idtools" 31 "github.com/containers/storage/pkg/mount" 32 "github.com/coreos/go-systemd/v22/daemon" 33 securejoin "github.com/cyphar/filepath-securejoin" 34 spec "github.com/opencontainers/runtime-spec/specs-go" 35 "github.com/opencontainers/runtime-tools/generate" 36 "github.com/opencontainers/selinux/go-selinux/label" 37 "github.com/pkg/errors" 38 "github.com/sirupsen/logrus" 39 "golang.org/x/sys/unix" 40 ) 41 42 const ( 43 // name of the directory holding the artifacts 44 artifactsDir = "artifacts" 45 execDirPermission = 0755 46 preCheckpointDir = "pre-checkpoint" 47 ) 48 49 // rootFsSize gets the size of the container's root filesystem 50 // A container FS is split into two parts. The first is the top layer, a 51 // mutable layer, and the rest is the RootFS: the set of immutable layers 52 // that make up the image on which the container is based. 
func (c *Container) rootFsSize() (int64, error) {
	// Containers started from an explicit rootfs (not an image) have no
	// image layers to measure.
	if c.config.Rootfs != "" {
		return 0, nil
	}
	if c.runtime.store == nil {
		return 0, nil
	}

	container, err := c.runtime.store.Container(c.ID())
	if err != nil {
		return 0, err
	}

	// Ignore the size of the top layer. The top layer is a mutable RW layer
	// and is not considered a part of the rootfs
	rwLayer, err := c.runtime.store.Layer(container.LayerID)
	if err != nil {
		return 0, err
	}
	layer, err := c.runtime.store.Layer(rwLayer.Parent)
	if err != nil {
		return 0, err
	}

	// Walk up the parent chain, accumulating each layer's diff size.
	size := int64(0)
	for layer.Parent != "" {
		layerSize, err := c.runtime.store.DiffSize(layer.Parent, layer.ID)
		if err != nil {
			return size, errors.Wrapf(err, "getting diffsize of layer %q and its parent %q", layer.ID, layer.Parent)
		}
		size += layerSize
		layer, err = c.runtime.store.Layer(layer.Parent)
		if err != nil {
			return 0, err
		}
	}
	// Get the size of the last layer. Has to be outside of the loop
	// because the parent of the last layer is "", and lstore.Get("")
	// will return an error.
	layerSize, err := c.runtime.store.DiffSize(layer.Parent, layer.ID)
	return size + layerSize, err
}

// rwSize gets the size of the mutable top layer of the container.
func (c *Container) rwSize() (int64, error) {
	// For a user-supplied rootfs there is no layer store involved; sum the
	// sizes of all files under the rootfs directory instead.
	if c.config.Rootfs != "" {
		var size int64
		err := filepath.Walk(c.config.Rootfs, func(path string, info os.FileInfo, err error) error {
			if err != nil {
				return err
			}
			size += info.Size()
			return nil
		})
		return size, err
	}

	container, err := c.runtime.store.Container(c.ID())
	if err != nil {
		return 0, err
	}

	// The top layer of a container is
	// the only readable/writeable layer, all others are immutable.
	rwLayer, err := c.runtime.store.Layer(container.LayerID)
	if err != nil {
		return 0, err
	}

	// Get the size of the top layer by calculating the size of the diff
	// between the layer and its parent.
	return c.runtime.store.DiffSize(rwLayer.Parent, rwLayer.ID)
}

// bundlePath returns the path to the container's root filesystem - where the OCI spec will be
// placed, amongst other things
func (c *Container) bundlePath() string {
	return c.config.StaticDir
}

// ControlSocketPath returns the path to the containers control socket for things like tty
// resizing
func (c *Container) ControlSocketPath() string {
	return filepath.Join(c.bundlePath(), "ctl")
}

// CheckpointPath returns the path to the directory containing the checkpoint
func (c *Container) CheckpointPath() string {
	return filepath.Join(c.bundlePath(), metadata.CheckpointDirectory)
}

// PreCheckPointPath returns the path to the directory containing the pre-checkpoint-images
func (c *Container) PreCheckPointPath() string {
	return filepath.Join(c.bundlePath(), preCheckpointDir)
}

// AttachSocketPath retrieves the path of the container's attach socket
func (c *Container) AttachSocketPath() (string, error) {
	return c.ociRuntime.AttachSocketPath(c)
}

// exitFilePath gets the path to the container's exit file
func (c *Container) exitFilePath() (string, error) {
	return c.ociRuntime.ExitFilePath(c)
}

// Wait for the container's exit file to appear.
// When it does, update our state based on it.
161 func (c *Container) waitForExitFileAndSync() error { 162 exitFile, err := c.exitFilePath() 163 if err != nil { 164 return err 165 } 166 167 chWait := make(chan error) 168 defer close(chWait) 169 170 _, err = WaitForFile(exitFile, chWait, time.Second*5) 171 if err != nil { 172 // Exit file did not appear 173 // Reset our state 174 c.state.ExitCode = -1 175 c.state.FinishedTime = time.Now() 176 c.state.State = define.ContainerStateStopped 177 178 if err2 := c.save(); err2 != nil { 179 logrus.Errorf("Error saving container %s state: %v", c.ID(), err2) 180 } 181 182 return err 183 } 184 185 if err := c.checkExitFile(); err != nil { 186 return err 187 } 188 189 return c.save() 190 } 191 192 // Handle the container exit file. 193 // The exit file is used to supply container exit time and exit code. 194 // This assumes the exit file already exists. 195 func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error { 196 c.state.FinishedTime = ctime.Created(fi) 197 statusCodeStr, err := ioutil.ReadFile(exitFile) 198 if err != nil { 199 return errors.Wrapf(err, "failed to read exit file for container %s", c.ID()) 200 } 201 statusCode, err := strconv.Atoi(string(statusCodeStr)) 202 if err != nil { 203 return errors.Wrapf(err, "error converting exit status code (%q) for container %s to int", 204 c.ID(), statusCodeStr) 205 } 206 c.state.ExitCode = int32(statusCode) 207 208 oomFilePath := filepath.Join(c.bundlePath(), "oom") 209 if _, err = os.Stat(oomFilePath); err == nil { 210 c.state.OOMKilled = true 211 } 212 213 c.state.Exited = true 214 215 // Write an event for the container's death 216 c.newContainerExitedEvent(c.state.ExitCode) 217 218 return nil 219 } 220 221 func (c *Container) shouldRestart() bool { 222 // If we did not get a restart policy match, return false 223 // Do the same if we're not a policy that restarts. 
224 if !c.state.RestartPolicyMatch || 225 c.config.RestartPolicy == define.RestartPolicyNo || 226 c.config.RestartPolicy == define.RestartPolicyNone { 227 return false 228 } 229 230 // If we're RestartPolicyOnFailure, we need to check retries and exit 231 // code. 232 if c.config.RestartPolicy == define.RestartPolicyOnFailure { 233 if c.state.ExitCode == 0 { 234 return false 235 } 236 237 // If we don't have a max retries set, continue 238 if c.config.RestartRetries > 0 { 239 if c.state.RestartCount >= c.config.RestartRetries { 240 return false 241 } 242 } 243 } 244 return true 245 } 246 247 // Handle container restart policy. 248 // This is called when a container has exited, and was not explicitly stopped by 249 // an API call to stop the container or pod it is in. 250 func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr error) { 251 if !c.shouldRestart() { 252 return false, nil 253 } 254 logrus.Debugf("Restarting container %s due to restart policy %s", c.ID(), c.config.RestartPolicy) 255 256 // Need to check if dependencies are alive. 257 if err := c.checkDependenciesAndHandleError(); err != nil { 258 return false, err 259 } 260 261 // Is the container running again? 
262 // If so, we don't have to do anything 263 if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) { 264 return false, nil 265 } else if c.state.State == define.ContainerStateUnknown { 266 return false, errors.Wrapf(define.ErrInternal, "invalid container state encountered in restart attempt!") 267 } 268 269 c.newContainerEvent(events.Restart) 270 271 // Increment restart count 272 c.state.RestartCount++ 273 logrus.Debugf("Container %s now on retry %d", c.ID(), c.state.RestartCount) 274 if err := c.save(); err != nil { 275 return false, err 276 } 277 278 defer func() { 279 if retErr != nil { 280 if err := c.cleanup(ctx); err != nil { 281 logrus.Errorf("error cleaning up container %s: %v", c.ID(), err) 282 } 283 } 284 }() 285 if err := c.prepare(); err != nil { 286 return false, err 287 } 288 289 // setup slirp4netns again because slirp4netns will die when conmon exits 290 if c.config.NetMode.IsSlirp4netns() { 291 err := c.runtime.setupSlirp4netns(c) 292 if err != nil { 293 return false, err 294 } 295 } 296 297 // setup rootlesskit port forwarder again since it dies when conmon exits 298 // we use rootlesskit port forwarder only as rootless and when bridge network is used 299 if rootless.IsRootless() && c.config.NetMode.IsBridge() && len(c.config.PortMappings) > 0 { 300 err := c.runtime.setupRootlessPortMappingViaRLK(c, c.state.NetNS.Path()) 301 if err != nil { 302 return false, err 303 } 304 } 305 306 if c.state.State == define.ContainerStateStopped { 307 // Reinitialize the container if we need to 308 if err := c.reinit(ctx, true); err != nil { 309 return false, err 310 } 311 } else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) { 312 // Initialize the container 313 if err := c.init(ctx, true); err != nil { 314 return false, err 315 } 316 } 317 if err := c.start(); err != nil { 318 return false, err 319 } 320 return true, nil 321 } 322 323 // Ensure that the container is in a specific state or state. 
// Returns true if the container is in one of the given states,
// or false otherwise.
func (c *Container) ensureState(states ...define.ContainerStatus) bool {
	for _, state := range states {
		if state == c.state.State {
			return true
		}
	}
	return false
}

// Sync this container with on-disk state and runtime status
// Should only be called with container lock held
// This function should suffice to ensure a container's state is accurate and
// it is valid for use.
func (c *Container) syncContainer() error {
	if err := c.runtime.state.UpdateContainer(c); err != nil {
		return err
	}
	// If runtime knows about the container, update its status in runtime
	// And then save back to disk
	if c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStatePaused) {
		oldState := c.state.State

		if err := c.checkExitFile(); err != nil {
			return err
		}

		// Only save back to DB if state changed
		if c.state.State != oldState {
			// Check for a restart policy match
			if c.config.RestartPolicy != define.RestartPolicyNone && c.config.RestartPolicy != define.RestartPolicyNo &&
				(oldState == define.ContainerStateRunning || oldState == define.ContainerStatePaused) &&
				(c.state.State == define.ContainerStateStopped || c.state.State == define.ContainerStateExited) &&
				!c.state.StoppedByUser {
				c.state.RestartPolicyMatch = true
			}

			if err := c.save(); err != nil {
				return err
			}
		}
	}

	if !c.valid {
		return errors.Wrapf(define.ErrCtrRemoved, "container %s is not valid", c.ID())
	}

	return nil
}

// setupStorageMapping copies the source ID-mapping options into dest,
// adjusting them for pods, auto-userns, and explicit spec mappings.
func (c *Container) setupStorageMapping(dest, from *storage.IDMappingOptions) {
	// A user-supplied rootfs gets no storage mapping at all.
	if c.config.Rootfs != "" {
		return
	}
	*dest = *from
	// If we are creating a container inside a pod, we always want to inherit the
	// userns settings from the infra container. So clear the auto userns settings
	// so that we don't request storage for a new uid/gid map.
	if c.PodID() != "" && !c.IsInfra() {
		dest.AutoUserNs = false
	}
	if dest.AutoUserNs {
		overrides := c.getUserOverrides()
		dest.AutoUserNsOpts.PasswdFile = overrides.ContainerEtcPasswdPath
		dest.AutoUserNsOpts.GroupFile = overrides.ContainerEtcGroupPath
		if c.config.User != "" {
			// Size the automatic userns to cover the largest numeric
			// uid/gid mentioned in the --user option (+1 since IDs
			// are zero-based).
			initialSize := uint32(0)
			parts := strings.Split(c.config.User, ":")
			for _, p := range parts {
				s, err := strconv.ParseUint(p, 10, 32)
				if err == nil && uint32(s) > initialSize {
					initialSize = uint32(s)
				}
			}
			dest.AutoUserNsOpts.InitialSize = initialSize + 1
		}
	} else if c.config.Spec.Linux != nil {
		// Explicit mappings from the OCI spec win; translate them into
		// containers/storage idtools form.
		dest.UIDMap = nil
		for _, r := range c.config.Spec.Linux.UIDMappings {
			u := idtools.IDMap{
				ContainerID: int(r.ContainerID),
				HostID:      int(r.HostID),
				Size:        int(r.Size),
			}
			dest.UIDMap = append(dest.UIDMap, u)
		}
		dest.GIDMap = nil
		for _, r := range c.config.Spec.Linux.GIDMappings {
			g := idtools.IDMap{
				ContainerID: int(r.ContainerID),
				HostID:      int(r.HostID),
				Size:        int(r.Size),
			}
			dest.GIDMap = append(dest.GIDMap, g)
		}
		dest.HostUIDMapping = false
		dest.HostGIDMapping = false
	}
}

// Create container root filesystem for use
func (c *Container) setupStorage(ctx context.Context) error {
	if !c.valid {
		return errors.Wrapf(define.ErrCtrRemoved, "container %s is not valid", c.ID())
	}

	if c.state.State != define.ContainerStateConfigured {
		return errors.Wrapf(define.ErrCtrStateInvalid, "container %s must be in Configured state to have storage set up", c.ID())
	}

	// Need both an image ID and image name, plus a bool telling us whether to use the image configuration
	if c.config.Rootfs == "" && (c.config.RootfsImageID == "" || c.config.RootfsImageName == "") {
		return errors.Wrapf(define.ErrInvalidArg, "must provide image ID and image name to use an image")
	}
	options := storage.ContainerOptions{
		IDMappingOptions: storage.IDMappingOptions{
			HostUIDMapping: true,
			HostGIDMapping: true,
		},
		LabelOpts: c.config.LabelOpts,
	}
	if c.restoreFromCheckpoint && !c.config.Privileged {
		// If restoring from a checkpoint, the root file-system
		// needs to be mounted with the same SELinux labels as
		// it was mounted previously.
		if options.Flags == nil {
			options.Flags = make(map[string]interface{})
		}
		options.Flags["ProcessLabel"] = c.config.ProcessLabel
		options.Flags["MountLabel"] = c.config.MountLabel
	}
	if c.config.Privileged {
		// Privileged containers drop the nodev/nosuid/noexec mount
		// restrictions from the graph driver's default options.
		privOpt := func(opt string) bool {
			for _, privopt := range []string{"nodev", "nosuid", "noexec"} {
				if opt == privopt {
					return true
				}
			}
			return false
		}

		defOptions, err := storage.GetMountOptions(c.runtime.store.GraphDriverName(), c.runtime.store.GraphOptions())
		if err != nil {
			return errors.Wrapf(err, "error getting default mount options")
		}
		var newOptions []string
		for _, opt := range defOptions {
			if !privOpt(opt) {
				newOptions = append(newOptions, opt)
			}
		}
		options.MountOpts = newOptions
	}

	options.Volatile = c.config.Volatile

	c.setupStorageMapping(&options.IDMappingOptions, &c.config.IDMappings)

	// Unless the user has specified a name, use a randomly generated one.
	// Note that name conflicts may occur (see #11735), so we need to loop.
	generateName := c.config.Name == ""
	var containerInfo ContainerInfo
	var containerInfoErr error
	for {
		if generateName {
			name, err := c.runtime.generateName()
			if err != nil {
				return err
			}
			c.config.Name = name
		}
		containerInfo, containerInfoErr = c.runtime.storageService.CreateContainerStorage(ctx, c.runtime.imageContext, c.config.RootfsImageName, c.config.RootfsImageID, c.config.Name, c.config.ID, options)

		// Only retry when we generated the name ourselves and the
		// failure was a name collision.
		if !generateName || errors.Cause(containerInfoErr) != storage.ErrDuplicateName {
			break
		}
	}
	if containerInfoErr != nil {
		return errors.Wrapf(containerInfoErr, "error creating container storage")
	}

	c.config.IDMappings.UIDMap = containerInfo.UIDMap
	c.config.IDMappings.GIDMap = containerInfo.GIDMap

	processLabel, err := c.processLabel(containerInfo.ProcessLabel)
	if err != nil {
		return err
	}
	c.config.ProcessLabel = processLabel
	c.config.MountLabel = containerInfo.MountLabel
	c.config.StaticDir = containerInfo.Dir
	c.state.RunDir = containerInfo.RunDir

	// With ID mappings in play, the static and run dirs must be owned by
	// the container's root user, not the host's.
	if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 {
		if err := os.Chown(containerInfo.RunDir, c.RootUID(), c.RootGID()); err != nil {
			return err
		}

		if err := os.Chown(containerInfo.Dir, c.RootUID(), c.RootGID()); err != nil {
			return err
		}
	}

	// Set the default Entrypoint and Command
	if containerInfo.Config != nil {
		// Set CMD in the container to the default configuration only if ENTRYPOINT is not set by the user.
		if c.config.Entrypoint == nil && c.config.Command == nil {
			c.config.Command = containerInfo.Config.Config.Cmd
		}
		if c.config.Entrypoint == nil {
			c.config.Entrypoint = containerInfo.Config.Config.Entrypoint
		}
	}

	artifacts := filepath.Join(c.config.StaticDir, artifactsDir)
	if err := os.MkdirAll(artifacts, 0755); err != nil {
		return errors.Wrap(err, "error creating artifacts directory")
	}

	return nil
}

// processLabel returns the SELinux process label to use, swapping in the
// KVM or systemd init label when the runtime/config requires one and the
// user did not pin a label type via annotation.
func (c *Container) processLabel(processLabel string) (string, error) {
	if !c.config.Systemd && !c.ociRuntime.SupportsKVM() {
		return processLabel, nil
	}
	ctrSpec, err := c.specFromState()
	if err != nil {
		return "", err
	}
	label, ok := ctrSpec.Annotations[define.InspectAnnotationLabel]
	if !ok || !strings.Contains(label, "type:") {
		switch {
		case c.ociRuntime.SupportsKVM():
			return selinux.KVMLabel(processLabel)
		case c.config.Systemd:
			return selinux.InitLabel(processLabel)
		}
	}
	return processLabel, nil
}

// Tear down a container's storage prior to removal
func (c *Container) teardownStorage() error {
	if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
		return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove storage for container %s as it is running or paused", c.ID())
	}

	artifacts := filepath.Join(c.config.StaticDir, artifactsDir)
	if err := os.RemoveAll(artifacts); err != nil {
		return errors.Wrapf(err, "error removing container %s artifacts %q", c.ID(), artifacts)
	}

	if err := c.cleanupStorage(); err != nil {
		return errors.Wrapf(err, "failed to cleanup container %s storage", c.ID())
	}

	if err := c.runtime.storageService.DeleteContainer(c.ID()); err != nil {
		// If the container has already been removed, warn but do not
		// error - we wanted it gone, it is already gone.
		// Potentially another tool using containers/storage already
		// removed it?
		if errors.Cause(err) == storage.ErrNotAContainer || errors.Cause(err) == storage.ErrContainerUnknown {
			logrus.Infof("Storage for container %s already removed", c.ID())
			return nil
		}

		return errors.Wrapf(err, "error removing container %s root filesystem", c.ID())
	}

	return nil
}

// Reset resets state fields to default values.
// It is performed before a refresh and clears the state after a reboot.
// It does not save the results - assumes the database will do that for us.
func resetState(state *ContainerState) {
	state.PID = 0
	state.ConmonPID = 0
	state.Mountpoint = ""
	state.Mounted = false
	// Exited containers keep their Exited status across a reboot; anything
	// else goes back to Configured.
	if state.State != define.ContainerStateExited {
		state.State = define.ContainerStateConfigured
	}
	state.ExecSessions = make(map[string]*ExecSession)
	state.LegacyExecSessions = nil
	state.BindMounts = make(map[string]string)
	state.StoppedByUser = false
	state.RestartPolicyMatch = false
	state.RestartCount = 0
	state.Checkpointed = false
}

// Refresh refreshes the container's state after a restart.
// Refresh cannot perform any operations that would lock another container.
// We cannot guarantee any other container has a valid lock at the time it is
// running.
func (c *Container) refresh() error {
	// Don't need a full sync, but we do need to update from the database to
	// pick up potentially-missing container state
	if err := c.runtime.state.UpdateContainer(c); err != nil {
		return err
	}

	if !c.valid {
		return errors.Wrapf(define.ErrCtrRemoved, "container %s is not valid - may have been removed", c.ID())
	}

	// We need to get the container's temporary directory from c/storage
	// It was lost in the reboot and must be recreated
	dir, err := c.runtime.storageService.GetRunDir(c.ID())
	if err != nil {
		return errors.Wrapf(err, "error retrieving temporary directory for container %s", c.ID())
	}
	c.state.RunDir = dir

	if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 {
		// Make the engine tmpdir traversable (+x) and recreate the
		// per-container userNS root owned by the container's root.
		info, err := os.Stat(c.runtime.config.Engine.TmpDir)
		if err != nil {
			return err
		}
		if err := os.Chmod(c.runtime.config.Engine.TmpDir, info.Mode()|0111); err != nil {
			return err
		}
		root := filepath.Join(c.runtime.config.Engine.TmpDir, "containers-root", c.ID())
		if err := os.MkdirAll(root, 0755); err != nil {
			return errors.Wrapf(err, "error creating userNS tmpdir for container %s", c.ID())
		}
		if err := os.Chown(root, c.RootUID(), c.RootGID()); err != nil {
			return err
		}
	}

	// We need to pick up a new lock
	lock, err := c.runtime.lockManager.AllocateAndRetrieveLock(c.config.LockID)
	if err != nil {
		return errors.Wrapf(err, "error acquiring lock %d for container %s", c.config.LockID, c.ID())
	}
	c.lock = lock

	// Try to delete any lingering IP allocations.
	// If this fails, just log and ignore.
	// I'm a little concerned that this is so far down in refresh() and we
	// could fail before getting to it - but the worst that would happen is
	// that Inspect() would return info on IPs we no longer own.
	if len(c.state.NetworkStatus) > 0 {
		if err := c.removeIPv4Allocations(); err != nil {
			logrus.Errorf("Error removing IP allocations for container %s: %v", c.ID(), err)
		}
	}
	c.state.NetworkStatus = nil

	if err := c.save(); err != nil {
		return errors.Wrapf(err, "error refreshing state for container %s", c.ID())
	}

	// Remove ctl and attach files, which may persist across reboot
	if err := c.removeConmonFiles(); err != nil {
		return err
	}

	return nil
}

// Try and remove IP address allocations. Presently IPv4 only.
// Should be safe as rootless because NetworkStatus should only be populated if
// CNI is running.
func (c *Container) removeIPv4Allocations() error {
	cniNetworksDir, err := getCNINetworksDir()
	if err != nil {
		return err
	}

	if len(c.state.NetworkStatus) == 0 {
		return nil
	}

	cniDefaultNetwork := ""
	if c.runtime.netPlugin != nil {
		cniDefaultNetwork = c.runtime.netPlugin.GetDefaultNetworkName()
	}

	networks, _, err := c.networks()
	if err != nil {
		return err
	}

	if len(networks) != len(c.state.NetworkStatus) {
		return errors.Wrapf(define.ErrInternal, "network mismatch: asked to join %d CNI networks but got %d CNI results", len(networks), len(c.state.NetworkStatus))
	}

	// Remove the host-local IPAM reservation file for each IPv4 address.
	for index, result := range c.state.NetworkStatus {
		for _, ctrIP := range result.IPs {
			if ctrIP.Version != "4" {
				continue
			}
			candidate := ""
			if len(networks) > 0 {
				// CNI returns networks in order we passed them.
				// So our index into results should be our index
				// into networks.
				candidate = filepath.Join(cniNetworksDir, networks[index], ctrIP.Address.IP.String())
			} else {
				candidate = filepath.Join(cniNetworksDir, cniDefaultNetwork, ctrIP.Address.IP.String())
			}
			logrus.Debugf("Going to try removing IP address reservation file %q for container %s", candidate, c.ID())
			if err := os.Remove(candidate); err != nil && !os.IsNotExist(err) {
				return errors.Wrapf(err, "error removing CNI IP reservation file %q for container %s", candidate, c.ID())
			}
		}
	}

	return nil
}

// Remove conmon attach socket and terminal resize FIFO
// This is necessary for restarting containers
func (c *Container) removeConmonFiles() error {
	// Files are allowed to not exist, so ignore ENOENT
	attachFile, err := c.AttachSocketPath()
	if err != nil {
		return errors.Wrapf(err, "failed to get attach socket path for container %s", c.ID())
	}

	if err := os.Remove(attachFile); err != nil && !os.IsNotExist(err) {
		return errors.Wrapf(err, "error removing container %s attach file", c.ID())
	}

	ctlFile := filepath.Join(c.bundlePath(), "ctl")
	if err := os.Remove(ctlFile); err != nil && !os.IsNotExist(err) {
		return errors.Wrapf(err, "error removing container %s ctl file", c.ID())
	}

	winszFile := filepath.Join(c.bundlePath(), "winsz")
	if err := os.Remove(winszFile); err != nil && !os.IsNotExist(err) {
		return errors.Wrapf(err, "error removing container %s winsz file", c.ID())
	}

	oomFile := filepath.Join(c.bundlePath(), "oom")
	if err := os.Remove(oomFile); err != nil && !os.IsNotExist(err) {
		return errors.Wrapf(err, "error removing container %s OOM file", c.ID())
	}

	// Remove the exit file so we don't leak memory in tmpfs
	exitFile, err := c.exitFilePath()
	if err != nil {
		return err
	}
	if err := os.Remove(exitFile); err != nil && !os.IsNotExist(err) {
		return errors.Wrapf(err, "error removing container %s exit file", c.ID())
	}

	return nil
}

// export writes an uncompressed tar of the container's root filesystem to
// the file at path, mounting (and later unmounting) the container if it is
// not already mounted.
func (c *Container) export(path string) error {
	mountPoint := c.state.Mountpoint
	if !c.state.Mounted {
		containerMount, err := c.runtime.store.Mount(c.ID(), c.config.MountLabel)
		if err != nil {
			return errors.Wrapf(err, "error mounting container %q", c.ID())
		}
		mountPoint = containerMount
		defer func() {
			if _, err := c.runtime.store.Unmount(c.ID(), false); err != nil {
				logrus.Errorf("error unmounting container %q: %v", c.ID(), err)
			}
		}()
	}

	input, err := archive.Tar(mountPoint, archive.Uncompressed)
	if err != nil {
		return errors.Wrapf(err, "error reading container directory %q", c.ID())
	}

	outFile, err := os.Create(path)
	if err != nil {
		return errors.Wrapf(err, "error creating file %q", path)
	}
	defer outFile.Close()

	_, err = io.Copy(outFile, input)
	return err
}

// Get path of artifact with a given name for this container
func (c *Container) getArtifactPath(name string) string {
	return filepath.Join(c.config.StaticDir, artifactsDir, name)
}

// Used with Wait() to determine if a container has exited
func (c *Container) isStopped() (bool, int32, error) {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()
	}
	err := c.syncContainer()
	if err != nil {
		return true, -1, err
	}

	return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping), c.state.ExitCode, nil
}

// save container state to the database
func (c *Container) save() error {
	if err := c.runtime.state.SaveContainer(c); err != nil {
		return errors.Wrapf(err, "error saving container %s state", c.ID())
	}
	return nil
}

// Checks the container is in the right state, then initializes the container in preparation to start the container.
// If recursive is true, each of the containers dependencies will be started.
// Otherwise, this function will return with error if there are dependencies of this container that aren't running.
func (c *Container) prepareToStart(ctx context.Context, recursive bool) (retErr error) {
	// Container must be created or stopped to be started
	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) {
		return errors.Wrapf(define.ErrCtrStateInvalid, "container %s must be in Created or Stopped state to be started", c.ID())
	}

	if !recursive {
		if err := c.checkDependenciesAndHandleError(); err != nil {
			return err
		}
	} else {
		if err := c.startDependencies(ctx); err != nil {
			return err
		}
	}

	// Undo prepare() if anything below fails.
	defer func() {
		if retErr != nil {
			if err := c.cleanup(ctx); err != nil {
				logrus.Errorf("error cleaning up container %s: %v", c.ID(), err)
			}
		}
	}()

	if err := c.prepare(); err != nil {
		return err
	}

	if c.state.State == define.ContainerStateStopped {
		// Reinitialize the container if we need to
		if err := c.reinit(ctx, false); err != nil {
			return err
		}
	} else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
		// Or initialize it if necessary
		if err := c.init(ctx, false); err != nil {
			return err
		}
	}
	return nil
}

// checks dependencies are running and prints a helpful message
func (c *Container) checkDependenciesAndHandleError() error {
	notRunning, err := c.checkDependenciesRunning()
	if err != nil {
		return errors.Wrapf(err, "error checking dependencies for container %s", c.ID())
	}
	if len(notRunning) > 0 {
		depString := strings.Join(notRunning, ",")
		return errors.Wrapf(define.ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString)
	}

	return nil
}

// Recursively start all dependencies of a container so the container can be started.
func (c *Container) startDependencies(ctx context.Context) error {
	depCtrIDs := c.Dependencies()
	if len(depCtrIDs) == 0 {
		return nil
	}

	depVisitedCtrs := make(map[string]*Container)
	if err := c.getAllDependencies(depVisitedCtrs); err != nil {
		return errors.Wrapf(err, "error starting dependency for container %s", c.ID())
	}

	// Because of how Go handles passing slices through functions, a slice cannot grow between function calls
	// without clunky syntax. Circumnavigate this by translating the map to a slice for buildContainerGraph
	depCtrs := make([]*Container, 0)
	for _, ctr := range depVisitedCtrs {
		depCtrs = append(depCtrs, ctr)
	}

	// Build a dependency graph of containers
	graph, err := BuildContainerGraph(depCtrs)
	if err != nil {
		return errors.Wrapf(err, "error generating dependency graph for container %s", c.ID())
	}

	// If there are no containers without dependencies, we can't start
	// Error out
	if len(graph.noDepNodes) == 0 {
		// we have no dependencies that need starting, go ahead and return
		if len(graph.nodes) == 0 {
			return nil
		}
		return errors.Wrapf(define.ErrNoSuchCtr, "All dependencies have dependencies of %s", c.ID())
	}

	ctrErrors := make(map[string]error)
	ctrsVisited := make(map[string]bool)

	// Traverse the graph beginning at nodes with no dependencies
	for _, node := range graph.noDepNodes {
		startNode(ctx, node, false, ctrErrors, ctrsVisited, true)
	}

	// Per-container errors were collected into ctrErrors; log each and
	// report a single aggregate failure.
	if len(ctrErrors) > 0 {
		logrus.Errorf("error starting some container dependencies")
		for _, e := range ctrErrors {
			logrus.Errorf("%q", e)
		}
		return errors.Wrapf(define.ErrInternal, "error starting some containers")
	}
	return nil
}

// getAllDependencies is a precursor to starting dependencies.
950 // To start a container with all of its dependencies, we need to recursively find all dependencies 951 // a container has, as well as each of those containers' dependencies, and so on 952 // To do so, keep track of containers already visited (so there aren't redundant state lookups), 953 // and recursively search until we have reached the leafs of every dependency node. 954 // Since we need to start all dependencies for our original container to successfully start, we propagate any errors 955 // in looking up dependencies. 956 // Note: this function is currently meant as a robust solution to a narrow problem: start an infra-container when 957 // a container in the pod is run. It has not been tested for performance past one level, so expansion of recursive start 958 // must be tested first. 959 func (c *Container) getAllDependencies(visited map[string]*Container) error { 960 depIDs := c.Dependencies() 961 if len(depIDs) == 0 { 962 return nil 963 } 964 for _, depID := range depIDs { 965 if _, ok := visited[depID]; !ok { 966 dep, err := c.runtime.state.Container(depID) 967 if err != nil { 968 return err 969 } 970 status, err := dep.State() 971 if err != nil { 972 return err 973 } 974 // if the dependency is already running, we can assume its dependencies are also running 975 // so no need to add them to those we need to start 976 if status != define.ContainerStateRunning { 977 visited[depID] = dep 978 if err := dep.getAllDependencies(visited); err != nil { 979 return err 980 } 981 } 982 } 983 } 984 return nil 985 } 986 987 // Check if a container's dependencies are running 988 // Returns a []string containing the IDs of dependencies that are not running 989 func (c *Container) checkDependenciesRunning() ([]string, error) { 990 deps := c.Dependencies() 991 notRunning := []string{} 992 993 // We were not passed a set of dependency containers 994 // Make it ourselves 995 depCtrs := make(map[string]*Container, len(deps)) 996 for _, dep := range deps { 997 // Get the 
// completeNetworkSetup finishes network configuration for containers whose
// network namespace is configured after creation (PostConfigureNetNS), then
// patches /etc/hosts and /etc/resolv.conf bind mounts with CNI-provided data.
func (c *Container) completeNetworkSetup() error {
	var outResolvConf []string
	netDisabled, err := c.NetworkDisabled()
	if err != nil {
		return err
	}
	// Nothing to do unless network setup was deferred and networking is on.
	if !c.config.PostConfigureNetNS || netDisabled {
		return nil
	}
	if err := c.syncContainer(); err != nil {
		return err
	}
	if c.config.NetMode.IsSlirp4netns() {
		return c.runtime.setupSlirp4netns(c)
	}
	if err := c.runtime.setupNetNS(c); err != nil {
		return err
	}
	state := c.state
	// collect any dns servers that cni tells us to use (dnsname)
	for _, cni := range state.NetworkStatus {
		if cni.DNS.Nameservers != nil {
			for _, server := range cni.DNS.Nameservers {
				outResolvConf = append(outResolvConf, fmt.Sprintf("nameserver %s", server))
			}
		}
	}
	// check if we have a bindmount for /etc/hosts
	if hostsBindMount, ok := state.BindMounts["/etc/hosts"]; ok && len(c.cniHosts()) > 0 {
		ctrHostPath := filepath.Join(c.state.RunDir, "hosts")
		// Only touch the file if it is the one we manage in RunDir.
		if hostsBindMount == ctrHostPath {
			// read the existing hosts
			b, err := ioutil.ReadFile(hostsBindMount)
			if err != nil {
				return err
			}
			// Append the CNI-assigned address entry for this container.
			if err := ioutil.WriteFile(hostsBindMount, append(b, []byte(c.cniHosts())...), 0644); err != nil {
				return err
			}
		}
	}

	// check if we have a bindmount for resolv.conf
	resolvBindMount := state.BindMounts["/etc/resolv.conf"]
	if len(outResolvConf) < 1 || resolvBindMount == "" || len(c.config.NetNsCtr) > 0 {
		return nil
	}
	// read the existing resolv.conf
	b, err := ioutil.ReadFile(resolvBindMount)
	if err != nil {
		return err
	}
	for _, line := range strings.Split(string(b), "\n") {
		// only keep things that don't start with nameserver from the old
		// resolv.conf file
		// NOTE(review): each kept line is prepended, which reverses the
		// relative order of the original non-nameserver lines.
		if !strings.HasPrefix(line, "nameserver") {
			outResolvConf = append([]string{line}, outResolvConf...)
		}
	}
	// write and return
	return ioutil.WriteFile(resolvBindMount, []byte(strings.Join(outResolvConf, "\n")), 0644)
}
// cniHosts returns an /etc/hosts-style line mapping the container's first
// CNI-assigned IP address to its hostname and name; empty if no address yet.
func (c *Container) cniHosts() string {
	var hosts string
	if len(c.state.NetworkStatus) > 0 && len(c.state.NetworkStatus[0].IPs) > 0 {
		// The address is in CIDR form; strip the prefix length.
		ipAddress := strings.Split(c.state.NetworkStatus[0].IPs[0].Address.String(), "/")[0]
		hosts += fmt.Sprintf("%s\t%s %s\n", ipAddress, c.Hostname(), c.config.Name)
	}
	return hosts
}

// Initialize a container, creating it in the runtime
func (c *Container) init(ctx context.Context, retainRetries bool) error {
	// Unconditionally remove conmon temporary files.
	// We've been running into far too many issues where they block startup.
	if err := c.removeConmonFiles(); err != nil {
		return err
	}

	// Generate the OCI newSpec
	newSpec, err := c.generateSpec(ctx)
	if err != nil {
		return err
	}

	// Make sure the workdir exists while initializing container
	if err := c.resolveWorkDir(); err != nil {
		return err
	}

	// Save the OCI newSpec to disk
	if err := c.saveSpec(newSpec); err != nil {
		return err
	}

	// Fix up each named volume (helper defined elsewhere in this file).
	for _, v := range c.config.NamedVolumes {
		if err := c.fixVolumePermissions(v); err != nil {
			return err
		}
	}

	// With the spec complete, do an OCI create
	if err := c.ociRuntime.CreateContainer(c, nil); err != nil {
		// Fedora 31 is carrying a patch to display improved error
		// messages to better handle the V2 transition. This is NOT
		// upstream in any OCI runtime.
		// TODO: Remove once runc supports cgroupsv2
		if strings.Contains(err.Error(), "this version of runc doesn't work on cgroups v2") {
			logrus.Errorf("oci runtime %q does not support CGroups V2: use system migrate to mitigate", c.ociRuntime.Name())
		}
		return err
	}

	logrus.Debugf("Created container %s in OCI runtime", c.ID())

	// Remove any exec sessions leftover from a potential prior run.
	if len(c.state.ExecSessions) > 0 {
		if err := c.runtime.state.RemoveContainerExecSessions(c); err != nil {
			logrus.Errorf("Error removing container %s exec sessions from DB: %v", c.ID(), err)
		}
		c.state.ExecSessions = make(map[string]*ExecSession)
	}

	// Reset runtime state for the freshly created container.
	c.state.Checkpointed = false
	c.state.ExitCode = 0
	c.state.Exited = false
	c.state.State = define.ContainerStateCreated
	c.state.StoppedByUser = false
	c.state.RestartPolicyMatch = false

	if !retainRetries {
		c.state.RestartCount = 0
	}

	if err := c.save(); err != nil {
		return err
	}
	// Healthcheck timer creation failures are logged, not fatal.
	if c.config.HealthCheckConfig != nil {
		if err := c.createTimer(); err != nil {
			logrus.Error(err)
		}
	}

	defer c.newContainerEvent(events.Init)
	return c.completeNetworkSetup()
}
// Clean up a container in the OCI runtime.
// Deletes the container in the runtime, and resets its state to Exited.
// The container can be restarted cleanly after this.
func (c *Container) cleanupRuntime(ctx context.Context) error {
	// If the container is not ContainerStateStopped or
	// ContainerStateCreated, do nothing.
	if !c.ensureState(define.ContainerStateStopped, define.ContainerStateCreated) {
		return nil
	}

	// If necessary, delete attach and ctl files
	if err := c.removeConmonFiles(); err != nil {
		return err
	}

	// delete() removes the container from the runtime and runs poststop hooks.
	if err := c.delete(ctx); err != nil {
		return err
	}

	// If we were Stopped, we are now Exited, as we've removed ourself
	// from the runtime.
	// If we were Created, we are now Configured.
	if c.state.State == define.ContainerStateStopped {
		c.state.State = define.ContainerStateExited
	} else if c.state.State == define.ContainerStateCreated {
		c.state.State = define.ContainerStateConfigured
	}

	// Only persist the new state while the container is still valid.
	if c.valid {
		if err := c.save(); err != nil {
			return err
		}
	}

	logrus.Debugf("Successfully cleaned up container %s", c.ID())

	return nil
}
// Reinitialize a container.
// Deletes and recreates a container in the runtime.
// Should only be done on ContainerStateStopped containers.
// Not necessary for ContainerStateExited - the container has already been
// removed from the runtime, so init() can proceed freely.
func (c *Container) reinit(ctx context.Context, retainRetries bool) error {
	logrus.Debugf("Recreating container %s in OCI runtime", c.ID())

	// Remove the stale copy of the container from the runtime first.
	if err := c.cleanupRuntime(ctx); err != nil {
		return err
	}

	// Initialize the container again
	return c.init(ctx, retainRetries)
}

// Initialize (if necessary) and start a container
// Performs all necessary steps to start a container that is not running
// Does not lock or check validity
func (c *Container) initAndStart(ctx context.Context) (retErr error) {
	// If we are ContainerStateUnknown, throw an error
	if c.state.State == define.ContainerStateUnknown {
		return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is in an unknown state", c.ID())
	} else if c.state.State == define.ContainerStateRemoving {
		return errors.Wrapf(define.ErrCtrStateInvalid, "cannot start container %s as it is being removed", c.ID())
	}

	// If we are running, do nothing
	if c.state.State == define.ContainerStateRunning {
		return nil
	}
	// If we are paused, throw an error
	if c.state.State == define.ContainerStatePaused {
		return errors.Wrapf(define.ErrCtrStateInvalid, "cannot start paused container %s", c.ID())
	}

	// Any failure past this point must undo what prepare()/init() did.
	defer func() {
		if retErr != nil {
			if err := c.cleanup(ctx); err != nil {
				logrus.Errorf("error cleaning up container %s: %v", c.ID(), err)
			}
		}
	}()

	if err := c.prepare(); err != nil {
		return err
	}

	// If we are ContainerStateStopped we need to remove from runtime
	// And reset to ContainerStateConfigured
	if c.state.State == define.ContainerStateStopped {
		logrus.Debugf("Recreating container %s in OCI runtime", c.ID())

		if err := c.reinit(ctx, false); err != nil {
			return err
		}
	} else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
		if err := c.init(ctx, false); err != nil {
			return err
		}
	}

	// Now start the container
	return c.start()
}
// Internal, non-locking function to start a container
func (c *Container) start() error {
	if c.config.Spec.Process != nil {
		logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
	}

	if err := c.ociRuntime.StartContainer(c); err != nil {
		return err
	}
	logrus.Debugf("Started container %s", c.ID())

	c.state.State = define.ContainerStateRunning

	// If requested, notify systemd of the (conmon) main PID, and, in
	// "conmon" mode, also send READY.
	if c.config.SdNotifyMode != define.SdNotifyModeIgnore {
		payload := fmt.Sprintf("MAINPID=%d", c.state.ConmonPID)
		if c.config.SdNotifyMode == define.SdNotifyModeConmon {
			payload += "\n"
			payload += daemon.SdNotifyReady
		}
		// Notification failure is logged but does not fail the start.
		if sent, err := daemon.SdNotify(false, payload); err != nil {
			logrus.Errorf("Error notifying systemd of Conmon PID: %s", err.Error())
		} else if sent {
			logrus.Debugf("Notify sent successfully")
		}
	}

	// Healthcheck failures are likewise non-fatal here.
	if c.config.HealthCheckConfig != nil {
		if err := c.updateHealthStatus(define.HealthCheckStarting); err != nil {
			logrus.Error(err)
		}
		if err := c.startTimer(); err != nil {
			logrus.Error(err)
		}
	}

	defer c.newContainerEvent(events.Start)

	return c.save()
}
// Internal, non-locking function to stop container
func (c *Container) stop(timeout uint) error {
	logrus.Debugf("Stopping ctr %s (timeout %d)", c.ID(), timeout)

	// If the container is running in a PID Namespace, then killing the
	// primary pid is enough to kill the container. If it is not running in
	// a pid namespace then the OCI Runtime needs to kill ALL processes in
	// the containers cgroup in order to make sure the container is stopped.
	all := !c.hasNamespace(spec.PIDNamespace)
	// We can't use --all if CGroups aren't present.
	// Rootless containers with CGroups v1 and NoCgroups are both cases
	// where this can happen.
	if all {
		if c.config.NoCgroups {
			all = false
		} else if rootless.IsRootless() {
			// Only do this check if we need to
			unified, err := cgroups.IsCgroup2UnifiedMode()
			if err != nil {
				return err
			}
			if !unified {
				all = false
			}
		}
	}

	// Check if conmon is still alive.
	// If it is not, we won't be getting an exit file.
	conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
	if err != nil {
		return err
	}

	// Set the container state to "stopping" and unlock the container
	// before handing it over to conmon to unblock other commands. #8501
	// demonstrates nicely that a high stop timeout will block even simple
	// commands such as `podman ps` from progressing if the container lock
	// is held when busy-waiting for the container to be stopped.
	c.state.State = define.ContainerStateStopping
	if err := c.save(); err != nil {
		return errors.Wrapf(err, "error saving container %s state before stopping", c.ID())
	}
	if !c.batched {
		c.lock.Unlock()
	}

	// The actual stop happens with the lock released (see above).
	stopErr := c.ociRuntime.StopContainer(c, timeout, all)

	if !c.batched {
		c.lock.Lock()
		if err := c.syncContainer(); err != nil {
			switch errors.Cause(err) {
			// If the container has already been removed (e.g., via
			// the cleanup process), there's nothing left to do.
			case define.ErrNoSuchCtr, define.ErrCtrRemoved:
				return stopErr
			default:
				if stopErr != nil {
					logrus.Errorf("Error syncing container %s status: %v", c.ID(), err)
					return stopErr
				}
				return err
			}
		}
	}

	// We have to check stopErr *after* we lock again - otherwise, we have a
	// chance of panicking on a double-unlock. Ref: GH Issue 9615
	if stopErr != nil {
		return stopErr
	}

	// Since we're now subject to a race condition with other processes who
	// may have altered the state (and other data), let's check if the
	// state has changed. If so, we should return immediately and log a
	// warning.
	if c.state.State != define.ContainerStateStopping {
		logrus.Warnf(
			"Container %q state changed from %q to %q while waiting for it to be stopped: discontinuing stop procedure as another process interfered",
			c.ID(), define.ContainerStateStopping, c.state.State,
		)
		return nil
	}

	c.newContainerEvent(events.Stop)

	c.state.PID = 0
	c.state.ConmonPID = 0
	c.state.StoppedByUser = true

	if !conmonAlive {
		// Conmon is dead, so we can't expect an exit code.
		c.state.ExitCode = -1
		c.state.FinishedTime = time.Now()
		c.state.State = define.ContainerStateStopped
		if err := c.save(); err != nil {
			logrus.Errorf("Error saving container %s status: %v", c.ID(), err)
		}

		return errors.Wrapf(define.ErrConmonDead, "container %s conmon process missing, cannot retrieve exit code", c.ID())
	}

	if err := c.save(); err != nil {
		return errors.Wrapf(err, "error saving container %s state after stopping", c.ID())
	}

	// Wait until we have an exit file, and sync once we do
	if err := c.waitForExitFileAndSync(); err != nil {
		return err
	}

	return nil
}
1403 c.state.ExitCode = -1 1404 c.state.FinishedTime = time.Now() 1405 c.state.State = define.ContainerStateStopped 1406 if err := c.save(); err != nil { 1407 logrus.Errorf("Error saving container %s status: %v", c.ID(), err) 1408 } 1409 1410 return errors.Wrapf(define.ErrConmonDead, "container %s conmon process missing, cannot retrieve exit code", c.ID()) 1411 } 1412 1413 if err := c.save(); err != nil { 1414 return errors.Wrapf(err, "error saving container %s state after stopping", c.ID()) 1415 } 1416 1417 // Wait until we have an exit file, and sync once we do 1418 if err := c.waitForExitFileAndSync(); err != nil { 1419 return err 1420 } 1421 1422 return nil 1423 } 1424 1425 // Internal, non-locking function to pause a container 1426 func (c *Container) pause() error { 1427 if c.config.NoCgroups { 1428 return errors.Wrapf(define.ErrNoCgroups, "cannot pause without using CGroups") 1429 } 1430 1431 if rootless.IsRootless() { 1432 cgroupv2, err := cgroups.IsCgroup2UnifiedMode() 1433 if err != nil { 1434 return errors.Wrap(err, "failed to determine cgroupversion") 1435 } 1436 if !cgroupv2 { 1437 return errors.Wrap(define.ErrNoCgroups, "can not pause containers on rootless containers with cgroup V1") 1438 } 1439 } 1440 1441 if err := c.ociRuntime.PauseContainer(c); err != nil { 1442 // TODO when using docker-py there is some sort of race/incompatibility here 1443 return err 1444 } 1445 1446 logrus.Debugf("Paused container %s", c.ID()) 1447 1448 c.state.State = define.ContainerStatePaused 1449 1450 return c.save() 1451 } 1452 1453 // Internal, non-locking function to unpause a container 1454 func (c *Container) unpause() error { 1455 if c.config.NoCgroups { 1456 return errors.Wrapf(define.ErrNoCgroups, "cannot unpause without using CGroups") 1457 } 1458 1459 if err := c.ociRuntime.UnpauseContainer(c); err != nil { 1460 // TODO when using docker-py there is some sort of race/incompatibility here 1461 return err 1462 } 1463 1464 logrus.Debugf("Unpaused container %s", 
// Internal, non-locking function to restart a container
func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retErr error) {
	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStateExited) {
		return errors.Wrapf(define.ErrCtrStateInvalid, "unable to restart a container in a paused or unknown state")
	}

	c.newContainerEvent(events.Restart)

	if c.state.State == define.ContainerStateRunning {
		// Remember conmon's PID before stop() resets it, so we can
		// force-close its FDs below.
		conmonPID := c.state.ConmonPID
		if err := c.stop(timeout); err != nil {
			return err
		}
		// Old versions of conmon have a bug where they create the exit file before
		// closing open file descriptors causing a race condition when restarting
		// containers with open ports since we cannot bind the ports as they're not
		// yet closed by conmon.
		//
		// Killing the old conmon PID is ~okay since it forces the FDs of old conmons
		// to be closed, while it's a NOP for newer versions which should have
		// exited already.
		if conmonPID != 0 {
			// Ignore errors from FindProcess() as conmon could already have exited.
			p, err := os.FindProcess(conmonPID)
			if p != nil && err == nil {
				if err = p.Kill(); err != nil {
					logrus.Debugf("error killing conmon process: %v", err)
				}
			}
		}
		// Ensure we tear down the container network so it will be
		// recreated - otherwise, behavior of restart differs from stop
		// and start
		if err := c.cleanupNetwork(); err != nil {
			return err
		}
	}
	// Any later failure must undo the preparation done below.
	defer func() {
		if retErr != nil {
			if err := c.cleanup(ctx); err != nil {
				logrus.Errorf("error cleaning up container %s: %v", c.ID(), err)
			}
		}
	}()
	if err := c.prepare(); err != nil {
		return err
	}

	if c.state.State == define.ContainerStateStopped {
		// Reinitialize the container if we need to
		if err := c.reinit(ctx, false); err != nil {
			return err
		}
	} else if c.state.State == define.ContainerStateConfigured ||
		c.state.State == define.ContainerStateExited {
		// Initialize the container
		if err := c.init(ctx, false); err != nil {
			return err
		}
	}
	return c.start()
}
// mountStorage sets up the container's root filesystem
// It mounts the image and any other requested mounts
// TODO: Add ability to override mount label so we can use this for Mount() too
// TODO: Can we use this for export? Copying SHM into the export might not be
// good
func (c *Container) mountStorage() (_ string, deferredErr error) {
	var err error
	// Container already mounted, nothing to do
	if c.state.Mounted {
		return c.state.Mountpoint, nil
	}

	mounted, err := mount.Mounted(c.config.ShmDir)
	if err != nil {
		return "", errors.Wrapf(err, "unable to determine if %q is mounted", c.config.ShmDir)
	}

	// Mount a fresh SHM tmpfs unless one is already mounted or the spec
	// supplies its own /dev/shm mount.
	if !mounted && !MountExists(c.config.Spec.Mounts, "/dev/shm") {
		shmOptions := fmt.Sprintf("mode=1777,size=%d", c.config.ShmSize)
		if err := c.mountSHM(shmOptions); err != nil {
			return "", err
		}
		if err := os.Chown(c.config.ShmDir, c.RootUID(), c.RootGID()); err != nil {
			return "", errors.Wrapf(err, "failed to chown %s", c.config.ShmDir)
		}
		// Undo the SHM mount if any later step fails.
		defer func() {
			if deferredErr != nil {
				if err := c.unmountSHM(c.config.ShmDir); err != nil {
					logrus.Errorf("Error unmounting SHM for container %s after mount error: %v", c.ID(), err)
				}
			}
		}()
	}

	// We need to mount the container before volumes - to ensure the copyup
	// works properly.
	mountPoint := c.config.Rootfs
	if mountPoint == "" {
		mountPoint, err = c.mount()
		if err != nil {
			return "", err
		}
		// Undo the rootfs mount if any later step fails.
		defer func() {
			if deferredErr != nil {
				if err := c.unmount(false); err != nil {
					logrus.Errorf("Error unmounting container %s after mount error: %v", c.ID(), err)
				}
			}
		}()
	}

	rootUID, rootGID := c.RootUID(), c.RootGID()

	// Hold an O_PATH fd on the mount point so the *at syscalls below
	// operate relative to the container root.
	dirfd, err := unix.Open(mountPoint, unix.O_RDONLY|unix.O_PATH, 0)
	if err != nil {
		return "", errors.Wrap(err, "open mount point")
	}
	defer unix.Close(dirfd)

	err = unix.Mkdirat(dirfd, "etc", 0755)
	if err != nil && !os.IsExist(err) {
		return "", errors.Wrap(err, "create /etc")
	}
	// If the etc directory was created, chown it to root in the container
	if err == nil && (rootUID != 0 || rootGID != 0) {
		err = unix.Fchownat(dirfd, "etc", rootUID, rootGID, unix.AT_SYMLINK_NOFOLLOW)
		if err != nil {
			return "", errors.Wrap(err, "chown /etc")
		}
	}

	// SecureJoin guards against /etc being a symlink that escapes the rootfs.
	etcInTheContainerPath, err := securejoin.SecureJoin(mountPoint, "etc")
	if err != nil {
		return "", errors.Wrap(err, "resolve /etc in the container")
	}

	etcInTheContainerFd, err := unix.Open(etcInTheContainerPath, unix.O_RDONLY|unix.O_PATH, 0)
	if err != nil {
		return "", errors.Wrap(err, "open /etc in the container")
	}
	defer unix.Close(etcInTheContainerFd)

	// If /etc/mtab does not exist in container image, then we need to
	// create it, so that mount command within the container will work.
	err = unix.Symlinkat("/proc/mounts", etcInTheContainerFd, "mtab")
	if err != nil && !os.IsExist(err) {
		return "", errors.Wrap(err, "creating /etc/mtab symlink")
	}
	// If the symlink was created, then also chown it to root in the container
	if err == nil && (rootUID != 0 || rootGID != 0) {
		err = unix.Fchownat(etcInTheContainerFd, "mtab", rootUID, rootGID, unix.AT_SYMLINK_NOFOLLOW)
		if err != nil {
			return "", errors.Wrap(err, "chown /etc/mtab")
		}
	}

	// Request a mount of all named volumes
	for _, v := range c.config.NamedVolumes {
		vol, err := c.mountNamedVolume(v, mountPoint)
		if err != nil {
			return "", err
		}
		// Unmount this volume (under its own lock) if a later volume
		// fails to mount.
		defer func() {
			if deferredErr == nil {
				return
			}
			vol.lock.Lock()
			if err := vol.unmount(false); err != nil {
				logrus.Errorf("Error unmounting volume %s after error mounting container %s: %v", vol.Name(), c.ID(), err)
			}
			vol.lock.Unlock()
		}()
	}

	return mountPoint, nil
}
// Mount a single named volume into the container.
// If necessary, copy up image contents into the volume.
// Does not verify that the name volume given is actually present in container
// config.
// Returns the volume that was mounted.
func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string) (*Volume, error) {
	logrus.Debugf("Going to mount named volume %s", v.Name)
	vol, err := c.runtime.state.Volume(v.Name)
	if err != nil {
		return nil, errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID())
	}

	// Taking the volume lock while the container holds the same lock ID
	// would deadlock; refuse up front.
	if vol.config.LockID == c.config.LockID {
		return nil, errors.Wrapf(define.ErrWillDeadlock, "container %s and volume %s share lock ID %d", c.ID(), vol.Name(), c.config.LockID)
	}
	vol.lock.Lock()
	defer vol.lock.Unlock()
	if vol.needsMount() {
		if err := vol.mount(); err != nil {
			return nil, errors.Wrapf(err, "error mounting volume %s for container %s", vol.Name(), c.ID())
		}
	}
	// The volume may need a copy-up. Check the state.
	if err := vol.update(); err != nil {
		return nil, err
	}
	if vol.state.NeedsCopyUp {
		logrus.Debugf("Copying up contents from container %s to volume %s", c.ID(), vol.Name())

		// Set NeedsCopyUp to false immediately, so we don't try this
		// again when there are already files copied.
		vol.state.NeedsCopyUp = false
		if err := vol.save(); err != nil {
			return nil, err
		}

		// If the volume is not empty, we should not copy up.
		volMount := vol.mountPoint()
		contents, err := ioutil.ReadDir(volMount)
		if err != nil {
			return nil, errors.Wrapf(err, "error listing contents of volume %s mountpoint when copying up from container %s", vol.Name(), c.ID())
		}
		if len(contents) > 0 {
			// The volume is not empty. It was likely modified
			// outside of Podman. For safety, let's not copy up into
			// it. Fixes CVE-2020-1726.
			return vol, nil
		}

		// SecureJoin keeps the source path inside the container rootfs.
		srcDir, err := securejoin.SecureJoin(mountpoint, v.Dest)
		if err != nil {
			return nil, errors.Wrapf(err, "error calculating destination path to copy up container %s volume %s", c.ID(), vol.Name())
		}
		// Do a manual stat on the source directory to verify existence.
		// Skip the rest if it exists.
		// TODO: Should this be stat or lstat? I'm using lstat because I
		// think copy-up doesn't happen when the source is a link.
		srcStat, err := os.Lstat(srcDir)
		if err != nil {
			if os.IsNotExist(err) {
				// Source does not exist, don't bother copying
				// up.
				return vol, nil
			}
			return nil, errors.Wrapf(err, "error identifying source directory for copy up into volume %s", vol.Name())
		}
		// If it's not a directory we're mounting over it.
		if !srcStat.IsDir() {
			return vol, nil
		}
		// Read contents, do not bother continuing if it's empty. Fixes
		// a bizarre issue where something copier.Get will ENOENT on
		// empty directories and sometimes it will not.
		// RHBZ#1928643
		srcContents, err := ioutil.ReadDir(srcDir)
		if err != nil {
			return nil, errors.Wrapf(err, "error reading contents of source directory for copy up into volume %s", vol.Name())
		}
		if len(srcContents) == 0 {
			return vol, nil
		}

		// Buildah Copier accepts a reader, so we'll need a pipe.
		reader, writer := io.Pipe()
		defer reader.Close()

		errChan := make(chan error, 1)

		logrus.Infof("About to copy up into volume %s", vol.Name())

		// Copy, container side: get a tar archive of what needs to be
		// streamed into the volume.
		go func() {
			defer writer.Close()
			getOptions := copier.GetOptions{
				KeepDirectoryNames: false,
			}
			errChan <- copier.Get(srcDir, "", getOptions, []string{"/."}, writer)
		}()

		// Copy, volume side: stream what we've written to the pipe, into
		// the volume.
		copyOpts := copier.PutOptions{}
		if err := copier.Put(volMount, "", copyOpts, reader); err != nil {
			err2 := <-errChan
			if err2 != nil {
				logrus.Errorf("Error streaming contents of container %s directory for volume copy-up: %v", c.ID(), err2)
			}
			return nil, errors.Wrapf(err, "error copying up to volume %s", vol.Name())
		}

		// Propagate any error from the tar-producing goroutine above.
		if err := <-errChan; err != nil {
			return nil, errors.Wrapf(err, "error streaming container content for copy up into volume %s", vol.Name())
		}
	}
	return vol, nil
}
// cleanupStorage unmounts and cleans up the container's root filesystem
func (c *Container) cleanupStorage() error {
	if !c.state.Mounted {
		// Already unmounted, do nothing
		logrus.Debugf("Container %s storage is already unmounted, skipping...", c.ID())
		return nil
	}

	// Keep going on errors: cleanupErr holds the most recent failure,
	// earlier ones are logged before being overwritten.
	var cleanupErr error

	for _, containerMount := range c.config.Mounts {
		if err := c.unmountSHM(containerMount); err != nil {
			if cleanupErr != nil {
				logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr)
			}
			cleanupErr = err
		}
	}

	if err := c.cleanupOverlayMounts(); err != nil {
		// If the container can't remove content report the error
		logrus.Errorf("Failed to cleanup overlay mounts for %s: %v", c.ID(), err)
		cleanupErr = err
	}

	// User-supplied rootfs containers have no storage-managed mount to undo.
	if c.config.Rootfs != "" {
		return cleanupErr
	}

	if err := c.unmount(false); err != nil {
		// If the container has already been removed, warn but don't
		// error
		// We still want to be able to kick the container out of the
		// state
		if errors.Cause(err) == storage.ErrNotAContainer || errors.Cause(err) == storage.ErrContainerUnknown || errors.Cause(err) == storage.ErrLayerNotMounted {
			logrus.Errorf("Storage for container %s has been removed", c.ID())
		} else {
			if cleanupErr != nil {
				logrus.Errorf("Error cleaning up container %s storage: %v", c.ID(), cleanupErr)
			}
			cleanupErr = err
		}
	}

	// Request an unmount of all named volumes
	for _, v := range c.config.NamedVolumes {
		vol, err := c.runtime.state.Volume(v.Name)
		if err != nil {
			if cleanupErr != nil {
				logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr)
			}
			cleanupErr = errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID())

			// We need to try and unmount every volume, so continue
			// if they fail.
			continue
		}

		if vol.needsMount() {
			vol.lock.Lock()
			if err := vol.unmount(false); err != nil {
				if cleanupErr != nil {
					logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr)
				}
				cleanupErr = errors.Wrapf(err, "error unmounting volume %s for container %s", vol.Name(), c.ID())
			}
			vol.lock.Unlock()
		}
	}

	c.state.Mountpoint = ""
	c.state.Mounted = false

	// Persist the unmounted state while the container is still valid.
	if c.valid {
		if err := c.save(); err != nil {
			if cleanupErr != nil {
				logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr)
			}
			cleanupErr = err
		}
	}
	return cleanupErr
}
storage.ErrNotAContainer || errors.Cause(err) == storage.ErrContainerUnknown || errors.Cause(err) == storage.ErrLayerNotMounted { 1804 logrus.Errorf("Storage for container %s has been removed", c.ID()) 1805 } else { 1806 if cleanupErr != nil { 1807 logrus.Errorf("Error cleaning up container %s storage: %v", c.ID(), cleanupErr) 1808 } 1809 cleanupErr = err 1810 } 1811 } 1812 1813 // Request an unmount of all named volumes 1814 for _, v := range c.config.NamedVolumes { 1815 vol, err := c.runtime.state.Volume(v.Name) 1816 if err != nil { 1817 if cleanupErr != nil { 1818 logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr) 1819 } 1820 cleanupErr = errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID()) 1821 1822 // We need to try and unmount every volume, so continue 1823 // if they fail. 1824 continue 1825 } 1826 1827 if vol.needsMount() { 1828 vol.lock.Lock() 1829 if err := vol.unmount(false); err != nil { 1830 if cleanupErr != nil { 1831 logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr) 1832 } 1833 cleanupErr = errors.Wrapf(err, "error unmounting volume %s for container %s", vol.Name(), c.ID()) 1834 } 1835 vol.lock.Unlock() 1836 } 1837 } 1838 1839 c.state.Mountpoint = "" 1840 c.state.Mounted = false 1841 1842 if c.valid { 1843 if err := c.save(); err != nil { 1844 if cleanupErr != nil { 1845 logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr) 1846 } 1847 cleanupErr = err 1848 } 1849 } 1850 return cleanupErr 1851 } 1852 1853 // Unmount the a container and free its resources 1854 func (c *Container) cleanup(ctx context.Context) error { 1855 var lastError error 1856 1857 logrus.Debugf("Cleaning up container %s", c.ID()) 1858 1859 // Remove healthcheck unit/timer file if it execs 1860 if c.config.HealthCheckConfig != nil { 1861 if err := c.removeTimer(); err != nil { 1862 logrus.Errorf("Error removing timer for container %s healthcheck: %v", c.ID(), err) 1863 } 1864 } 1865 1866 // 
Clean up network namespace, if present 1867 if err := c.cleanupNetwork(); err != nil { 1868 lastError = errors.Wrapf(err, "error removing container %s network", c.ID()) 1869 } 1870 1871 // Remove the container from the runtime, if necessary. 1872 // Do this *before* unmounting storage - some runtimes (e.g. Kata) 1873 // apparently object to having storage removed while the container still 1874 // exists. 1875 if err := c.cleanupRuntime(ctx); err != nil { 1876 if lastError != nil { 1877 logrus.Errorf("Error removing container %s from OCI runtime: %v", c.ID(), err) 1878 } else { 1879 lastError = err 1880 } 1881 } 1882 1883 // Unmount storage 1884 if err := c.cleanupStorage(); err != nil { 1885 if lastError != nil { 1886 logrus.Errorf("Error unmounting container %s storage: %v", c.ID(), err) 1887 } else { 1888 lastError = errors.Wrapf(err, "error unmounting container %s storage", c.ID()) 1889 } 1890 } 1891 1892 // Unmount image volumes 1893 for _, v := range c.config.ImageVolumes { 1894 img, _, err := c.runtime.LibimageRuntime().LookupImage(v.Source, nil) 1895 if err != nil { 1896 if lastError == nil { 1897 lastError = err 1898 continue 1899 } 1900 logrus.Errorf("error unmounting image volume %q:%q :%v", v.Source, v.Dest, err) 1901 } 1902 if err := img.Unmount(false); err != nil { 1903 if lastError == nil { 1904 lastError = err 1905 continue 1906 } 1907 logrus.Errorf("error unmounting image volume %q:%q :%v", v.Source, v.Dest, err) 1908 } 1909 } 1910 1911 return lastError 1912 } 1913 1914 // delete deletes the container and runs any configured poststop 1915 // hooks. 
1916 func (c *Container) delete(ctx context.Context) error { 1917 if err := c.ociRuntime.DeleteContainer(c); err != nil { 1918 return errors.Wrapf(err, "error removing container %s from runtime", c.ID()) 1919 } 1920 1921 if err := c.postDeleteHooks(ctx); err != nil { 1922 return errors.Wrapf(err, "container %s poststop hooks", c.ID()) 1923 } 1924 1925 return nil 1926 } 1927 1928 // postDeleteHooks runs the poststop hooks (if any) as specified by 1929 // the OCI Runtime Specification (which requires them to run 1930 // post-delete, despite the stage name). 1931 func (c *Container) postDeleteHooks(ctx context.Context) error { 1932 if c.state.ExtensionStageHooks != nil { 1933 extensionHooks, ok := c.state.ExtensionStageHooks["poststop"] 1934 if ok { 1935 state, err := json.Marshal(spec.State{ 1936 Version: spec.Version, 1937 ID: c.ID(), 1938 Status: "stopped", 1939 Bundle: c.bundlePath(), 1940 Annotations: c.config.Spec.Annotations, 1941 }) 1942 if err != nil { 1943 return err 1944 } 1945 for i, hook := range extensionHooks { 1946 hook := hook 1947 logrus.Debugf("container %s: invoke poststop hook %d, path %s", c.ID(), i, hook.Path) 1948 var stderr, stdout bytes.Buffer 1949 hookErr, err := exec.Run(ctx, &hook, state, &stdout, &stderr, exec.DefaultPostKillTimeout) 1950 if err != nil { 1951 logrus.Warnf("container %s: poststop hook %d: %v", c.ID(), i, err) 1952 if hookErr != err { 1953 logrus.Debugf("container %s: poststop hook %d (hook error): %v", c.ID(), i, hookErr) 1954 } 1955 stdoutString := stdout.String() 1956 if stdoutString != "" { 1957 logrus.Debugf("container %s: poststop hook %d: stdout:\n%s", c.ID(), i, stdoutString) 1958 } 1959 stderrString := stderr.String() 1960 if stderrString != "" { 1961 logrus.Debugf("container %s: poststop hook %d: stderr:\n%s", c.ID(), i, stderrString) 1962 } 1963 } 1964 } 1965 } 1966 } 1967 1968 return nil 1969 } 1970 1971 // writeStringToRundir writes the given string to a file with the given name in 1972 // the container's 
temporary files directory. The file will be chown'd to the 1973 // container's root user and have an appropriate SELinux label set. 1974 // If a file with the same name already exists, it will be deleted and recreated 1975 // with the new contents. 1976 // Returns the full path to the new file. 1977 func (c *Container) writeStringToRundir(destFile, contents string) (string, error) { 1978 destFileName := filepath.Join(c.state.RunDir, destFile) 1979 1980 if err := os.Remove(destFileName); err != nil && !os.IsNotExist(err) { 1981 return "", errors.Wrapf(err, "error removing %s for container %s", destFile, c.ID()) 1982 } 1983 1984 if err := writeStringToPath(destFileName, contents, c.config.MountLabel, c.RootUID(), c.RootGID()); err != nil { 1985 return "", err 1986 } 1987 1988 return destFileName, nil 1989 } 1990 1991 // writeStringToStaticDir writes the given string to a file with the given name 1992 // in the container's permanent files directory. The file will be chown'd to the 1993 // container's root user and have an appropriate SELinux label set. 1994 // Unlike writeStringToRundir, will *not* delete and re-create if the file 1995 // already exists (will instead error). 1996 // Returns the full path to the new file. 
1997 func (c *Container) writeStringToStaticDir(filename, contents string) (string, error) { 1998 destFileName := filepath.Join(c.config.StaticDir, filename) 1999 2000 if err := writeStringToPath(destFileName, contents, c.config.MountLabel, c.RootUID(), c.RootGID()); err != nil { 2001 return "", err 2002 } 2003 2004 return destFileName, nil 2005 } 2006 2007 // appendStringToRunDir appends the provided string to the runtimedir file 2008 func (c *Container) appendStringToRunDir(destFile, output string) (string, error) { 2009 destFileName := filepath.Join(c.state.RunDir, destFile) 2010 2011 f, err := os.OpenFile(destFileName, os.O_APPEND|os.O_RDWR, 0600) 2012 if err != nil { 2013 return "", err 2014 } 2015 defer f.Close() 2016 2017 compareStr := strings.TrimRight(output, "\n") 2018 scanner := bufio.NewScanner(f) 2019 scanner.Split(bufio.ScanLines) 2020 2021 for scanner.Scan() { 2022 if strings.Compare(scanner.Text(), compareStr) == 0 { 2023 return filepath.Join(c.state.RunDir, destFile), nil 2024 } 2025 } 2026 2027 if _, err := f.WriteString(output); err != nil { 2028 return "", errors.Wrapf(err, "unable to write %s", destFileName) 2029 } 2030 2031 return filepath.Join(c.state.RunDir, destFile), nil 2032 } 2033 2034 // saveSpec saves the OCI spec to disk, replacing any existing specs for the container 2035 func (c *Container) saveSpec(spec *spec.Spec) error { 2036 // If the OCI spec already exists, we need to replace it 2037 // Cannot guarantee some things, e.g. 
network namespaces, have the same 2038 // paths 2039 jsonPath := filepath.Join(c.bundlePath(), "config.json") 2040 if _, err := os.Stat(jsonPath); err != nil { 2041 if !os.IsNotExist(err) { 2042 return errors.Wrapf(err, "error doing stat on container %s spec", c.ID()) 2043 } 2044 // The spec does not exist, we're fine 2045 } else { 2046 // The spec exists, need to remove it 2047 if err := os.Remove(jsonPath); err != nil { 2048 return errors.Wrapf(err, "error replacing runtime spec for container %s", c.ID()) 2049 } 2050 } 2051 2052 fileJSON, err := json.Marshal(spec) 2053 if err != nil { 2054 return errors.Wrapf(err, "error exporting runtime spec for container %s to JSON", c.ID()) 2055 } 2056 if err := ioutil.WriteFile(jsonPath, fileJSON, 0644); err != nil { 2057 return errors.Wrapf(err, "error writing runtime spec JSON for container %s to disk", c.ID()) 2058 } 2059 2060 logrus.Debugf("Created OCI spec for container %s at %s", c.ID(), jsonPath) 2061 2062 c.state.ConfigPath = jsonPath 2063 2064 return nil 2065 } 2066 2067 // Warning: precreate hooks may alter 'config' in place. 
2068 func (c *Container) setupOCIHooks(ctx context.Context, config *spec.Spec) (map[string][]spec.Hook, error) { 2069 allHooks := make(map[string][]spec.Hook) 2070 if c.runtime.config.Engine.HooksDir == nil { 2071 if rootless.IsRootless() { 2072 return nil, nil 2073 } 2074 for _, hDir := range []string{hooks.DefaultDir, hooks.OverrideDir} { 2075 manager, err := hooks.New(ctx, []string{hDir}, []string{"precreate", "poststop"}) 2076 if err != nil { 2077 if os.IsNotExist(err) { 2078 continue 2079 } 2080 return nil, err 2081 } 2082 ociHooks, err := manager.Hooks(config, c.Spec().Annotations, len(c.config.UserVolumes) > 0) 2083 if err != nil { 2084 return nil, err 2085 } 2086 if len(ociHooks) > 0 || config.Hooks != nil { 2087 logrus.Warnf("implicit hook directories are deprecated; set --ociHooks-dir=%q explicitly to continue to load ociHooks from this directory", hDir) 2088 } 2089 for i, hook := range ociHooks { 2090 allHooks[i] = hook 2091 } 2092 } 2093 } else { 2094 manager, err := hooks.New(ctx, c.runtime.config.Engine.HooksDir, []string{"precreate", "poststop"}) 2095 if err != nil { 2096 return nil, err 2097 } 2098 2099 allHooks, err = manager.Hooks(config, c.Spec().Annotations, len(c.config.UserVolumes) > 0) 2100 if err != nil { 2101 return nil, err 2102 } 2103 } 2104 2105 hookErr, err := exec.RuntimeConfigFilter(ctx, allHooks["precreate"], config, exec.DefaultPostKillTimeout) 2106 if err != nil { 2107 logrus.Warnf("container %s: precreate hook: %v", c.ID(), err) 2108 if hookErr != nil && hookErr != err { 2109 logrus.Debugf("container %s: precreate hook (hook error): %v", c.ID(), hookErr) 2110 } 2111 return nil, err 2112 } 2113 2114 return allHooks, nil 2115 } 2116 2117 // mount mounts the container's root filesystem 2118 func (c *Container) mount() (string, error) { 2119 if c.state.State == define.ContainerStateRemoving { 2120 return "", errors.Wrapf(define.ErrCtrStateInvalid, "cannot mount container %s as it is being removed", c.ID()) 2121 } 2122 2123 mountPoint, 
err := c.runtime.storageService.MountContainerImage(c.ID()) 2124 if err != nil { 2125 return "", errors.Wrapf(err, "error mounting storage for container %s", c.ID()) 2126 } 2127 mountPoint, err = filepath.EvalSymlinks(mountPoint) 2128 if err != nil { 2129 return "", errors.Wrapf(err, "error resolving storage path for container %s", c.ID()) 2130 } 2131 if err := os.Chown(mountPoint, c.RootUID(), c.RootGID()); err != nil { 2132 return "", errors.Wrapf(err, "cannot chown %s to %d:%d", mountPoint, c.RootUID(), c.RootGID()) 2133 } 2134 return mountPoint, nil 2135 } 2136 2137 // unmount unmounts the container's root filesystem 2138 func (c *Container) unmount(force bool) error { 2139 // Also unmount storage 2140 if _, err := c.runtime.storageService.UnmountContainerImage(c.ID(), force); err != nil { 2141 return errors.Wrapf(err, "error unmounting container %s root filesystem", c.ID()) 2142 } 2143 2144 return nil 2145 } 2146 2147 // checkReadyForRemoval checks whether the given container is ready to be 2148 // removed. 2149 // These checks are only used if force-remove is not specified. 2150 // If it is, we'll remove the container anyways. 2151 // Returns nil if safe to remove, or an error describing why it's unsafe if not. 
2152 func (c *Container) checkReadyForRemoval() error { 2153 if c.state.State == define.ContainerStateUnknown { 2154 return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is in invalid state", c.ID()) 2155 } 2156 2157 if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) && !c.IsInfra() { 2158 return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed without force", c.ID(), c.state.State.String()) 2159 } 2160 2161 // Check exec sessions 2162 sessions, err := c.getActiveExecSessions() 2163 if err != nil { 2164 return err 2165 } 2166 if len(sessions) != 0 { 2167 return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove container %s as it has active exec sessions", c.ID()) 2168 } 2169 2170 return nil 2171 } 2172 2173 // canWithPrevious return the stat of the preCheckPoint dir 2174 func (c *Container) canWithPrevious() error { 2175 _, err := os.Stat(c.PreCheckPointPath()) 2176 return err 2177 } 2178 2179 // prepareCheckpointExport writes the config and spec to 2180 // JSON files for later export 2181 func (c *Container) prepareCheckpointExport() error { 2182 // save live config 2183 if _, err := metadata.WriteJSONFile(c.config, c.bundlePath(), metadata.ConfigDumpFile); err != nil { 2184 return err 2185 } 2186 2187 // save spec 2188 jsonPath := filepath.Join(c.bundlePath(), "config.json") 2189 g, err := generate.NewFromFile(jsonPath) 2190 if err != nil { 2191 logrus.Debugf("generating spec for container %q failed with %v", c.ID(), err) 2192 return err 2193 } 2194 if _, err := metadata.WriteJSONFile(g.Config, c.bundlePath(), metadata.SpecDumpFile); err != nil { 2195 return err 2196 } 2197 2198 return nil 2199 } 2200 2201 // sortUserVolumes sorts the volumes specified for a container 2202 // between named and normal volumes 2203 func (c *Container) sortUserVolumes(ctrSpec *spec.Spec) ([]*ContainerNamedVolume, []spec.Mount) { 2204 namedUserVolumes := 
[]*ContainerNamedVolume{} 2205 userMounts := []spec.Mount{} 2206 2207 // We need to parse all named volumes and mounts into maps, so we don't 2208 // end up with repeated lookups for each user volume. 2209 // Map destination to struct, as destination is what is stored in 2210 // UserVolumes. 2211 namedVolumes := make(map[string]*ContainerNamedVolume) 2212 mounts := make(map[string]spec.Mount) 2213 for _, namedVol := range c.config.NamedVolumes { 2214 namedVolumes[namedVol.Dest] = namedVol 2215 } 2216 for _, mount := range ctrSpec.Mounts { 2217 mounts[mount.Destination] = mount 2218 } 2219 2220 for _, vol := range c.config.UserVolumes { 2221 if volume, ok := namedVolumes[vol]; ok { 2222 namedUserVolumes = append(namedUserVolumes, volume) 2223 } else if mount, ok := mounts[vol]; ok { 2224 userMounts = append(userMounts, mount) 2225 } else { 2226 logrus.Warnf("Could not find mount at destination %q when parsing user volumes for container %s", vol, c.ID()) 2227 } 2228 } 2229 return namedUserVolumes, userMounts 2230 } 2231 2232 // Check for an exit file, and handle one if present 2233 func (c *Container) checkExitFile() error { 2234 // If the container's not running, nothing to do. 2235 if !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping) { 2236 return nil 2237 } 2238 2239 exitFile, err := c.exitFilePath() 2240 if err != nil { 2241 return err 2242 } 2243 2244 // Check for the exit file 2245 info, err := os.Stat(exitFile) 2246 if err != nil { 2247 if os.IsNotExist(err) { 2248 // Container is still running, no error 2249 return nil 2250 } 2251 2252 return errors.Wrapf(err, "error running stat on container %s exit file", c.ID()) 2253 } 2254 2255 // Alright, it exists. Transition to Stopped state. 2256 c.state.State = define.ContainerStateStopped 2257 c.state.PID = 0 2258 c.state.ConmonPID = 0 2259 2260 // Read the exit file to get our stopped time and exit code. 
2261 return c.handleExitFile(exitFile, info) 2262 } 2263 2264 func (c *Container) hasNamespace(namespace spec.LinuxNamespaceType) bool { 2265 if c.config.Spec == nil || c.config.Spec.Linux == nil { 2266 return false 2267 } 2268 for _, n := range c.config.Spec.Linux.Namespaces { 2269 if n.Type == namespace { 2270 return true 2271 } 2272 } 2273 return false 2274 } 2275 2276 // extractSecretToStorage copies a secret's data from the secrets manager to the container's static dir 2277 func (c *Container) extractSecretToCtrStorage(secr *ContainerSecret) error { 2278 manager, err := c.runtime.SecretsManager() 2279 if err != nil { 2280 return err 2281 } 2282 _, data, err := manager.LookupSecretData(secr.Name) 2283 if err != nil { 2284 return err 2285 } 2286 secretFile := filepath.Join(c.config.SecretsPath, secr.Name) 2287 2288 hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), secr.UID, secr.GID) 2289 if err != nil { 2290 return errors.Wrap(err, "unable to extract secret") 2291 } 2292 err = ioutil.WriteFile(secretFile, data, 0644) 2293 if err != nil { 2294 return errors.Wrapf(err, "unable to create %s", secretFile) 2295 } 2296 if err := os.Lchown(secretFile, int(hostUID), int(hostGID)); err != nil { 2297 return err 2298 } 2299 if err := os.Chmod(secretFile, os.FileMode(secr.Mode)); err != nil { 2300 return err 2301 } 2302 if err := label.Relabel(secretFile, c.config.MountLabel, false); err != nil { 2303 return err 2304 } 2305 return nil 2306 }