// +build linux
// Copyright (c) 2016 Intel Corporation
// Copyright (c) 2014,2015,2016,2017 Docker, Inc.
// SPDX-License-Identifier: Apache-2.0
//

package virtcontainers

import (
	"context"
	"encoding/hex"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"syscall"
	"time"

	"github.com/containerd/cgroups"
	vccgroups "github.com/kata-containers/runtime/virtcontainers/pkg/cgroups"
	vcTypes "github.com/kata-containers/runtime/virtcontainers/pkg/types"
	"github.com/kata-containers/runtime/virtcontainers/types"
	"github.com/kata-containers/runtime/virtcontainers/utils"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	opentracing "github.com/opentracing/opentracing-go"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"golang.org/x/sys/unix"

	"github.com/kata-containers/runtime/virtcontainers/device/config"
	"github.com/kata-containers/runtime/virtcontainers/device/manager"
	"github.com/kata-containers/runtime/virtcontainers/pkg/rootless"
	"github.com/kata-containers/runtime/virtcontainers/store"
)

// https://github.com/torvalds/linux/blob/master/include/uapi/linux/major.h
// This file has definitions for major device numbers.
var cdromMajors = map[int64]string{
	11: "SCSI_CDROM_MAJOR",
	15: "CDU31A_CDROM_MAJOR",
	16: "GOLDSTAR_CDROM_MAJOR",
	17: "OPTICS_CDROM_MAJOR",
	18: "SANYO_CDROM_MAJOR",
	20: "MITSUMI_X_CDROM_MAJOR",
	23: "MITSUMI_CDROM_MAJOR",
	24: "CDU535_CDROM_MAJOR",
	25: "MATSUSHITA_CDROM_MAJOR",
	26: "MATSUSHITA_CDROM2_MAJOR",
	27: "MATSUSHITA_CDROM3_MAJOR",
	28: "MATSUSHITA_CDROM4_MAJOR",
	29: "AZTECH_CDROM_MAJOR",
	32: "CM206_CDROM_MAJOR",
}

// https://github.com/torvalds/linux/blob/master/include/uapi/linux/major.h
// #define FLOPPY_MAJOR 2
const floppyMajor = int64(2)
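
// As an example, a CD-ROM node such as /dev/sr0 typically carries major number 11
// (SCSI_CDROM_MAJOR); filterDevices below uses these tables to skip attaching such
// devices, as well as floppy drives, to the guest.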

// Process gathers data related to a container process.
type Process struct {
	// Token is the process execution context ID. It must be
	// unique per sandbox.
	// Token is used to manipulate processes for containers
	// that have not started yet, and later identify them
	// uniquely within a sandbox.
	Token string

	// Pid is the process ID as seen by the host software
	// stack, e.g. CRI-O, containerd. This is typically the
	// shim PID.
	Pid int

	StartTime time.Time
}

// ContainerStatus describes a container status.
type ContainerStatus struct {
	ID        string
	State     types.ContainerState
	PID       int
	StartTime time.Time
	RootFs    string
	Spec      *specs.Spec

	// Annotations allow clients to store arbitrary values,
	// for example to add additional status values required
	// to support particular specifications.
	Annotations map[string]string
}

// ThrottlingData gathers data related to container CPU throttling.
type ThrottlingData struct {
	// Number of periods with throttling active
	Periods uint64 `json:"periods,omitempty"`
	// Number of periods when the container hit its throttling limit.
	ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
	// Aggregate time the container was throttled for in nanoseconds.
	ThrottledTime uint64 `json:"throttled_time,omitempty"`
}

// CPUUsage denotes the usage of a CPU.
// All CPU stats are aggregate since container inception.
type CPUUsage struct {
	// Total CPU time consumed.
	// Units: nanoseconds.
	TotalUsage uint64 `json:"total_usage,omitempty"`
	// Total CPU time consumed per core.
	// Units: nanoseconds.
	PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
	// Time spent by tasks of the cgroup in kernel mode.
	// Units: nanoseconds.
	UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
	// Time spent by tasks of the cgroup in user mode.
	// Units: nanoseconds.
	UsageInUsermode uint64 `json:"usage_in_usermode"`
}
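
// As a small sketch of how the json tags above behave (assuming the standard
// encoding/json package): zero-valued fields tagged omitempty, such as total_usage,
// are dropped, while untagged fields such as usage_in_kernelmode are always emitted:
//
//	var usage CPUUsage
//	data, _ := json.Marshal(usage)
//	// data is {"usage_in_kernelmode":0,"usage_in_usermode":0}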

// CPUStats describes the CPU stats.
type CPUStats struct {
	CPUUsage       CPUUsage       `json:"cpu_usage,omitempty"`
	ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
}

// MemoryData gathers data related to memory.
type MemoryData struct {
	Usage    uint64 `json:"usage,omitempty"`
	MaxUsage uint64 `json:"max_usage,omitempty"`
	Failcnt  uint64 `json:"failcnt"`
	Limit    uint64 `json:"limit"`
}

// MemoryStats describes the memory stats.
type MemoryStats struct {
	// memory used for cache
	Cache uint64 `json:"cache,omitempty"`
	// usage of memory
	Usage MemoryData `json:"usage,omitempty"`
	// usage of memory swap
	SwapUsage MemoryData `json:"swap_usage,omitempty"`
	// usage of kernel memory
	KernelUsage MemoryData `json:"kernel_usage,omitempty"`
	// usage of kernel TCP memory
	KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
	// if true, memory usage is accounted for throughout a hierarchy of cgroups.
	UseHierarchy bool `json:"use_hierarchy"`

	Stats map[string]uint64 `json:"stats,omitempty"`
}

// PidsStats describes the pids stats.
type PidsStats struct {
	// number of pids in the cgroup
	Current uint64 `json:"current,omitempty"`
	// active pids hard limit
	Limit uint64 `json:"limit,omitempty"`
}

// BlkioStatEntry gathers data related to a block device.
type BlkioStatEntry struct {
	Major uint64 `json:"major,omitempty"`
	Minor uint64 `json:"minor,omitempty"`
	Op    string `json:"op,omitempty"`
	Value uint64 `json:"value,omitempty"`
}

// BlkioStats describes block I/O stats.
type BlkioStats struct {
	// number of bytes transferred to and from the block device
	IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
	IoServicedRecursive     []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
	IoQueuedRecursive       []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
	IoServiceTimeRecursive  []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
	IoWaitTimeRecursive     []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
	IoMergedRecursive       []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
	IoTimeRecursive         []BlkioStatEntry `json:"io_time_recursive,omitempty"`
	SectorsRecursive        []BlkioStatEntry `json:"sectors_recursive,omitempty"`
}

// HugetlbStats describes hugetlb memory stats.
type HugetlbStats struct {
	// current res_counter usage for hugetlb
	Usage uint64 `json:"usage,omitempty"`
	// maximum usage ever recorded.
	MaxUsage uint64 `json:"max_usage,omitempty"`
	// number of times hugetlb usage allocation failed.
	Failcnt uint64 `json:"failcnt"`
}

// CgroupStats describes all cgroup subsystem stats.
type CgroupStats struct {
	CPUStats    CPUStats    `json:"cpu_stats,omitempty"`
	MemoryStats MemoryStats `json:"memory_stats,omitempty"`
	PidsStats   PidsStats   `json:"pids_stats,omitempty"`
	BlkioStats  BlkioStats  `json:"blkio_stats,omitempty"`
	// the map is in the format "size of hugepage: stats of the hugepage"
	HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
}

// NetworkStats describes all network stats.
type NetworkStats struct {
	// Name is the name of the network interface.
	Name string `json:"name,omitempty"`

	RxBytes   uint64 `json:"rx_bytes,omitempty"`
	RxPackets uint64 `json:"rx_packets,omitempty"`
	RxErrors  uint64 `json:"rx_errors,omitempty"`
	RxDropped uint64 `json:"rx_dropped,omitempty"`
	TxBytes   uint64 `json:"tx_bytes,omitempty"`
	TxPackets uint64 `json:"tx_packets,omitempty"`
	TxErrors  uint64 `json:"tx_errors,omitempty"`
	TxDropped uint64 `json:"tx_dropped,omitempty"`
}

// ContainerStats describes a container's stats.
type ContainerStats struct {
	CgroupStats  *CgroupStats
	NetworkStats []*NetworkStats
}

// ContainerResources describes container resources.
type ContainerResources struct {
	// VCPUs is the number of vCPUs used by the container.
	VCPUs uint32

	// MemByte is the memory used by the container, in bytes.
	MemByte int64
}

// ContainerConfig describes one container runtime configuration.
type ContainerConfig struct {
	ID string

	// RootFs is the container workload image on the host.
	RootFs RootFs

	// ReadonlyRootfs indicates if the rootfs should be mounted read-only.
	ReadonlyRootfs bool

	// Cmd specifies the command to run on a container.
	Cmd types.Cmd

	// Annotations allow clients to store arbitrary values,
	// for example to add additional status values required
	// to support particular specifications.
	Annotations map[string]string

	Mounts []Mount

	// Device configuration for devices that must be available within the container.
	DeviceInfos []config.DeviceInfo

	// Resources describes the container resources.
	Resources specs.LinuxResources

	// Raw OCI specification; it won't be saved to disk.
	CustomSpec *specs.Spec `json:"-"`
}

// valid checks that the container configuration is valid.
func (c *ContainerConfig) valid() bool {
	if c == nil {
		return false
	}

	if c.ID == "" {
		return false
	}

	return true
}
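
// A minimal configuration that passes valid() only needs a non-empty ID; in
// practice callers populate at least the rootfs and command as well, roughly
// (illustrative values):
//
//	cfg := ContainerConfig{
//		ID:     "container-1",
//		RootFs: RootFs{Target: "/path/to/rootfs", Mounted: true},
//		Cmd:    types.Cmd{Args: []string{"/bin/sh"}},
//	}
//	ok := cfg.valid() // true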

// SystemMountsInfo describes additional information for system mounts that the agent
// needs to handle.
type SystemMountsInfo struct {
	// Indicates if /dev has been passed as a bind mount for the host /dev
	BindMountDev bool

	// Size of /dev/shm assigned on the host.
	DevShmSize uint
}

// ContainerDevice describes a device associated with a container.
type ContainerDevice struct {
	// ID is the device ID referencing the device from the sandbox's device manager.
	ID string

	// ContainerPath is the device path displayed in the container.
	ContainerPath string

	// FileMode permission bits for the device.
	FileMode os.FileMode

	// UID is the user ID in the container namespace.
	UID uint32

	// GID is the group ID in the container namespace.
	GID uint32
}

// RootFs describes the container's rootfs.
type RootFs struct {
	// Source specifies the BlockDevice path.
	Source string
	// Target specifies where the rootfs is mounted if it has been mounted.
	Target string
	// Type specifies the type of filesystem to mount.
	Type string
	// Options specifies zero or more fstab style mount options.
	Options []string
	// Mounted specifies whether the rootfs has been mounted or not.
	Mounted bool
}

// Container describes a single container within a sandbox and its runtime environment.
// A Container can be created, deleted, started, stopped, listed, entered, paused and restored.
type Container struct {
	id        string
	sandboxID string

	rootFs RootFs

	config *ContainerConfig

	sandbox *Sandbox

	containerPath string
	rootfsSuffix  string

	state types.ContainerState

	process Process

	mounts []Mount

	devices []ContainerDevice

	systemMountsInfo SystemMountsInfo

	ctx context.Context

	store *store.VCStore
}
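
// A container is always driven through its sandbox; roughly (illustrative sketch,
// error handling omitted), the lifecycle implemented below is:
//
//	c, _ := newContainer(sandbox, &cfg) // build the Container and its devices/mounts
//	_ = c.create()                      // hotplug drive/devices, ask the agent to create it
//	_ = c.start()                       // transition Ready -> Running via the agent
//	_ = c.stop(false)                   // stop the workload, unmount and detach resources
//	_ = c.delete()                      // remove it from the sandbox once Ready/Stopped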

// ID returns the container identifier string.
func (c *Container) ID() string {
	return c.id
}

// Logger returns a logrus logger appropriate for logging Container messages.
func (c *Container) Logger() *logrus.Entry {
	return virtLog.WithFields(logrus.Fields{
		"subsystem": "container",
		"sandbox":   c.sandboxID,
	})
}

func (c *Container) trace(name string) (opentracing.Span, context.Context) {
	if c.ctx == nil {
		c.Logger().WithField("type", "bug").Error("trace called before context set")
		c.ctx = context.Background()
	}

	span, ctx := opentracing.StartSpanFromContext(c.ctx, name)

	span.SetTag("subsystem", "container")

	return span, ctx
}

// Sandbox returns the sandbox handler related to this container.
func (c *Container) Sandbox() VCSandbox {
	return c.sandbox
}

// Process returns the container process.
func (c *Container) Process() Process {
	return c.process
}

// GetToken returns the token related to this container's process.
func (c *Container) GetToken() string {
	return c.process.Token
}

// GetPid returns the pid related to this container's process.
func (c *Container) GetPid() int {
	return c.process.Pid
}

func (c *Container) setStateFstype(fstype string) error {
	c.state.Fstype = fstype

	return nil
}

// GetAnnotations returns the container's annotations.
func (c *Container) GetAnnotations() map[string]string {
	return c.config.Annotations
}

// GetPatchedOCISpec returns the container's OCI specification.
// This OCI specification was patched when the sandbox was created
// by containerCapabilities(), SetEphemeralStorageType() and others
// in order to support:
// * capabilities
// * Ephemeral storage
// * k8s empty dir
// If you need the original (vanilla) OCI spec,
// use compatoci.GetContainerSpec() instead.
func (c *Container) GetPatchedOCISpec() *specs.Spec {
	return c.config.CustomSpec
}

// storeContainer stores a container config.
func (c *Container) storeContainer() error {
	if err := c.sandbox.Save(); err != nil {
		return err
	}
	return nil
}

// setContainerState sets both the in-memory and on-disk state of the
// container.
func (c *Container) setContainerState(state types.StateString) error {
	if state == "" {
		return vcTypes.ErrNeedState
	}

	c.Logger().Debugf("Setting container state from %v to %v", c.state.State, state)
	// update in-memory state
	c.state.State = state

	if useOldStore(c.sandbox.ctx) {
		// the experimental runtime uses "persist.json", which doesn't need "state.json" anymore
		// update on-disk state
		if err := c.store.Store(store.State, c.state); err != nil {
			return err
		}
	} else {
		// flush data to storage
		if err := c.sandbox.Save(); err != nil {
			return err
		}
	}

	return nil
}

func (c *Container) shareFiles(m Mount, idx int, hostSharedDir, guestSharedDir string) (string, bool, error) {
	randBytes, err := utils.GenerateRandomBytes(8)
	if err != nil {
		return "", false, err
	}

	filename := fmt.Sprintf("%s-%s-%s", c.id, hex.EncodeToString(randBytes), filepath.Base(m.Destination))
	guestDest := filepath.Join(guestSharedDir, filename)

	// Copy the file to the container's rootfs if filesystem sharing is not supported, otherwise
	// bind mount it in the shared directory.
	caps := c.sandbox.hypervisor.capabilities()
	if !caps.IsFsSharingSupported() {
		c.Logger().Debug("filesystem sharing is not supported, files will be copied")

		fileInfo, err := os.Stat(m.Source)
		if err != nil {
			return "", false, err
		}

		// Ignore the mount if this is not a regular file (excludes
		// directory, socket, device, ...) as it cannot be handled by
		// a simple copy. But this should not be treated as an error,
		// only as a limitation.
		if !fileInfo.Mode().IsRegular() {
			c.Logger().WithField("ignored-file", m.Source).Debug("Ignoring non-regular file as FS sharing not supported")
			return "", true, nil
		}

		if err := c.sandbox.agent.copyFile(m.Source, guestDest); err != nil {
			return "", false, err
		}
	} else {
		// These mounts are created in the shared dir
		mountDest := filepath.Join(hostSharedDir, c.sandbox.id, filename)
		if err := bindMount(c.ctx, m.Source, mountDest, false, "private"); err != nil {
			return "", false, err
		}
		// Save HostPath mount value into the mount list of the container.
		c.mounts[idx].HostPath = mountDest
	}

	return guestDest, false, nil
}
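
// For example, with the 9pfs-backed shared directory, a bind mount whose
// Destination is "/etc/hosts" in container "abc" ends up under a name like
// "abc-<16 hex chars>-hosts": bind-mounted at <hostSharedDir>/<sandboxID>/<name>
// on the host and exposed to the agent as <guestSharedDir>/<name> in the guest.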

// mountSharedDirMounts handles bind-mounts by bind-mounting to the host shared
// directory, which is mounted through 9pfs in the VM.
// It also updates the container mount list with the HostPath info, and stores
// the container mounts to the storage. This way, we will have the HostPath info
// available when we need to unmount those mounts.
func (c *Container) mountSharedDirMounts(hostSharedDir, guestSharedDir string) (sharedDirMounts map[string]Mount, ignoredMounts map[string]Mount, err error) {
	sharedDirMounts = make(map[string]Mount)
	ignoredMounts = make(map[string]Mount)
	var devicesToDetach []string
	defer func() {
		if err != nil {
			for _, id := range devicesToDetach {
				c.sandbox.devManager.DetachDevice(id, c.sandbox)
			}
		}
	}()
	for idx, m := range c.mounts {
		// Skip mounting certain system paths from the source on the host side
		// into the container as it does not make sense to do so.
		// Example sources could be /sys/fs/cgroup etc.
		if isSystemMount(m.Source) {
			continue
		}

		if m.Type != "bind" {
			continue
		}

		// We need to treat /dev/shm as a special case. This is passed as a bind mount in the spec,
		// but it does not make sense to pass this as a 9p mount from the host side.
		// This needs to be handled purely in the guest, by allocating memory for this inside the VM.
		if m.Destination == "/dev/shm" {
			continue
		}

		// Check if the mount is a block device file. If it is, the block device will be
		// attached to the VM instead of passing this as a shared mount.
		if len(m.BlockDeviceID) > 0 {
			// Attach this block device, all other devices passed in the config have been attached at this point
			if err = c.sandbox.devManager.AttachDevice(m.BlockDeviceID, c.sandbox); err != nil {
				return nil, nil, err
			}
			devicesToDetach = append(devicesToDetach, m.BlockDeviceID)
			continue
		}

		// Ignore /dev, directories and all other device files. We handle
		// only regular files in /dev. It does not make sense to pass the host
		// device nodes to the guest.
		if isHostDevice(m.Destination) {
			continue
		}

		var ignore bool
		var guestDest string
		guestDest, ignore, err = c.shareFiles(m, idx, hostSharedDir, guestSharedDir)
		if err != nil {
			return nil, nil, err
		}

		// Expand the list of mounts to ignore.
		if ignore {
			ignoredMounts[m.Source] = Mount{Source: m.Source}
			continue
		}

		// Check if the mount is readonly, and let the agent handle the readonly mount
		// within the VM.
		readonly := false
		for _, flag := range m.Options {
			if flag == "ro" {
				readonly = true
				break
			}
		}

		sharedDirMount := Mount{
			Source:      guestDest,
			Destination: m.Destination,
			Type:        m.Type,
			Options:     m.Options,
			ReadOnly:    readonly,
		}

		sharedDirMounts[sharedDirMount.Destination] = sharedDirMount
	}

	return sharedDirMounts, ignoredMounts, nil
}

func (c *Container) unmountHostMounts() error {
	var span opentracing.Span
	span, c.ctx = c.trace("unmountHostMounts")
	defer span.Finish()

	for _, m := range c.mounts {
		if m.HostPath != "" {
			span, _ := c.trace("unmount")
			span.SetTag("host-path", m.HostPath)

			if err := syscall.Unmount(m.HostPath, syscall.MNT_DETACH|UmountNoFollow); err != nil {
				c.Logger().WithFields(logrus.Fields{
					"host-path": m.HostPath,
					"error":     err,
				}).Warn("Could not umount")
				return err
			}

			if m.Type == "bind" {
				s, err := os.Stat(m.HostPath)
				if err != nil {
					return errors.Wrapf(err, "Could not stat host-path %v", m.HostPath)
				}
				// Remove the empty file or directory
				if s.Mode().IsRegular() && s.Size() == 0 {
					os.Remove(m.HostPath)
				}
				if s.Mode().IsDir() {
					syscall.Rmdir(m.HostPath)
				}
			}

			span.Finish()
		}
	}

	return nil
}

func filterDevices(c *Container, devices []ContainerDevice) (ret []ContainerDevice) {
	for _, dev := range devices {
		major, _ := c.sandbox.devManager.GetDeviceByID(dev.ID).GetMajorMinor()
		if _, ok := cdromMajors[major]; ok {
			c.Logger().WithFields(logrus.Fields{
				"device": dev.ContainerPath,
			}).Info("Not attaching device because it is a CDROM")
			continue
		}

		if major == floppyMajor {
			c.Logger().WithFields(logrus.Fields{
				"device": dev.ContainerPath,
			}).Info("Not attaching device because it is a floppy drive")
			continue
		}

		ret = append(ret, dev)
	}
	return
}

func (c *Container) createBlockDevices() error {
	if !c.checkBlockDeviceSupport() {
		c.Logger().Warn("Block device not supported")
		return nil
	}

	// Iterate over all mounts and create a block device if the mount is block based.
	for i, m := range c.mounts {
		if len(m.BlockDeviceID) > 0 || m.Type != "bind" {
			// A non-empty m.BlockDeviceID indicates there's already a device
			// associated with the mount, so there is no need to create a new
			// device for it; we only create block devices for bind mounts.
			continue
		}

		var stat unix.Stat_t
		if err := unix.Stat(m.Source, &stat); err != nil {
			return fmt.Errorf("stat %q failed: %v", m.Source, err)
		}

		var di *config.DeviceInfo
		var err error

		// Check if the mount is a block device file. If it is, the block device will be
		// attached to the VM instead of passing this as a shared mount.
		if stat.Mode&unix.S_IFBLK == unix.S_IFBLK {
			di = &config.DeviceInfo{
				HostPath:      m.Source,
				ContainerPath: m.Destination,
				DevType:       "b",
				Major:         int64(unix.Major(stat.Rdev)),
				Minor:         int64(unix.Minor(stat.Rdev)),
			}
			// check whether the source can be used as a pmem device
		} else if di, err = config.PmemDeviceInfo(m.Source, m.Destination); err != nil {
			c.Logger().WithError(err).
				WithField("mount-source", m.Source).
				Debug("no loop device")
		}

		if err == nil && di != nil {
			b, err := c.sandbox.devManager.NewDevice(*di)
			if err != nil {
				// Do not return an error, try to create
				// devices for other mounts
				c.Logger().WithError(err).WithField("mount-source", m.Source).
					Error("device manager failed to create new device")
				continue
			}

			c.mounts[i].BlockDeviceID = b.DeviceID()
		}
	}

	return nil
}

// newContainer creates a Container structure from a sandbox and a container configuration.
func newContainer(sandbox *Sandbox, contConfig *ContainerConfig) (*Container, error) {
	span, _ := sandbox.trace("newContainer")
	defer span.Finish()

	if !contConfig.valid() {
		return &Container{}, fmt.Errorf("Invalid container configuration")
	}

	c := &Container{
		id:            contConfig.ID,
		sandboxID:     sandbox.id,
		rootFs:        contConfig.RootFs,
		config:        contConfig,
		sandbox:       sandbox,
		containerPath: filepath.Join(sandbox.id, contConfig.ID),
		rootfsSuffix:  "rootfs",
		state:         types.ContainerState{},
		process:       Process{},
		mounts:        contConfig.Mounts,
		ctx:           sandbox.ctx,
	}

	if useOldStore(sandbox.ctx) {
		ctrStore, err := store.NewVCContainerStore(sandbox.ctx, c.sandboxID, c.id)
		if err != nil {
			return nil, err
		}
		c.store = ctrStore
		state, err := c.store.LoadContainerState()
		if err == nil {
			c.state = state
		}

		var process Process
		if err := c.store.Load(store.Process, &process); err == nil {
			c.process = process
		}
	} else {
		// The experimental runtime uses "persist.json" instead of the legacy "state.json" as storage.
		err := c.Restore()
		if err == nil {
			// container restored
			return c, nil
		}

		// Unexpected error
		if !os.IsNotExist(err) && err != errContainerPersistNotExist {
			return nil, err
		}
	}

	// Go to the next step for a newly created container
	if err := c.createMounts(); err != nil {
		return nil, err
	}

	if err := c.createDevices(contConfig); err != nil {
		return nil, err
	}

	return c, nil
}

func (c *Container) loadMounts() ([]Mount, error) {
	var mounts []Mount
	if err := c.store.Load(store.Mounts, &mounts); err != nil {
		return []Mount{}, err
	}

	return mounts, nil
}

func (c *Container) loadDevices() ([]ContainerDevice, error) {
	var devices []ContainerDevice

	if err := c.store.Load(store.DeviceIDs, &devices); err != nil {
		return []ContainerDevice{}, err
	}

	return devices, nil
}

func (c *Container) createMounts() error {
	if useOldStore(c.sandbox.ctx) {
		mounts, err := c.loadMounts()
		if err == nil {
			// restore mounts from disk
			c.mounts = mounts
			return nil
		}
	}

	// Create block devices for the newly created container
	if err := c.createBlockDevices(); err != nil {
		return err
	}

	return nil
}

func (c *Container) createDevices(contConfig *ContainerConfig) error {
	// If the sandbox supports "newstore", only a newly created container can reach this function,
	// so we don't call restore when `supportNewStore` is true.
	if useOldStore(c.sandbox.ctx) {
		// Devices will be found in storage after the create stage has completed.
		// We load devices from storage at all other stages.
		storedDevices, err := c.loadDevices()
		if err == nil {
			c.devices = storedDevices
			return nil
		}
	}

	// If devices were not found in storage, create Device implementations
	// from the configuration. This should happen at create.
	var storedDevices []ContainerDevice
	for _, info := range contConfig.DeviceInfos {
		dev, err := c.sandbox.devManager.NewDevice(info)
		if err != nil {
			return err
		}

		storedDevices = append(storedDevices, ContainerDevice{
			ID:            dev.DeviceID(),
			ContainerPath: info.ContainerPath,
			FileMode:      info.FileMode,
			UID:           info.UID,
			GID:           info.GID,
		})
	}
	c.devices = filterDevices(c, storedDevices)
	return nil
}

// rollbackFailingContainerCreation rolls back important steps that might have
// been performed before the container creation failed.
// - Unplug CPU and memory resources from the VM.
// - Unplug devices from the VM.
func (c *Container) rollbackFailingContainerCreation() {
	if err := c.detachDevices(); err != nil {
		c.Logger().WithError(err).Error("rollback failed detachDevices()")
	}
	if err := c.removeDrive(); err != nil {
		c.Logger().WithError(err).Error("rollback failed removeDrive()")
	}
	if err := c.unmountHostMounts(); err != nil {
		c.Logger().WithError(err).Error("rollback failed unmountHostMounts()")
	}
	if err := bindUnmountContainerRootfs(c.ctx, kataHostSharedDir(), c.sandbox.id, c.id); err != nil {
		c.Logger().WithError(err).Error("rollback failed bindUnmountContainerRootfs()")
	}
}

func (c *Container) checkBlockDeviceSupport() bool {
	if !c.sandbox.config.HypervisorConfig.DisableBlockDeviceUse {
		agentCaps := c.sandbox.agent.capabilities()
		hypervisorCaps := c.sandbox.hypervisor.capabilities()

		if agentCaps.IsBlockDeviceSupported() && hypervisorCaps.IsBlockDeviceHotplugSupported() {
			return true
		}
	}

	return false
}

// create creates and starts a container inside a Sandbox. It has to be
// called only when a new container, not known by the sandbox, has to be created.
func (c *Container) create() (err error) {
	// In case the container creation fails, the following takes care
	// of rolling back all the actions previously performed.
	defer func() {
		if err != nil {
			c.rollbackFailingContainerCreation()
		}
	}()

	if c.checkBlockDeviceSupport() {
		if err = c.hotplugDrive(); err != nil {
			return
		}
	}

	var (
		machineType        = c.sandbox.config.HypervisorConfig.HypervisorMachineType
		normalAttachedDevs []ContainerDevice // for q35: normally attached devices
		delayAttachedDevs  []ContainerDevice // for q35: delay attached devices, for example, large BAR space devices
	)
	// Fix: https://github.com/kata-containers/runtime/issues/2460
	if machineType == QemuQ35 {
		// add large BAR space devices to delayAttachedDevs
		for _, device := range c.devices {
			var isLargeBarSpace bool
			isLargeBarSpace, err = manager.IsVFIOLargeBarSpaceDevice(device.ContainerPath)
			if err != nil {
				return
			}
			if isLargeBarSpace {
				delayAttachedDevs = append(delayAttachedDevs, device)
			} else {
				normalAttachedDevs = append(normalAttachedDevs, device)
			}
		}
	} else {
		normalAttachedDevs = c.devices
	}

	c.Logger().WithFields(logrus.Fields{
		"machine_type": machineType,
		"devices":      normalAttachedDevs,
	}).Info("normal attach devices")
	if len(normalAttachedDevs) > 0 {
		if err = c.attachDevices(normalAttachedDevs); err != nil {
			return
		}
	}

	// Deduce additional system mount info that should be handled by the agent
	// inside the VM
	c.getSystemMountInfo()

	process, err := c.sandbox.agent.createContainer(c.sandbox, c)
	if err != nil {
		return err
	}
	c.process = *process

	// lazily attach devices after createContainer for q35
	if machineType == QemuQ35 && len(delayAttachedDevs) > 0 {
		c.Logger().WithFields(logrus.Fields{
			"machine_type": machineType,
			"devices":      delayAttachedDevs,
		}).Info("lazy attach devices")
		if err = c.attachDevices(delayAttachedDevs); err != nil {
			return
		}
	}

	if !rootless.IsRootless() && !c.sandbox.config.SandboxCgroupOnly {
		if err = c.cgroupsCreate(); err != nil {
			return
		}
	}

	if err = c.setContainerState(types.StateReady); err != nil {
		return
	}

	return nil
}

func (c *Container) delete() error {
	if c.state.State != types.StateReady &&
		c.state.State != types.StateStopped {
		return fmt.Errorf("Container not ready or stopped, impossible to delete")
	}

	// Remove the container from the sandbox structure
	if err := c.sandbox.removeContainer(c.id); err != nil {
		return err
	}

	// If running rootless, there are no cgroups to remove
	if !c.sandbox.config.SandboxCgroupOnly || !rootless.IsRootless() {
		if err := c.cgroupsDelete(); err != nil {
			return err
		}
	}

	return c.sandbox.storeSandbox()
}

// checkSandboxRunning validates the container state.
//
// cmd specifies the operation (or verb) that the retrieval is destined
// for and is only used to make the returned error as descriptive as
// possible.
func (c *Container) checkSandboxRunning(cmd string) error {
	if cmd == "" {
		return fmt.Errorf("Cmd cannot be empty")
	}

	if c.sandbox.state.State != types.StateRunning {
		return fmt.Errorf("Sandbox not running, impossible to %s the container", cmd)
	}

	return nil
}

func (c *Container) getSystemMountInfo() {
	// check if /dev needs to be bind mounted from the host /dev
	c.systemMountsInfo.BindMountDev = false

	for _, m := range c.mounts {
		if m.Source == "/dev" && m.Destination == "/dev" && m.Type == "bind" {
			c.systemMountsInfo.BindMountDev = true
		}
	}

	// TODO Deduce /dev/shm size. See https://github.com/clearcontainers/runtime/issues/138
}

func (c *Container) start() error {
	if err := c.checkSandboxRunning("start"); err != nil {
		return err
	}

	if c.state.State != types.StateReady &&
		c.state.State != types.StateStopped {
		return fmt.Errorf("Container not ready or stopped, impossible to start")
	}

	if err := c.state.ValidTransition(c.state.State, types.StateRunning); err != nil {
		return err
	}

	if err := c.sandbox.agent.startContainer(c.sandbox, c); err != nil {
		c.Logger().WithError(err).Error("Failed to start container")

		if err := c.stop(true); err != nil {
			c.Logger().WithError(err).Warn("Failed to stop container")
		}
		return err
	}

	return c.setContainerState(types.StateRunning)
}

func (c *Container) stop(force bool) error {
	span, _ := c.trace("stop")
	defer span.Finish()

	// In case the container status has been updated implicitly because
	// the container process has terminated, it might be possible that
	// someone tries to stop the container, and we don't want to issue an
	// error in that case. This should be a no-op.
	//
	// This has to be handled before the transition validation since this
	// is an exception.
	if c.state.State == types.StateStopped {
		c.Logger().Info("Container already stopped")
		return nil
	}

	if err := c.state.ValidTransition(c.state.State, types.StateStopped); err != nil {
		return err
	}

	defer func() {
		span, _ := c.trace("stopShim")
		defer span.Finish()

		// If the shim is still running, something went wrong.
		// Make sure we stop the shim process.
		if running, _ := isShimRunning(c.process.Pid); running {
			l := c.Logger()
			l.Error("Failed to stop container so stopping dangling shim")
			if err := stopShim(c.process.Pid); err != nil {
				l.WithError(err).Warn("failed to stop shim")
			}
		}
	}()

	// Here we expect that stop() has been called because the container
	// process returned or because it received a signal. In case of a
	// signal, we want to give it some time to end the container process.
	// However, if the signal didn't reach its goal, the caller still
	// expects this container to be stopped, that's why we should not
	// return an error, but instead try to kill it forcefully.
	if err := waitForShim(c.process.Pid); err != nil {
		// Force the container to be killed.
		if err := c.kill(syscall.SIGKILL, true); err != nil && !force {
			return err
		}

		// Wait for the end of the container process. We expect this call
		// to succeed. Indeed, we have already given a second chance
		// to the container by trying to kill it with SIGKILL, so there
		// is no reason to try to go further if we get an error.
		if err := waitForShim(c.process.Pid); err != nil && !force {
			return err
		}
	}

	// Force the container to be killed. For most of the cases, this
	// should not matter and it should return an error that will be
	// ignored.
	// But for the specific case where the shim has been SIGKILL'ed,
	// the container is still running inside the VM. And this is why
	// this signal will ensure the container will get killed to match
	// the state of the shim. This will allow the following call to
	// stopContainer() to succeed in such particular case.
	c.kill(syscall.SIGKILL, true)

	// Since the agent supports MultiWaitProcess, it's better to wait for the
	// process here to make sure it has exited before issuing stopContainer;
	// otherwise the RemoveContainerRequest within it will fail if the process
	// hasn't exited.
	c.sandbox.agent.waitProcess(c, c.id)

	defer func() {
		// Save device and drive data.
		// TODO: can we merge this saving with setContainerState()?
		if err := c.sandbox.Save(); err != nil {
			c.Logger().WithError(err).Info("save container state failed")
		}
	}()

	if err := c.sandbox.agent.stopContainer(c.sandbox, *c); err != nil && !force {
		return err
	}

	if err := c.unmountHostMounts(); err != nil && !force {
		return err
	}

	if err := bindUnmountContainerRootfs(c.ctx, kataHostSharedDir(), c.sandbox.id, c.id); err != nil && !force {
		return err
	}

	if err := c.detachDevices(); err != nil && !force {
		return err
	}

	if err := c.removeDrive(); err != nil && !force {
		return err
	}

	shareDir := filepath.Join(kataHostSharedDir(), c.sandbox.id, c.id)
	if err := syscall.Rmdir(shareDir); err != nil {
		c.Logger().WithError(err).WithField("share-dir", shareDir).Warn("Could not remove container share dir")
	}

	// The container was killed by force; it MUST change its state
	// as soon as possible, just in case one of the operations below fails,
	// leaving the container in a bad state.
	if err := c.setContainerState(types.StateStopped); err != nil {
		return err
	}

	return nil
}

func (c *Container) enter(cmd types.Cmd) (*Process, error) {
	if err := c.checkSandboxRunning("enter"); err != nil {
		return nil, err
	}

	if c.state.State != types.StateReady &&
		c.state.State != types.StateRunning {
		return nil, fmt.Errorf("Container not ready or running, " +
			"impossible to enter")
	}

	process, err := c.sandbox.agent.exec(c.sandbox, *c, cmd)
	if err != nil {
		return nil, err
	}

	return process, nil
}

func (c *Container) wait(processID string) (int32, error) {
	if c.state.State != types.StateReady &&
		c.state.State != types.StateRunning {
		return 0, fmt.Errorf("Container not ready or running, " +
			"impossible to wait")
	}

	return c.sandbox.agent.waitProcess(c, processID)
}

func (c *Container) kill(signal syscall.Signal, all bool) error {
	return c.signalProcess(c.process.Token, signal, all)
}

func (c *Container) signalProcess(processID string, signal syscall.Signal, all bool) error {
	if c.sandbox.state.State != types.StateReady && c.sandbox.state.State != types.StateRunning {
		return fmt.Errorf("Sandbox not ready or running, impossible to signal the container")
	}

	if c.state.State != types.StateReady && c.state.State != types.StateRunning && c.state.State != types.StatePaused {
		return fmt.Errorf("Container not ready, running or paused, impossible to signal the container")
	}

	return c.sandbox.agent.signalProcess(c, processID, signal, all)
}

func (c *Container) winsizeProcess(processID string, height, width uint32) error {
	if c.state.State != types.StateReady && c.state.State != types.StateRunning {
		return fmt.Errorf("Container not ready or running, impossible to resize the process window")
	}

	return c.sandbox.agent.winsizeProcess(c, processID, height, width)
}

func (c *Container) ioStream(processID string) (io.WriteCloser, io.Reader, io.Reader, error) {
	if c.state.State != types.StateReady && c.state.State != types.StateRunning {
		return nil, nil, nil, fmt.Errorf("Container not ready or running, impossible to get the IO stream")
	}

	stream := newIOStream(c.sandbox, c, processID)

	return stream.stdin(), stream.stdout(), stream.stderr(), nil
}

func (c *Container) processList(options ProcessListOptions) (ProcessList, error) {
	if err := c.checkSandboxRunning("ps"); err != nil {
		return nil, err
	}

	if c.state.State != types.StateRunning {
		return nil, fmt.Errorf("Container not running, impossible to list processes")
	}

	return c.sandbox.agent.processListContainer(c.sandbox, *c, options)
}

func (c *Container) stats() (*ContainerStats, error) {
	if err := c.checkSandboxRunning("stats"); err != nil {
		return nil, err
	}
	return c.sandbox.agent.statsContainer(c.sandbox, *c)
}

func (c *Container) update(resources specs.LinuxResources) error {
	if err := c.checkSandboxRunning("update"); err != nil {
		return err
	}

	if state := c.state.State; !(state == types.StateRunning || state == types.StateReady) {
		return fmt.Errorf("Container(%s) not running or ready, impossible to update", state)
	}

	if c.config.Resources.CPU == nil {
		c.config.Resources.CPU = &specs.LinuxCPU{}
	}

	if cpu := resources.CPU; cpu != nil {
		if p := cpu.Period; p != nil && *p != 0 {
			c.config.Resources.CPU.Period = p
		}
		if q := cpu.Quota; q != nil && *q != 0 {
			c.config.Resources.CPU.Quota = q
		}
	}

	if c.config.Resources.Memory == nil {
		c.config.Resources.Memory = &specs.LinuxMemory{}
	}

	if mem := resources.Memory; mem != nil && mem.Limit != nil {
		c.config.Resources.Memory.Limit = mem.Limit
	}

	if err := c.sandbox.updateResources(); err != nil {
		return err
	}

	if !c.sandbox.config.SandboxCgroupOnly {
		if err := c.cgroupsUpdate(resources); err != nil {
			return err
		}
	}

	return c.sandbox.agent.updateContainer(c.sandbox, *c, resources)
}
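
// As an example, a caller resizing a container to two CPUs worth of quota and a
// 512 MiB memory limit would pass something like (illustrative values):
//
//	period := uint64(100000)
//	quota := int64(200000)
//	limit := int64(512 * 1024 * 1024)
//	err := c.update(specs.LinuxResources{
//		CPU:    &specs.LinuxCPU{Period: &period, Quota: &quota},
//		Memory: &specs.LinuxMemory{Limit: &limit},
//	})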

func (c *Container) pause() error {
	if err := c.checkSandboxRunning("pause"); err != nil {
		return err
	}

	if c.state.State != types.StateRunning {
		return fmt.Errorf("Container not running, impossible to pause")
	}

	if err := c.sandbox.agent.pauseContainer(c.sandbox, *c); err != nil {
		return err
	}

	return c.setContainerState(types.StatePaused)
}

func (c *Container) resume() error {
	if err := c.checkSandboxRunning("resume"); err != nil {
		return err
	}

	if c.state.State != types.StatePaused {
		return fmt.Errorf("Container not paused, impossible to resume")
	}

	if err := c.sandbox.agent.resumeContainer(c.sandbox, *c); err != nil {
		return err
	}

	return c.setContainerState(types.StateRunning)
}

func (c *Container) hotplugDrive() error {
	var dev device
	var err error

	// The container rootfs is block device backed and isn't mounted.
	if !c.rootFs.Mounted {
		dev, err = getDeviceForPath(c.rootFs.Source)
		// there is no "rootfs" dir on a block device backed rootfs
		c.rootfsSuffix = ""
	} else {
		dev, err = getDeviceForPath(c.rootFs.Target)
	}

	if err == errMountPointNotFound {
		return nil
	}

	if err != nil {
		return err
	}

	c.Logger().WithFields(logrus.Fields{
		"device-major": dev.major,
		"device-minor": dev.minor,
		"mount-point":  dev.mountPoint,
	}).Info("device details")

	isDM, err := checkStorageDriver(dev.major, dev.minor)
	if err != nil {
		return err
	}

	if !isDM {
		return nil
	}

	devicePath := c.rootFs.Source
	fsType := c.rootFs.Type
	if c.rootFs.Mounted {
		if dev.mountPoint == c.rootFs.Target {
			c.rootfsSuffix = ""
		}
		// If this is a device mapper device, fetch the full path of the device.
		devicePath, fsType, err = utils.GetDevicePathAndFsType(dev.mountPoint)
		if err != nil {
			return err
		}
	}

	devicePath, err = filepath.EvalSymlinks(devicePath)
	if err != nil {
		return err
	}

	c.Logger().WithFields(logrus.Fields{
		"device-path": devicePath,
		"fs-type":     fsType,
	}).Info("Block device detected")

	if err = c.plugDevice(devicePath); err != nil {
		return err
	}

	return c.setStateFstype(fsType)
}
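
// For instance, with a devicemapper-backed storage driver the container rootfs
// mount point typically resolves to a thin device such as /dev/dm-<N> with an
// ext4 or xfs filesystem; that device is then hotplugged into the guest by
// plugDevice below instead of being shared over 9pfs.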

func (c *Container) plugDevice(devicePath string) error {
	var stat unix.Stat_t
	if err := unix.Stat(devicePath, &stat); err != nil {
		return fmt.Errorf("stat %q failed: %v", devicePath, err)
	}

	if c.checkBlockDeviceSupport() && stat.Mode&unix.S_IFBLK == unix.S_IFBLK {
		b, err := c.sandbox.devManager.NewDevice(config.DeviceInfo{
			HostPath:      devicePath,
			ContainerPath: filepath.Join(kataGuestSharedDir(), c.id),
			DevType:       "b",
			Major:         int64(unix.Major(stat.Rdev)),
			Minor:         int64(unix.Minor(stat.Rdev)),
		})
		if err != nil {
			return fmt.Errorf("device manager failed to create rootfs device for %q: %v", devicePath, err)
		}

		c.state.BlockDeviceID = b.DeviceID()

		// attach rootfs device
		if err := c.sandbox.devManager.AttachDevice(b.DeviceID(), c.sandbox); err != nil {
			return err
		}
	}
	return nil
}

// isDriveUsed checks if a drive has been used for the container rootfs.
func (c *Container) isDriveUsed() bool {
	return !(c.state.Fstype == "")
}

func (c *Container) removeDrive() (err error) {
	if c.isDriveUsed() {
		c.Logger().Info("unplugging block device")

		devID := c.state.BlockDeviceID
		err := c.sandbox.devManager.DetachDevice(devID, c.sandbox)
		if err != nil && err != manager.ErrDeviceNotAttached {
			return err
		}

		if err = c.sandbox.devManager.RemoveDevice(devID); err != nil {
			c.Logger().WithFields(logrus.Fields{
				"container": c.id,
				"device-id": devID,
			}).WithError(err).Error("remove device failed")

			// ignore the device not exist error
			if err != manager.ErrDeviceNotExist {
				return err
			}
		}
	}

	return nil
}

func (c *Container) attachDevices(devices []ContainerDevice) error {
	// There's no need to roll back when an error happens here,
	// because if attachDevices fails, container creation will fail too,
	// and rollbackFailingContainerCreation will do all the rollbacks.

	// Since devices with a large BAR space require delayed attachment,
	// the devices need to be split into two lists, normalAttachedDevs and delayAttachedDevs,
	// so c.devices is not used here. See issue https://github.com/kata-containers/runtime/issues/2460.
	for _, dev := range devices {
		if err := c.sandbox.devManager.AttachDevice(dev.ID, c.sandbox); err != nil {
			return err
		}
	}
	return nil
}

func (c *Container) detachDevices() error {
	for _, dev := range c.devices {
		err := c.sandbox.devManager.DetachDevice(dev.ID, c.sandbox)
		if err != nil && err != manager.ErrDeviceNotAttached {
			return err
		}

		if err = c.sandbox.devManager.RemoveDevice(dev.ID); err != nil {
			c.Logger().WithFields(logrus.Fields{
				"container": c.id,
				"device-id": dev.ID,
			}).WithError(err).Error("remove device failed")

			// ignore the device not exist error
			if err != manager.ErrDeviceNotExist {
				return err
			}
		}
	}
	return nil
}

// cgroupsCreate creates cgroups on the host for the associated container.
func (c *Container) cgroupsCreate() (err error) {
	spec := c.GetPatchedOCISpec()
	if spec == nil {
		return errorMissingOCISpec
	}

	// https://github.com/kata-containers/runtime/issues/168
	resources := specs.LinuxResources{
		CPU: nil,
	}

	if spec.Linux != nil && spec.Linux.Resources != nil {
		resources.CPU = validCPUResources(spec.Linux.Resources.CPU)
	}

	c.state.CgroupPath, err = vccgroups.ValidCgroupPath(spec.Linux.CgroupsPath, c.sandbox.config.SystemdCgroup)
	if err != nil {
		return fmt.Errorf("Invalid cgroup path: %v", err)
	}

	cgroup, err := cgroupsNewFunc(cgroups.V1,
		cgroups.StaticPath(c.state.CgroupPath), &resources)
	if err != nil {
		return fmt.Errorf("Could not create cgroup for %v: %v", c.state.CgroupPath, err)
	}

	c.config.Resources = resources

	// Add the shim into the cgroup
	if c.process.Pid > 0 {
		if err := cgroup.Add(cgroups.Process{Pid: c.process.Pid}); err != nil {
			return fmt.Errorf("Could not add PID %d to cgroup %v: %v", c.process.Pid, spec.Linux.CgroupsPath, err)
		}
	}

	return nil
}

// cgroupsDelete deletes the cgroups on the host for the associated container.
func (c *Container) cgroupsDelete() error {
	if c.state.CgroupPath == "" {
		c.Logger().Debug("container does not have host cgroups: nothing to delete")
		return nil
	}

	cgroup, err := cgroupsLoadFunc(cgroups.V1,
		cgroups.StaticPath(c.state.CgroupPath))

	if err == cgroups.ErrCgroupDeleted {
		// cgroup already deleted
		return nil
	}

	if err != nil {
		return fmt.Errorf("Could not load container cgroup %v: %v", c.state.CgroupPath, err)
	}

	// Move any running processes here so that the cgroup can be removed.
	parent, err := parentCgroup(cgroups.V1, c.state.CgroupPath)
	if err != nil {
		// The parent cgroup doesn't exist, which means there are no processes running
		// and the container cgroup was removed.
		c.Logger().WithError(err).Warn("Container cgroup doesn't exist")
		return nil
	}

	if err := cgroup.MoveTo(parent); err != nil {
		// Don't fail: the cgroup can still be deleted.
		c.Logger().WithError(err).Warn("Could not move container process into parent cgroup")
	}

	if err := cgroup.Delete(); err != nil {
		return fmt.Errorf("Could not delete container cgroup path='%v': error='%v'", c.state.CgroupPath, err)
	}

	return nil
}

// cgroupsUpdate updates cgroups on the host for the associated container.
func (c *Container) cgroupsUpdate(resources specs.LinuxResources) error {
	if c.state.CgroupPath == "" {
		c.Logger().Debug("container does not have host cgroups: nothing to update")
		return nil
	}
	cgroup, err := cgroupsLoadFunc(cgroups.V1,
		cgroups.StaticPath(c.state.CgroupPath))
	if err != nil {
		return fmt.Errorf("Could not load cgroup %v: %v", c.state.CgroupPath, err)
	}

	// Issue: https://github.com/kata-containers/runtime/issues/168
	r := specs.LinuxResources{
		CPU: validCPUResources(resources.CPU),
	}

	// update the cgroup
	if err := cgroup.Update(&r); err != nil {
		return fmt.Errorf("Could not update container cgroup path='%v': error='%v'", c.state.CgroupPath, err)
	}

	// store the new resources
	c.config.Resources = r
	if err := c.storeContainer(); err != nil {
		return err
	}

	return nil
}