github.com/kata-containers/runtime@v0.0.0-20210505125100-04f29832a923/virtcontainers/sandbox.go (about) 1 // Copyright (c) 2016 Intel Corporation 2 // Copyright (c) 2020 Adobe Inc. 3 // 4 // SPDX-License-Identifier: Apache-2.0 5 // 6 7 package virtcontainers 8 9 import ( 10 "context" 11 "fmt" 12 "io" 13 "math" 14 "net" 15 "os" 16 "strings" 17 "sync" 18 "syscall" 19 20 "github.com/containerd/cgroups" 21 "github.com/containernetworking/plugins/pkg/ns" 22 "github.com/opencontainers/runc/libcontainer/configs" 23 specs "github.com/opencontainers/runtime-spec/specs-go" 24 opentracing "github.com/opentracing/opentracing-go" 25 "github.com/pkg/errors" 26 "github.com/sirupsen/logrus" 27 "github.com/vishvananda/netlink" 28 29 "github.com/kata-containers/agent/protocols/grpc" 30 "github.com/kata-containers/runtime/virtcontainers/device/api" 31 "github.com/kata-containers/runtime/virtcontainers/device/config" 32 "github.com/kata-containers/runtime/virtcontainers/device/drivers" 33 deviceManager "github.com/kata-containers/runtime/virtcontainers/device/manager" 34 exp "github.com/kata-containers/runtime/virtcontainers/experimental" 35 "github.com/kata-containers/runtime/virtcontainers/persist" 36 persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" 37 "github.com/kata-containers/runtime/virtcontainers/pkg/annotations" 38 vccgroups "github.com/kata-containers/runtime/virtcontainers/pkg/cgroups" 39 "github.com/kata-containers/runtime/virtcontainers/pkg/compatoci" 40 "github.com/kata-containers/runtime/virtcontainers/pkg/rootless" 41 vcTypes "github.com/kata-containers/runtime/virtcontainers/pkg/types" 42 "github.com/kata-containers/runtime/virtcontainers/store" 43 "github.com/kata-containers/runtime/virtcontainers/types" 44 "github.com/kata-containers/runtime/virtcontainers/utils" 45 "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" 46 ) 47 48 const ( 49 // vmStartTimeout represents the time in seconds a sandbox can wait before 50 // to consider the VM starting operation failed. 51 vmStartTimeout = 10 52 53 // DirMode is the permission bits used for creating a directory 54 DirMode = os.FileMode(0750) | os.ModeDir 55 ) 56 57 // SandboxStatus describes a sandbox status. 58 type SandboxStatus struct { 59 ID string 60 State types.SandboxState 61 Hypervisor HypervisorType 62 HypervisorConfig HypervisorConfig 63 Agent AgentType 64 ContainersStatus []ContainerStatus 65 66 // Annotations allow clients to store arbitrary values, 67 // for example to add additional status values required 68 // to support particular specifications. 69 Annotations map[string]string 70 } 71 72 // SandboxStats describes a sandbox's stats 73 type SandboxStats struct { 74 CgroupStats CgroupStats 75 Cpus int 76 } 77 78 // SandboxConfig is a Sandbox configuration. 79 type SandboxConfig struct { 80 ID string 81 82 Hostname string 83 84 HypervisorType HypervisorType 85 HypervisorConfig HypervisorConfig 86 87 AgentType AgentType 88 AgentConfig interface{} 89 90 ProxyType ProxyType 91 ProxyConfig ProxyConfig 92 93 ShimType ShimType 94 ShimConfig interface{} 95 96 NetworkConfig NetworkConfig 97 98 // Volumes is a list of shared volumes between the host and the Sandbox. 99 Volumes []types.Volume 100 101 // Containers describe the list of containers within a Sandbox. 102 // This list can be empty and populated by adding containers 103 // to the Sandbox a posteriori. 
104 //TODO: this should be a map to avoid duplicated containers 105 Containers []ContainerConfig 106 107 // Annotations keys must be unique strings and must be name-spaced 108 // with e.g. reverse domain notation (org.clearlinux.key). 109 Annotations map[string]string 110 111 ShmSize uint64 112 113 // SharePidNs sets all containers to share the same sandbox level pid namespace. 114 SharePidNs bool 115 116 // types.Stateful keeps sandbox resources in memory across APIs. Users will be responsible 117 // for calling Release() to release the memory resources. 118 Stateful bool 119 120 // SystemdCgroup enables systemd cgroup support 121 SystemdCgroup bool 122 123 // SandboxCgroupOnly enables cgroup only at podlevel in the host 124 SandboxCgroupOnly bool 125 126 // EnableAgentPidNs allows containers to share pid namespace with the agent 127 EnableAgentPidNs bool 128 129 DisableGuestSeccomp bool 130 131 // Experimental features enabled 132 Experimental []exp.Feature 133 134 // Cgroups specifies specific cgroup settings for the various subsystems that the container is 135 // placed into to limit the resources the container has available 136 Cgroups *configs.Cgroup 137 } 138 139 func (s *Sandbox) trace(name string) (opentracing.Span, context.Context) { 140 if s.ctx == nil { 141 s.Logger().WithField("type", "bug").Error("trace called before context set") 142 s.ctx = context.Background() 143 } 144 145 span, ctx := opentracing.StartSpanFromContext(s.ctx, name) 146 147 span.SetTag("subsystem", "sandbox") 148 149 return span, ctx 150 } 151 152 func (s *Sandbox) startProxy() error { 153 154 // If the proxy is KataBuiltInProxyType type, it needs to restart the proxy 155 // to watch the guest console if it hadn't been watched. 156 if s.agent == nil { 157 return fmt.Errorf("sandbox %s missed agent pointer", s.ID()) 158 } 159 160 return s.agent.startProxy(s) 161 } 162 163 // valid checks that the sandbox configuration is valid. 164 func (sandboxConfig *SandboxConfig) valid() bool { 165 if sandboxConfig.ID == "" { 166 return false 167 } 168 169 if _, err := newHypervisor(sandboxConfig.HypervisorType); err != nil { 170 sandboxConfig.HypervisorType = QemuHypervisor 171 } 172 173 // validate experimental features 174 for _, f := range sandboxConfig.Experimental { 175 if exp.Get(f.Name) == nil { 176 return false 177 } 178 } 179 return true 180 } 181 182 // Sandbox is composed of a set of containers and a runtime environment. 183 // A Sandbox can be created, deleted, started, paused, stopped, listed, entered, and restored. 184 type Sandbox struct { 185 id string 186 187 sync.Mutex 188 factory Factory 189 hypervisor hypervisor 190 agent agent 191 store *store.VCStore 192 // store is used to replace VCStore step by step 193 newStore persistapi.PersistDriver 194 195 network Network 196 monitor *monitor 197 198 config *SandboxConfig 199 200 devManager api.DeviceManager 201 202 volumes []types.Volume 203 204 containers map[string]*Container 205 206 state types.SandboxState 207 208 networkNS NetworkNamespace 209 210 annotationsLock *sync.RWMutex 211 212 wg *sync.WaitGroup 213 214 shmSize uint64 215 sharePidNs bool 216 stateful bool 217 seccompSupported bool 218 disableVMShutdown bool 219 220 cgroupMgr *vccgroups.Manager 221 222 ctx context.Context 223 } 224 225 // ID returns the sandbox identifier string. 
226 func (s *Sandbox) ID() string { 227 return s.id 228 } 229 230 // Logger returns a logrus logger appropriate for logging Sandbox messages 231 func (s *Sandbox) Logger() *logrus.Entry { 232 return virtLog.WithFields(logrus.Fields{ 233 "subsystem": "sandbox", 234 "sandbox": s.id, 235 }) 236 } 237 238 // Annotations returns any annotation that a user could have stored through the sandbox. 239 func (s *Sandbox) Annotations(key string) (string, error) { 240 s.annotationsLock.RLock() 241 defer s.annotationsLock.RUnlock() 242 243 value, exist := s.config.Annotations[key] 244 if !exist { 245 return "", fmt.Errorf("Annotations key %s does not exist", key) 246 } 247 248 return value, nil 249 } 250 251 // SetAnnotations sets or adds an annotations 252 func (s *Sandbox) SetAnnotations(annotations map[string]string) error { 253 s.annotationsLock.Lock() 254 defer s.annotationsLock.Unlock() 255 256 for k, v := range annotations { 257 s.config.Annotations[k] = v 258 } 259 return nil 260 } 261 262 // GetAnnotations returns sandbox's annotations 263 func (s *Sandbox) GetAnnotations() map[string]string { 264 s.annotationsLock.RLock() 265 defer s.annotationsLock.RUnlock() 266 267 return s.config.Annotations 268 } 269 270 // GetNetNs returns the network namespace of the current sandbox. 271 func (s *Sandbox) GetNetNs() string { 272 return s.networkNS.NetNsPath 273 } 274 275 // GetAllContainers returns all containers. 276 func (s *Sandbox) GetAllContainers() []VCContainer { 277 ifa := make([]VCContainer, len(s.containers)) 278 279 i := 0 280 for _, v := range s.containers { 281 ifa[i] = v 282 i++ 283 } 284 285 return ifa 286 } 287 288 // GetContainer returns the container named by the containerID. 289 func (s *Sandbox) GetContainer(containerID string) VCContainer { 290 if c, ok := s.containers[containerID]; ok { 291 return c 292 } 293 return nil 294 } 295 296 // Release closes the agent connection and removes sandbox from internal list. 
297 func (s *Sandbox) Release() error { 298 s.Logger().Info("release sandbox") 299 globalSandboxList.removeSandbox(s.id) 300 if s.monitor != nil { 301 s.monitor.stop() 302 } 303 s.hypervisor.disconnect() 304 return s.agent.disconnect() 305 } 306 307 func (s *Sandbox) releaseStatelessSandbox() error { 308 if s.stateful { 309 return nil 310 } 311 312 return s.Release() 313 } 314 315 // Status gets the status of the sandbox 316 // TODO: update container status properly, see kata-containers/runtime#253 317 func (s *Sandbox) Status() SandboxStatus { 318 var contStatusList []ContainerStatus 319 for _, c := range s.containers { 320 rootfs := c.config.RootFs.Source 321 if c.config.RootFs.Mounted { 322 rootfs = c.config.RootFs.Target 323 } 324 325 contStatusList = append(contStatusList, ContainerStatus{ 326 ID: c.id, 327 State: c.state, 328 PID: c.process.Pid, 329 StartTime: c.process.StartTime, 330 RootFs: rootfs, 331 Annotations: c.config.Annotations, 332 }) 333 } 334 335 return SandboxStatus{ 336 ID: s.id, 337 State: s.state, 338 Hypervisor: s.config.HypervisorType, 339 HypervisorConfig: s.config.HypervisorConfig, 340 Agent: s.config.AgentType, 341 ContainersStatus: contStatusList, 342 Annotations: s.config.Annotations, 343 } 344 } 345 346 // Monitor returns a error channel for watcher to watch at 347 func (s *Sandbox) Monitor() (chan error, error) { 348 if s.state.State != types.StateRunning { 349 return nil, fmt.Errorf("Sandbox is not running") 350 } 351 352 s.Lock() 353 if s.monitor == nil { 354 s.monitor = newMonitor(s) 355 } 356 s.Unlock() 357 358 return s.monitor.newWatcher() 359 } 360 361 // WaitProcess waits on a container process and return its exit code 362 func (s *Sandbox) WaitProcess(containerID, processID string) (int32, error) { 363 if s.state.State != types.StateRunning { 364 return 0, fmt.Errorf("Sandbox not running") 365 } 366 367 c, err := s.findContainer(containerID) 368 if err != nil { 369 return 0, err 370 } 371 372 return c.wait(processID) 373 } 374 375 // SignalProcess sends a signal to a process of a container when all is false. 376 // When all is true, it sends the signal to all processes of a container. 
377 func (s *Sandbox) SignalProcess(containerID, processID string, signal syscall.Signal, all bool) error { 378 if s.state.State != types.StateRunning { 379 return fmt.Errorf("Sandbox not running") 380 } 381 382 c, err := s.findContainer(containerID) 383 if err != nil { 384 return err 385 } 386 387 return c.signalProcess(processID, signal, all) 388 } 389 390 // WinsizeProcess resizes the tty window of a process 391 func (s *Sandbox) WinsizeProcess(containerID, processID string, height, width uint32) error { 392 if s.state.State != types.StateRunning { 393 return fmt.Errorf("Sandbox not running") 394 } 395 396 c, err := s.findContainer(containerID) 397 if err != nil { 398 return err 399 } 400 401 return c.winsizeProcess(processID, height, width) 402 } 403 404 // IOStream returns stdin writer, stdout reader and stderr reader of a process 405 func (s *Sandbox) IOStream(containerID, processID string) (io.WriteCloser, io.Reader, io.Reader, error) { 406 if s.state.State != types.StateRunning { 407 return nil, nil, nil, fmt.Errorf("Sandbox not running") 408 } 409 410 c, err := s.findContainer(containerID) 411 if err != nil { 412 return nil, nil, nil, err 413 } 414 415 return c.ioStream(processID) 416 } 417 418 func createAssets(ctx context.Context, sandboxConfig *SandboxConfig) error { 419 span, _ := trace(ctx, "createAssets") 420 defer span.Finish() 421 422 for _, name := range types.AssetTypes() { 423 a, err := types.NewAsset(sandboxConfig.Annotations, name) 424 if err != nil { 425 return err 426 } 427 428 if err := sandboxConfig.HypervisorConfig.addCustomAsset(a); err != nil { 429 return err 430 } 431 } 432 433 _, imageErr := sandboxConfig.HypervisorConfig.assetPath(types.ImageAsset) 434 _, initrdErr := sandboxConfig.HypervisorConfig.assetPath(types.InitrdAsset) 435 436 if imageErr != nil && initrdErr != nil { 437 return fmt.Errorf("%s and %s cannot be both set", types.ImageAsset, types.InitrdAsset) 438 } 439 440 return nil 441 } 442 443 func (s *Sandbox) getAndStoreGuestDetails() error { 444 guestDetailRes, err := s.agent.getGuestDetails(&grpc.GuestDetailsRequest{ 445 MemBlockSize: true, 446 MemHotplugProbe: true, 447 }) 448 if err != nil { 449 return err 450 } 451 452 if guestDetailRes != nil { 453 s.state.GuestMemoryBlockSizeMB = uint32(guestDetailRes.MemBlockSizeBytes >> 20) 454 if guestDetailRes.AgentDetails != nil { 455 s.seccompSupported = guestDetailRes.AgentDetails.SupportsSeccomp 456 } 457 s.state.GuestMemoryHotplugProbe = guestDetailRes.SupportMemHotplugProbe 458 } 459 460 return nil 461 } 462 463 // createSandbox creates a sandbox from a sandbox description, the containers list, the hypervisor 464 // and the agent passed through the Config structure. 465 // It will create and store the sandbox structure, and then ask the hypervisor 466 // to physically create that sandbox i.e. starts a VM for that sandbox to eventually 467 // be started. 468 func createSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factory) (*Sandbox, error) { 469 span, ctx := trace(ctx, "createSandbox") 470 defer span.Finish() 471 472 if err := createAssets(ctx, &sandboxConfig); err != nil { 473 return nil, err 474 } 475 476 s, err := newSandbox(ctx, sandboxConfig, factory) 477 if err != nil { 478 return nil, err 479 } 480 481 if len(s.config.Experimental) != 0 { 482 s.Logger().WithField("features", s.config.Experimental).Infof("Enable experimental features") 483 } 484 485 // Sandbox state has been loaded from storage. 486 // If the Stae is not empty, this is a re-creation, i.e. 
487 // we don't need to talk to the guest's agent, but only 488 // want to create the sandbox and its containers in memory. 489 if s.state.State != "" { 490 return s, nil 491 } 492 493 // Below code path is called only during create, because of earlier check. 494 if err := s.agent.createSandbox(s); err != nil { 495 return nil, err 496 } 497 498 // Set sandbox state 499 if err := s.setSandboxState(types.StateReady); err != nil { 500 return nil, err 501 } 502 503 return s, nil 504 } 505 506 func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factory) (sb *Sandbox, retErr error) { 507 span, ctx := trace(ctx, "newSandbox") 508 defer span.Finish() 509 510 if !sandboxConfig.valid() { 511 return nil, fmt.Errorf("Invalid sandbox configuration") 512 } 513 514 agent := newAgent(sandboxConfig.AgentType) 515 516 hypervisor, err := newHypervisor(sandboxConfig.HypervisorType) 517 if err != nil { 518 return nil, err 519 } 520 521 s := &Sandbox{ 522 id: sandboxConfig.ID, 523 factory: factory, 524 hypervisor: hypervisor, 525 agent: agent, 526 config: &sandboxConfig, 527 volumes: sandboxConfig.Volumes, 528 containers: map[string]*Container{}, 529 state: types.SandboxState{BlockIndexMap: make(map[int]struct{})}, 530 annotationsLock: &sync.RWMutex{}, 531 wg: &sync.WaitGroup{}, 532 shmSize: sandboxConfig.ShmSize, 533 sharePidNs: sandboxConfig.SharePidNs, 534 stateful: sandboxConfig.Stateful, 535 networkNS: NetworkNamespace{NetNsPath: sandboxConfig.NetworkConfig.NetNSPath}, 536 ctx: ctx, 537 } 538 539 if s.newStore, err = persist.GetDriver(); err != nil || s.newStore == nil { 540 return nil, fmt.Errorf("failed to get fs persist driver: %v", err) 541 } 542 543 if err = globalSandboxList.addSandbox(s); err != nil { 544 s.newStore.Destroy(s.id) 545 return nil, err 546 } 547 548 defer func() { 549 if retErr != nil { 550 s.Logger().WithError(retErr).WithField("sandboxid", s.id).Error("Create new sandbox failed") 551 globalSandboxList.removeSandbox(s.id) 552 s.newStore.Destroy(s.id) 553 } 554 }() 555 556 spec := s.GetPatchedOCISpec() 557 if spec != nil && spec.Process.SelinuxLabel != "" { 558 sandboxConfig.HypervisorConfig.SELinuxProcessLabel = spec.Process.SelinuxLabel 559 } 560 561 if useOldStore(ctx) { 562 vcStore, err := store.NewVCSandboxStore(ctx, s.id) 563 if err != nil { 564 return nil, err 565 } 566 567 s.store = vcStore 568 569 // Fetch sandbox network to be able to access it from the sandbox structure. 570 var networkNS NetworkNamespace 571 if err = s.store.Load(store.Network, &networkNS); err == nil { 572 s.networkNS = networkNS 573 } 574 575 devices, err := s.store.LoadDevices() 576 if err != nil { 577 s.Logger().WithError(err).WithField("sandboxid", s.id).Warning("load sandbox devices failed") 578 } 579 s.devManager = deviceManager.NewDeviceManager(sandboxConfig.HypervisorConfig.BlockDeviceDriver, 580 sandboxConfig.HypervisorConfig.EnableVhostUserStore, 581 sandboxConfig.HypervisorConfig.VhostUserStorePath, devices) 582 583 // Load sandbox state. The hypervisor.createSandbox call, may need to access statei. 
584 state, err := s.store.LoadState() 585 if err == nil { 586 s.state = state 587 } 588 589 if err = s.hypervisor.createSandbox(ctx, s.id, s.networkNS, &sandboxConfig.HypervisorConfig, s.stateful); err != nil { 590 return nil, err 591 } 592 } else { 593 s.devManager = deviceManager.NewDeviceManager(sandboxConfig.HypervisorConfig.BlockDeviceDriver, 594 sandboxConfig.HypervisorConfig.EnableVhostUserStore, 595 sandboxConfig.HypervisorConfig.VhostUserStorePath, nil) 596 597 // Ignore the error. Restore can fail for a new sandbox 598 if err := s.Restore(); err != nil { 599 s.Logger().WithError(err).Debug("restore sandbox failed") 600 } 601 602 // new store doesn't require hypervisor to be stored immediately 603 if err = s.hypervisor.createSandbox(ctx, s.id, s.networkNS, &sandboxConfig.HypervisorConfig, s.stateful); err != nil { 604 return nil, err 605 } 606 } 607 608 agentConfig, err := newAgentConfig(sandboxConfig.AgentType, sandboxConfig.AgentConfig) 609 if err != nil { 610 return nil, err 611 } 612 613 if s.disableVMShutdown, err = s.agent.init(ctx, s, agentConfig); err != nil { 614 return nil, err 615 } 616 617 return s, nil 618 } 619 620 func (s *Sandbox) createCgroupManager() error { 621 var err error 622 cgroupPath := "" 623 624 // Do not change current cgroup configuration. 625 // Create a spec without constraints 626 resources := specs.LinuxResources{} 627 628 if s.config == nil { 629 return fmt.Errorf("Could not create cgroup manager: empty sandbox configuration") 630 } 631 632 spec := s.GetPatchedOCISpec() 633 if spec != nil && spec.Linux != nil { 634 cgroupPath = spec.Linux.CgroupsPath 635 636 // Kata relies on the cgroup parent created and configured by the container 637 // engine by default. The exception is for devices whitelist as well as sandbox-level 638 // CPUSet. 639 if spec.Linux.Resources != nil { 640 resources.Devices = spec.Linux.Resources.Devices 641 642 if spec.Linux.Resources.CPU != nil { 643 resources.CPU = &specs.LinuxCPU{ 644 Cpus: spec.Linux.Resources.CPU.Cpus, 645 } 646 } 647 } 648 649 //TODO: in Docker or Podman use case, it is reasonable to set a constraint. Need to add a flag 650 // to allow users to configure Kata to constrain CPUs and Memory in this alternative 651 // scenario. See https://github.com/kata-containers/runtime/issues/2811 652 } 653 654 if s.devManager != nil { 655 for _, d := range s.devManager.GetAllDevices() { 656 dev, err := vccgroups.DeviceToLinuxDevice(d.GetHostPath()) 657 if err != nil { 658 s.Logger().WithError(err).WithField("device", d.GetHostPath()).Warn("Could not add device to sandbox resources") 659 continue 660 } 661 resources.Devices = append(resources.Devices, dev) 662 } 663 } 664 665 // Create the cgroup manager, this way it can be used later 666 // to create or detroy cgroups 667 if s.cgroupMgr, err = vccgroups.New( 668 &vccgroups.Config{ 669 Cgroups: s.config.Cgroups, 670 CgroupPaths: s.state.CgroupPaths, 671 Resources: resources, 672 CgroupPath: cgroupPath, 673 }, 674 ); err != nil { 675 return err 676 } 677 678 return nil 679 } 680 681 // storeSandbox stores a sandbox config. 
682 func (s *Sandbox) storeSandbox() error { 683 span, _ := s.trace("storeSandbox") 684 defer span.Finish() 685 686 // flush data to storage 687 if err := s.Save(); err != nil { 688 return err 689 } 690 return nil 691 } 692 693 func rLockSandbox(sandboxID string) (func() error, error) { 694 store, err := persist.GetDriver() 695 if err != nil { 696 return nil, fmt.Errorf("failed to get fs persist driver: %v", err) 697 } 698 699 return store.Lock(sandboxID, false) 700 } 701 702 func rwLockSandbox(sandboxID string) (func() error, error) { 703 store, err := persist.GetDriver() 704 if err != nil { 705 return nil, fmt.Errorf("failed to get fs persist driver: %v", err) 706 } 707 708 return store.Lock(sandboxID, true) 709 } 710 711 // fetchSandbox fetches a sandbox config from a sandbox ID and returns a sandbox. 712 func fetchSandbox(ctx context.Context, sandboxID string) (sandbox *Sandbox, err error) { 713 virtLog.Info("fetch sandbox") 714 if sandboxID == "" { 715 return nil, vcTypes.ErrNeedSandboxID 716 } 717 718 sandbox, err = globalSandboxList.lookupSandbox(sandboxID) 719 if sandbox != nil && err == nil { 720 return sandbox, err 721 } 722 723 var config SandboxConfig 724 725 // Try to load sandbox config from new store at first. 726 c, err := loadSandboxConfig(sandboxID) 727 if err != nil { 728 virtLog.Warningf("failed to get sandbox config from new store: %v", err) 729 // If we failed to load sandbox config from new store, try again with old store. 730 c, ctx, err = loadSandboxConfigFromOldStore(ctx, sandboxID) 731 if err != nil { 732 virtLog.Warningf("failed to get sandbox config from old store: %v", err) 733 return nil, err 734 } 735 } 736 config = *c 737 738 if useOldStore(ctx) { 739 virtLog.Infof("Warning: old store has been deprecated.") 740 } 741 // fetchSandbox is not suppose to create new sandbox VM. 742 sandbox, err = createSandbox(ctx, config, nil) 743 if err != nil { 744 return nil, fmt.Errorf("failed to create sandbox with config %+v: %v", config, err) 745 } 746 747 if sandbox.config.SandboxCgroupOnly { 748 if err := sandbox.createCgroupManager(); err != nil { 749 return nil, err 750 } 751 } 752 753 // This sandbox already exists, we don't need to recreate the containers in the guest. 754 // We only need to fetch the containers from storage and create the container structs. 755 if err := sandbox.fetchContainers(); err != nil { 756 return nil, err 757 } 758 759 return sandbox, nil 760 } 761 762 // findContainer returns a container from the containers list held by the 763 // sandbox structure, based on a container ID. 764 func (s *Sandbox) findContainer(containerID string) (*Container, error) { 765 if s == nil { 766 return nil, vcTypes.ErrNeedSandbox 767 } 768 769 if containerID == "" { 770 return nil, vcTypes.ErrNeedContainerID 771 } 772 773 if c, ok := s.containers[containerID]; ok { 774 return c, nil 775 } 776 777 return nil, errors.Wrapf(vcTypes.ErrNoSuchContainer, "Could not find the container %q from the sandbox %q containers list", 778 containerID, s.id) 779 } 780 781 // removeContainer removes a container from the containers list held by the 782 // sandbox structure, based on a container ID. 
783 func (s *Sandbox) removeContainer(containerID string) error { 784 if s == nil { 785 return vcTypes.ErrNeedSandbox 786 } 787 788 if containerID == "" { 789 return vcTypes.ErrNeedContainerID 790 } 791 792 if _, ok := s.containers[containerID]; !ok { 793 return errors.Wrapf(vcTypes.ErrNoSuchContainer, "Could not remove the container %q from the sandbox %q containers list", 794 containerID, s.id) 795 } 796 797 delete(s.containers, containerID) 798 799 return nil 800 } 801 802 // Delete deletes an already created sandbox. 803 // The VM in which the sandbox is running will be shut down. 804 func (s *Sandbox) Delete() error { 805 if s.state.State != types.StateReady && 806 s.state.State != types.StatePaused && 807 s.state.State != types.StateStopped { 808 return fmt.Errorf("Sandbox not ready, paused or stopped, impossible to delete") 809 } 810 811 for _, c := range s.containers { 812 if err := c.delete(); err != nil { 813 return err 814 } 815 } 816 817 if !rootless.IsRootless() { 818 if err := s.cgroupsDelete(); err != nil { 819 return err 820 } 821 } 822 823 globalSandboxList.removeSandbox(s.id) 824 825 if s.monitor != nil { 826 s.monitor.stop() 827 } 828 829 if err := s.hypervisor.cleanup(); err != nil { 830 s.Logger().WithError(err).Error("failed to cleanup hypervisor") 831 } 832 833 s.agent.cleanup(s) 834 if useOldStore(s.ctx) && s.store != nil { 835 if err := s.store.Delete(); err != nil { 836 s.Logger().WithError(err).Error("store delete failed") 837 } 838 } 839 return s.newStore.Destroy(s.id) 840 } 841 842 func (s *Sandbox) startNetworkMonitor() error { 843 span, _ := s.trace("startNetworkMonitor") 844 defer span.Finish() 845 846 binPath, err := os.Executable() 847 if err != nil { 848 return err 849 } 850 851 logLevel := "info" 852 if s.config.NetworkConfig.NetmonConfig.Debug { 853 logLevel = "debug" 854 } 855 856 params := netmonParams{ 857 netmonPath: s.config.NetworkConfig.NetmonConfig.Path, 858 debug: s.config.NetworkConfig.NetmonConfig.Debug, 859 logLevel: logLevel, 860 runtime: binPath, 861 sandboxID: s.id, 862 } 863 864 return s.network.Run(s.networkNS.NetNsPath, func() error { 865 pid, err := startNetmon(params) 866 if err != nil { 867 return err 868 } 869 870 s.networkNS.NetmonPID = pid 871 872 return nil 873 }) 874 } 875 876 func (s *Sandbox) createNetwork() error { 877 if s.config.NetworkConfig.DisableNewNetNs || 878 s.config.NetworkConfig.NetNSPath == "" { 879 return nil 880 } 881 882 span, _ := s.trace("createNetwork") 883 defer span.Finish() 884 885 s.networkNS = NetworkNamespace{ 886 NetNsPath: s.config.NetworkConfig.NetNSPath, 887 NetNsCreated: s.config.NetworkConfig.NetNsCreated, 888 } 889 890 // In case there is a factory, network interfaces are hotplugged 891 // after vm is started. 
892 if s.factory == nil { 893 // Add the network 894 endpoints, err := s.network.Add(s.ctx, &s.config.NetworkConfig, s, false) 895 if err != nil { 896 return err 897 } 898 899 s.networkNS.Endpoints = endpoints 900 901 if s.config.NetworkConfig.NetmonConfig.Enable { 902 if err := s.startNetworkMonitor(); err != nil { 903 return err 904 } 905 } 906 } 907 return nil 908 } 909 910 func (s *Sandbox) postCreatedNetwork() error { 911 912 return s.network.PostAdd(s.ctx, &s.networkNS, s.factory != nil) 913 } 914 915 func (s *Sandbox) removeNetwork() error { 916 span, _ := s.trace("removeNetwork") 917 defer span.Finish() 918 919 if s.config.NetworkConfig.NetmonConfig.Enable { 920 if err := stopNetmon(s.networkNS.NetmonPID); err != nil { 921 return err 922 } 923 } 924 925 return s.network.Remove(s.ctx, &s.networkNS, s.hypervisor) 926 } 927 928 func (s *Sandbox) generateNetInfo(inf *vcTypes.Interface) (NetworkInfo, error) { 929 hw, err := net.ParseMAC(inf.HwAddr) 930 if err != nil { 931 return NetworkInfo{}, err 932 } 933 934 var addrs []netlink.Addr 935 for _, addr := range inf.IPAddresses { 936 netlinkAddrStr := fmt.Sprintf("%s/%s", addr.Address, addr.Mask) 937 netlinkAddr, err := netlink.ParseAddr(netlinkAddrStr) 938 if err != nil { 939 return NetworkInfo{}, fmt.Errorf("could not parse %q: %v", netlinkAddrStr, err) 940 } 941 942 addrs = append(addrs, *netlinkAddr) 943 } 944 945 return NetworkInfo{ 946 Iface: NetlinkIface{ 947 LinkAttrs: netlink.LinkAttrs{ 948 Name: inf.Name, 949 HardwareAddr: hw, 950 MTU: int(inf.Mtu), 951 }, 952 Type: inf.LinkType, 953 }, 954 Addrs: addrs, 955 }, nil 956 } 957 958 // AddInterface adds new nic to the sandbox. 959 func (s *Sandbox) AddInterface(inf *vcTypes.Interface) (*vcTypes.Interface, error) { 960 netInfo, err := s.generateNetInfo(inf) 961 if err != nil { 962 return nil, err 963 } 964 965 endpoint, err := createEndpoint(netInfo, len(s.networkNS.Endpoints), s.config.NetworkConfig.InterworkingModel, nil) 966 if err != nil { 967 return nil, err 968 } 969 970 endpoint.SetProperties(netInfo) 971 if err := doNetNS(s.networkNS.NetNsPath, func(_ ns.NetNS) error { 972 s.Logger().WithField("endpoint-type", endpoint.Type()).Info("Hot attaching endpoint") 973 return endpoint.HotAttach(s.hypervisor) 974 }); err != nil { 975 return nil, err 976 } 977 978 // Update the sandbox storage 979 s.networkNS.Endpoints = append(s.networkNS.Endpoints, endpoint) 980 if err := s.Save(); err != nil { 981 return nil, err 982 } 983 984 // Add network for vm 985 inf.PciPath = endpoint.PciPath() 986 return s.agent.updateInterface(inf) 987 } 988 989 // RemoveInterface removes a nic of the sandbox. 990 func (s *Sandbox) RemoveInterface(inf *vcTypes.Interface) (*vcTypes.Interface, error) { 991 for i, endpoint := range s.networkNS.Endpoints { 992 if endpoint.HardwareAddr() == inf.HwAddr { 993 s.Logger().WithField("endpoint-type", endpoint.Type()).Info("Hot detaching endpoint") 994 if err := endpoint.HotDetach(s.hypervisor, s.networkNS.NetNsCreated, s.networkNS.NetNsPath); err != nil { 995 return inf, err 996 } 997 s.networkNS.Endpoints = append(s.networkNS.Endpoints[:i], s.networkNS.Endpoints[i+1:]...) 998 999 if err := s.Save(); err != nil { 1000 return inf, err 1001 } 1002 1003 break 1004 } 1005 } 1006 return nil, nil 1007 } 1008 1009 // ListInterfaces lists all nics and their configurations in the sandbox. 1010 func (s *Sandbox) ListInterfaces() ([]*vcTypes.Interface, error) { 1011 return s.agent.listInterfaces() 1012 } 1013 1014 // UpdateRoutes updates the sandbox route table (e.g. 
for portmapping support). 1015 func (s *Sandbox) UpdateRoutes(routes []*vcTypes.Route) ([]*vcTypes.Route, error) { 1016 return s.agent.updateRoutes(routes) 1017 } 1018 1019 // ListRoutes lists all routes and their configurations in the sandbox. 1020 func (s *Sandbox) ListRoutes() ([]*vcTypes.Route, error) { 1021 return s.agent.listRoutes() 1022 } 1023 1024 // startVM starts the VM. 1025 func (s *Sandbox) startVM() (err error) { 1026 span, ctx := s.trace("startVM") 1027 defer span.Finish() 1028 1029 s.Logger().Info("Starting VM") 1030 1031 if err := s.network.Run(s.networkNS.NetNsPath, func() error { 1032 if s.factory != nil { 1033 vm, err := s.factory.GetVM(ctx, VMConfig{ 1034 HypervisorType: s.config.HypervisorType, 1035 HypervisorConfig: s.config.HypervisorConfig, 1036 AgentType: s.config.AgentType, 1037 AgentConfig: s.config.AgentConfig, 1038 ProxyType: s.config.ProxyType, 1039 ProxyConfig: s.config.ProxyConfig, 1040 }) 1041 if err != nil { 1042 return err 1043 } 1044 1045 return vm.assignSandbox(s) 1046 } 1047 1048 return s.hypervisor.startSandbox(vmStartTimeout) 1049 }); err != nil { 1050 return err 1051 } 1052 1053 defer func() { 1054 if err != nil { 1055 s.hypervisor.stopSandbox() 1056 } 1057 }() 1058 1059 // In case of vm factory, network interfaces are hotplugged 1060 // after vm is started. 1061 if s.factory != nil { 1062 endpoints, err := s.network.Add(s.ctx, &s.config.NetworkConfig, s, true) 1063 if err != nil { 1064 return err 1065 } 1066 1067 s.networkNS.Endpoints = endpoints 1068 1069 if s.config.NetworkConfig.NetmonConfig.Enable { 1070 if err := s.startNetworkMonitor(); err != nil { 1071 return err 1072 } 1073 } 1074 } 1075 1076 s.Logger().Info("VM started") 1077 1078 // Once the hypervisor is done starting the sandbox, 1079 // we want to guarantee that it is manageable. 1080 // For that we need to ask the agent to start the 1081 // sandbox inside the VM. 1082 if err := s.agent.startSandbox(s); err != nil { 1083 return err 1084 } 1085 1086 s.Logger().Info("Agent started in the sandbox") 1087 1088 return nil 1089 } 1090 1091 // stopVM: stop the sandbox's VM 1092 func (s *Sandbox) stopVM() error { 1093 span, _ := s.trace("stopVM") 1094 defer span.Finish() 1095 1096 s.Logger().Info("Stopping sandbox in the VM") 1097 if err := s.agent.stopSandbox(s); err != nil { 1098 s.Logger().WithError(err).WithField("sandboxid", s.id).Warning("Agent did not stop sandbox") 1099 } 1100 1101 if s.disableVMShutdown { 1102 // Do not kill the VM - allow the agent to shut it down 1103 // (only used to support static agent tracing). 1104 return nil 1105 } 1106 1107 s.Logger().Info("Stopping VM") 1108 return s.hypervisor.stopSandbox() 1109 } 1110 1111 func (s *Sandbox) addContainer(c *Container) error { 1112 if _, ok := s.containers[c.id]; ok { 1113 return fmt.Errorf("Duplicated container: %s", c.id) 1114 } 1115 s.containers[c.id] = c 1116 1117 return nil 1118 } 1119 1120 // newContainers creates new containers structure and 1121 // adds them to the sandbox. It does not create the containers 1122 // in the guest. This should only be used when fetching a 1123 // sandbox that already exists. 
1124 func (s *Sandbox) fetchContainers() error { 1125 for i, contConfig := range s.config.Containers { 1126 // Add spec from bundle path 1127 spec, err := compatoci.GetContainerSpec(contConfig.Annotations) 1128 if err != nil { 1129 return err 1130 } 1131 contConfig.CustomSpec = &spec 1132 s.config.Containers[i] = contConfig 1133 1134 c, err := newContainer(s, &s.config.Containers[i]) 1135 if err != nil { 1136 return err 1137 } 1138 1139 if err := s.addContainer(c); err != nil { 1140 return err 1141 } 1142 } 1143 1144 return nil 1145 } 1146 1147 // CreateContainer creates a new container in the sandbox 1148 // This should be called only when the sandbox is already created. 1149 // It will add the new container config to sandbox.config.Containers 1150 func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, error) { 1151 // Create the container object, add devices to the sandbox's device-manager: 1152 c, err := newContainer(s, &contConfig) 1153 if err != nil { 1154 return nil, err 1155 } 1156 1157 // Update sandbox config to include the new container's config 1158 s.config.Containers = append(s.config.Containers, contConfig) 1159 1160 defer func() { 1161 if err != nil { 1162 if len(s.config.Containers) > 0 { 1163 // delete container config 1164 s.config.Containers = s.config.Containers[:len(s.config.Containers)-1] 1165 // need to flush change to persist storage 1166 if newErr := s.storeSandbox(); newErr != nil { 1167 s.Logger().WithError(newErr).Error("Failed to flush s.config.Containers change into sandbox store") 1168 } 1169 } 1170 } 1171 }() 1172 1173 // create and start the container 1174 err = c.create() 1175 if err != nil { 1176 return nil, err 1177 } 1178 1179 // Add the container to the containers list in the sandbox. 1180 if err = s.addContainer(c); err != nil { 1181 return nil, err 1182 } 1183 1184 defer func() { 1185 // Rollback if error happens. 1186 if err != nil { 1187 logger := s.Logger().WithFields(logrus.Fields{"container-id": c.id, "sandbox-id": s.id, "rollback": true}) 1188 1189 logger.Warning("Cleaning up partially created container") 1190 1191 if err2 := c.stop(true); err2 != nil { 1192 logger.WithError(err2).Warning("Could not delete container") 1193 } 1194 1195 logger.Debug("Removing stopped container from sandbox store") 1196 1197 s.removeContainer(c.id) 1198 } 1199 }() 1200 1201 // Sandbox is responsible for updating VM resources needed by Containers 1202 // Update resources after having added containers to the sandbox, since 1203 // container status is required to know if more resources should be added. 1204 err = s.updateResources() 1205 if err != nil { 1206 return nil, err 1207 } 1208 1209 if err = s.cgroupsUpdate(); err != nil { 1210 return nil, err 1211 } 1212 1213 if err = s.storeSandbox(); err != nil { 1214 return nil, err 1215 } 1216 1217 return c, nil 1218 } 1219 1220 // StartContainer starts a container in the sandbox 1221 func (s *Sandbox) StartContainer(containerID string) (VCContainer, error) { 1222 // Fetch the container. 1223 c, err := s.findContainer(containerID) 1224 if err != nil { 1225 return nil, err 1226 } 1227 1228 // Start it.
1229 err = c.start() 1230 if err != nil { 1231 return nil, err 1232 } 1233 1234 if err = s.storeSandbox(); err != nil { 1235 return nil, err 1236 } 1237 1238 s.Logger().Info("Container is started") 1239 1240 // Update sandbox resources in case a stopped container 1241 // is started 1242 err = s.updateResources() 1243 if err != nil { 1244 return nil, err 1245 } 1246 1247 return c, nil 1248 } 1249 1250 // StopContainer stops a container in the sandbox 1251 func (s *Sandbox) StopContainer(containerID string, force bool) (VCContainer, error) { 1252 // Fetch the container. 1253 c, err := s.findContainer(containerID) 1254 if err != nil { 1255 return nil, err 1256 } 1257 1258 // Stop it. 1259 if err := c.stop(force); err != nil { 1260 return nil, err 1261 } 1262 1263 if err = s.storeSandbox(); err != nil { 1264 return nil, err 1265 } 1266 return c, nil 1267 } 1268 1269 // KillContainer signals a container in the sandbox 1270 func (s *Sandbox) KillContainer(containerID string, signal syscall.Signal, all bool) error { 1271 // Fetch the container. 1272 c, err := s.findContainer(containerID) 1273 if err != nil { 1274 return err 1275 } 1276 1277 // Send a signal to the process. 1278 err = c.kill(signal, all) 1279 1280 // SIGKILL should never fail otherwise it is 1281 // impossible to clean things up. 1282 if signal == syscall.SIGKILL { 1283 return nil 1284 } 1285 1286 return err 1287 } 1288 1289 // DeleteContainer deletes a container from the sandbox 1290 func (s *Sandbox) DeleteContainer(containerID string) (VCContainer, error) { 1291 if containerID == "" { 1292 return nil, vcTypes.ErrNeedContainerID 1293 } 1294 1295 // Fetch the container. 1296 c, err := s.findContainer(containerID) 1297 if err != nil { 1298 return nil, err 1299 } 1300 1301 // Delete it. 1302 err = c.delete() 1303 if err != nil { 1304 return nil, err 1305 } 1306 1307 // Update sandbox config 1308 for idx, contConfig := range s.config.Containers { 1309 if contConfig.ID == containerID { 1310 s.config.Containers = append(s.config.Containers[:idx], s.config.Containers[idx+1:]...) 1311 break 1312 } 1313 } 1314 1315 // update the sandbox cgroup 1316 if err = s.cgroupsUpdate(); err != nil { 1317 return nil, err 1318 } 1319 1320 if err = s.storeSandbox(); err != nil { 1321 return nil, err 1322 } 1323 return c, nil 1324 } 1325 1326 // ProcessListContainer lists every process running inside a specific 1327 // container in the sandbox. 1328 func (s *Sandbox) ProcessListContainer(containerID string, options ProcessListOptions) (ProcessList, error) { 1329 // Fetch the container. 1330 c, err := s.findContainer(containerID) 1331 if err != nil { 1332 return nil, err 1333 } 1334 1335 // Get the process list related to the container. 
1336 return c.processList(options) 1337 } 1338 1339 // StatusContainer gets the status of a container 1340 // TODO: update container status properly, see kata-containers/runtime#253 1341 func (s *Sandbox) StatusContainer(containerID string) (ContainerStatus, error) { 1342 if containerID == "" { 1343 return ContainerStatus{}, vcTypes.ErrNeedContainerID 1344 } 1345 1346 if c, ok := s.containers[containerID]; ok { 1347 rootfs := c.config.RootFs.Source 1348 if c.config.RootFs.Mounted { 1349 rootfs = c.config.RootFs.Target 1350 } 1351 1352 return ContainerStatus{ 1353 ID: c.id, 1354 State: c.state, 1355 PID: c.process.Pid, 1356 StartTime: c.process.StartTime, 1357 RootFs: rootfs, 1358 Annotations: c.config.Annotations, 1359 }, nil 1360 } 1361 1362 return ContainerStatus{}, vcTypes.ErrNoSuchContainer 1363 } 1364 1365 // EnterContainer is the virtcontainers container command execution entry point. 1366 // EnterContainer enters an already running container and runs a given command. 1367 func (s *Sandbox) EnterContainer(containerID string, cmd types.Cmd) (VCContainer, *Process, error) { 1368 // Fetch the container. 1369 c, err := s.findContainer(containerID) 1370 if err != nil { 1371 return nil, nil, err 1372 } 1373 1374 // Enter it. 1375 process, err := c.enter(cmd) 1376 if err != nil { 1377 return nil, nil, err 1378 } 1379 1380 return c, process, nil 1381 } 1382 1383 // UpdateContainer update a running container. 1384 func (s *Sandbox) UpdateContainer(containerID string, resources specs.LinuxResources) error { 1385 // Fetch the container. 1386 c, err := s.findContainer(containerID) 1387 if err != nil { 1388 return err 1389 } 1390 1391 err = c.update(resources) 1392 if err != nil { 1393 return err 1394 } 1395 1396 if err := s.cgroupsUpdate(); err != nil { 1397 return err 1398 } 1399 1400 if err = s.storeSandbox(); err != nil { 1401 return err 1402 } 1403 return nil 1404 } 1405 1406 // StatsContainer return the stats of a running container 1407 func (s *Sandbox) StatsContainer(containerID string) (ContainerStats, error) { 1408 // Fetch the container. 
1409 c, err := s.findContainer(containerID) 1410 if err != nil { 1411 return ContainerStats{}, err 1412 } 1413 1414 stats, err := c.stats() 1415 if err != nil { 1416 return ContainerStats{}, err 1417 } 1418 return *stats, nil 1419 } 1420 1421 // Stats returns the stats of a running sandbox 1422 func (s *Sandbox) Stats() (SandboxStats, error) { 1423 if s.state.CgroupPath == "" { 1424 return SandboxStats{}, fmt.Errorf("sandbox cgroup path is empty") 1425 } 1426 1427 var path string 1428 var cgroupSubsystems cgroups.Hierarchy 1429 1430 if s.config.SandboxCgroupOnly { 1431 cgroupSubsystems = cgroups.V1 1432 path = s.state.CgroupPath 1433 } else { 1434 cgroupSubsystems = V1NoConstraints 1435 path = cgroupNoConstraintsPath(s.state.CgroupPath) 1436 } 1437 1438 cgroup, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path)) 1439 if err != nil { 1440 return SandboxStats{}, fmt.Errorf("Could not load sandbox cgroup in %v: %v", s.state.CgroupPath, err) 1441 } 1442 1443 metrics, err := cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist)) 1444 if err != nil { 1445 return SandboxStats{}, err 1446 } 1447 1448 stats := SandboxStats{} 1449 1450 stats.CgroupStats.CPUStats.CPUUsage.TotalUsage = metrics.CPU.Usage.Total 1451 stats.CgroupStats.MemoryStats.Usage.Usage = metrics.Memory.Usage.Usage 1452 tids, err := s.hypervisor.getThreadIDs() 1453 if err != nil { 1454 return stats, err 1455 } 1456 stats.Cpus = len(tids.vcpus) 1457 1458 return stats, nil 1459 } 1460 1461 // PauseContainer pauses a running container. 1462 func (s *Sandbox) PauseContainer(containerID string) error { 1463 // Fetch the container. 1464 c, err := s.findContainer(containerID) 1465 if err != nil { 1466 return err 1467 } 1468 1469 // Pause the container. 1470 if err := c.pause(); err != nil { 1471 return err 1472 } 1473 1474 if err = s.storeSandbox(); err != nil { 1475 return err 1476 } 1477 return nil 1478 } 1479 1480 // ResumeContainer resumes a paused container. 1481 func (s *Sandbox) ResumeContainer(containerID string) error { 1482 // Fetch the container. 1483 c, err := s.findContainer(containerID) 1484 if err != nil { 1485 return err 1486 } 1487 1488 // Resume the container. 1489 if err := c.resume(); err != nil { 1490 return err 1491 } 1492 1493 if err = s.storeSandbox(); err != nil { 1494 return err 1495 } 1496 return nil 1497 } 1498 1499 // createContainers registers all containers with the proxy, creates the 1500 // containers in the guest and starts one shim per container. 1501 func (s *Sandbox) createContainers() error { 1502 span, _ := s.trace("createContainers") 1503 defer span.Finish() 1504 1505 for _, contConfig := range s.config.Containers { 1506 1507 c, err := newContainer(s, &contConfig) 1508 if err != nil { 1509 return err 1510 } 1511 if err := c.create(); err != nil { 1512 return err 1513 } 1514 1515 if err := s.addContainer(c); err != nil { 1516 return err 1517 } 1518 } 1519 1520 // Update resources after having added containers to the sandbox, since 1521 // container status is required to know if more resources should be added. 1522 if err := s.updateResources(); err != nil { 1523 return err 1524 } 1525 1526 if err := s.cgroupsUpdate(); err != nil { 1527 return err 1528 } 1529 if err := s.storeSandbox(); err != nil { 1530 return err 1531 } 1532 1533 return nil 1534 } 1535 1536 // Start starts a sandbox. The containers making up the sandbox 1537 // will be started.
1538 func (s *Sandbox) Start() error { 1539 if err := s.state.ValidTransition(s.state.State, types.StateRunning); err != nil { 1540 return err 1541 } 1542 1543 prevState := s.state.State 1544 1545 if err := s.setSandboxState(types.StateRunning); err != nil { 1546 return err 1547 } 1548 1549 var startErr error 1550 defer func() { 1551 if startErr != nil { 1552 s.setSandboxState(prevState) 1553 } 1554 }() 1555 for _, c := range s.containers { 1556 if startErr = c.start(); startErr != nil { 1557 return startErr 1558 } 1559 } 1560 1561 if err := s.storeSandbox(); err != nil { 1562 return err 1563 } 1564 1565 s.Logger().Info("Sandbox is started") 1566 1567 return nil 1568 } 1569 1570 // Stop stops a sandbox. The containers that are making the sandbox 1571 // will be destroyed. 1572 // When force is true, ignore guest related stop failures. 1573 func (s *Sandbox) Stop(force bool) error { 1574 span, _ := s.trace("stop") 1575 defer span.Finish() 1576 1577 if s.state.State == types.StateStopped { 1578 s.Logger().Info("sandbox already stopped") 1579 return nil 1580 } 1581 1582 if err := s.state.ValidTransition(s.state.State, types.StateStopped); err != nil { 1583 return err 1584 } 1585 1586 for _, c := range s.containers { 1587 if err := c.stop(force); err != nil { 1588 return err 1589 } 1590 } 1591 1592 if err := s.stopVM(); err != nil && !force { 1593 return err 1594 } 1595 1596 if err := s.setSandboxState(types.StateStopped); err != nil { 1597 return err 1598 } 1599 1600 // Remove the network. 1601 if err := s.removeNetwork(); err != nil && !force { 1602 return err 1603 } 1604 1605 if err := s.storeSandbox(); err != nil { 1606 return err 1607 } 1608 1609 // Stop communicating with the agent. 1610 if err := s.agent.disconnect(); err != nil && !force { 1611 return err 1612 } 1613 1614 return nil 1615 } 1616 1617 // list lists all sandbox running on the host. 1618 func (s *Sandbox) list() ([]Sandbox, error) { 1619 return nil, nil 1620 } 1621 1622 // enter runs an executable within a sandbox. 1623 func (s *Sandbox) enter(args []string) error { 1624 return nil 1625 } 1626 1627 // setSandboxState sets both the in-memory and on-disk state of the 1628 // sandbox. 1629 func (s *Sandbox) setSandboxState(state types.StateString) error { 1630 if state == "" { 1631 return vcTypes.ErrNeedState 1632 } 1633 1634 // update in-memory state 1635 s.state.State = state 1636 1637 if useOldStore(s.ctx) { 1638 return s.store.Store(store.State, s.state) 1639 } 1640 return nil 1641 } 1642 1643 const maxBlockIndex = 65535 1644 1645 // getAndSetSandboxBlockIndex retrieves an unused sandbox block index from 1646 // the BlockIndexMap and marks it as used. This index is used to maintain the 1647 // index at which a block device is assigned to a container in the sandbox. 1648 func (s *Sandbox) getAndSetSandboxBlockIndex() (int, error) { 1649 currentIndex := -1 1650 for i := 0; i < maxBlockIndex; i++ { 1651 if _, ok := s.state.BlockIndexMap[i]; !ok { 1652 currentIndex = i 1653 break 1654 } 1655 } 1656 if currentIndex == -1 { 1657 return -1, errors.New("no available block index") 1658 } 1659 s.state.BlockIndexMap[currentIndex] = struct{}{} 1660 1661 return currentIndex, nil 1662 } 1663 1664 // unsetSandboxBlockIndex deletes the current sandbox block index from BlockIndexMap. 1665 // This is used to recover from failure while adding a block device. 
1666 func (s *Sandbox) unsetSandboxBlockIndex(index int) error { 1667 var err error 1668 original := index 1669 delete(s.state.BlockIndexMap, index) 1670 defer func() { 1671 if err != nil { 1672 s.state.BlockIndexMap[original] = struct{}{} 1673 } 1674 }() 1675 1676 return nil 1677 } 1678 1679 // HotplugAddDevice is used to add a device to the sandbox 1680 // Sandbox implements the DeviceReceiver interface from device/api/interface.go 1681 func (s *Sandbox) HotplugAddDevice(device api.Device, devType config.DeviceType) error { 1682 span, _ := s.trace("HotplugAddDevice") 1683 defer span.Finish() 1684 1685 if s.config.SandboxCgroupOnly { 1686 // We are about to add a device to the hypervisor, 1687 // the device cgroup MUST be updated since the hypervisor 1688 // will need access to such device 1689 hdev := device.GetHostPath() 1690 if err := s.cgroupMgr.AddDevice(hdev); err != nil { 1691 s.Logger().WithError(err).WithField("device", hdev). 1692 Warn("Could not add device to cgroup") 1693 } 1694 } 1695 1696 switch devType { 1697 case config.DeviceVFIO: 1698 vfioDevices, ok := device.GetDeviceInfo().([]*config.VFIODev) 1699 if !ok { 1700 return fmt.Errorf("device type mismatch, expect device type to be %s", devType) 1701 } 1702 1703 // adding a group of VFIO devices 1704 for _, dev := range vfioDevices { 1705 if _, err := s.hypervisor.hotplugAddDevice(dev, vfioDev); err != nil { 1706 s.Logger(). 1707 WithFields(logrus.Fields{ 1708 "sandbox": s.id, 1709 "vfio-device-ID": dev.ID, 1710 "vfio-device-BDF": dev.BDF, 1711 }).WithError(err).Error("failed to hotplug VFIO device") 1712 return err 1713 } 1714 } 1715 return nil 1716 case config.DeviceBlock: 1717 blockDevice, ok := device.(*drivers.BlockDevice) 1718 if !ok { 1719 return fmt.Errorf("device type mismatch, expect device type to be %s", devType) 1720 } 1721 _, err := s.hypervisor.hotplugAddDevice(blockDevice.BlockDrive, blockDev) 1722 return err 1723 case config.VhostUserBlk: 1724 vhostUserBlkDevice, ok := device.(*drivers.VhostUserBlkDevice) 1725 if !ok { 1726 return fmt.Errorf("device type mismatch, expect device type to be %s", devType) 1727 } 1728 _, err := s.hypervisor.hotplugAddDevice(vhostUserBlkDevice.VhostUserDeviceAttrs, vhostuserDev) 1729 return err 1730 case config.DeviceGeneric: 1731 // TODO: what? 1732 return nil 1733 } 1734 return nil 1735 } 1736 1737 // HotplugRemoveDevice is used to remove a device from the sandbox 1738 // Sandbox implements the DeviceReceiver interface from device/api/interface.go 1739 func (s *Sandbox) HotplugRemoveDevice(device api.Device, devType config.DeviceType) error { 1740 defer func() { 1741 if s.config.SandboxCgroupOnly { 1742 // Remove device from cgroup, the hypervisor 1743 // should not have access to such device anymore. 1744 hdev := device.GetHostPath() 1745 if err := s.cgroupMgr.RemoveDevice(hdev); err != nil { 1746 s.Logger().WithError(err).WithField("device", hdev). 1747 Warn("Could not remove device from cgroup") 1748 } 1749 } 1750 }() 1751 1752 switch devType { 1753 case config.DeviceVFIO: 1754 vfioDevices, ok := device.GetDeviceInfo().([]*config.VFIODev) 1755 if !ok { 1756 return fmt.Errorf("device type mismatch, expect device type to be %s", devType) 1757 } 1758 1759 // remove a group of VFIO devices 1760 for _, dev := range vfioDevices { 1761 if _, err := s.hypervisor.hotplugRemoveDevice(dev, vfioDev); err != nil { 1762 s.Logger().WithError(err).
1763 WithFields(logrus.Fields{ 1764 "sandbox": s.id, 1765 "vfio-device-ID": dev.ID, 1766 "vfio-device-BDF": dev.BDF, 1767 }).Error("failed to hot unplug VFIO device") 1768 return err 1769 } 1770 } 1771 return nil 1772 case config.DeviceBlock: 1773 blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive) 1774 if !ok { 1775 return fmt.Errorf("device type mismatch, expect device type to be %s", devType) 1776 } 1777 _, err := s.hypervisor.hotplugRemoveDevice(blockDrive, blockDev) 1778 return err 1779 case config.VhostUserBlk: 1780 vhostUserDeviceAttrs, ok := device.GetDeviceInfo().(*config.VhostUserDeviceAttrs) 1781 if !ok { 1782 return fmt.Errorf("device type mismatch, expect device type to be %s", devType) 1783 } 1784 _, err := s.hypervisor.hotplugRemoveDevice(vhostUserDeviceAttrs, vhostuserDev) 1785 return err 1786 case config.DeviceGeneric: 1787 // TODO: what? 1788 return nil 1789 } 1790 return nil 1791 } 1792 1793 // GetAndSetSandboxBlockIndex is used for getting and setting virtio-block indexes 1794 // Sandbox implement DeviceReceiver interface from device/api/interface.go 1795 func (s *Sandbox) GetAndSetSandboxBlockIndex() (int, error) { 1796 return s.getAndSetSandboxBlockIndex() 1797 } 1798 1799 // UnsetSandboxBlockIndex unsets block indexes 1800 // Sandbox implement DeviceReceiver interface from device/api/interface.go 1801 func (s *Sandbox) UnsetSandboxBlockIndex(index int) error { 1802 return s.unsetSandboxBlockIndex(index) 1803 } 1804 1805 // AppendDevice can only handle vhost user device currently, it adds a 1806 // vhost user device to sandbox 1807 // Sandbox implement DeviceReceiver interface from device/api/interface.go 1808 func (s *Sandbox) AppendDevice(device api.Device) error { 1809 switch device.DeviceType() { 1810 case config.VhostUserSCSI, config.VhostUserNet, config.VhostUserBlk, config.VhostUserFS: 1811 return s.hypervisor.addDevice(device.GetDeviceInfo().(*config.VhostUserDeviceAttrs), vhostuserDev) 1812 case config.DeviceVFIO: 1813 vfioDevs := device.GetDeviceInfo().([]*config.VFIODev) 1814 for _, d := range vfioDevs { 1815 return s.hypervisor.addDevice(*d, vfioDev) 1816 } 1817 default: 1818 s.Logger().WithField("device-type", device.DeviceType()). 1819 Warn("Could not append device: unsupported device type") 1820 } 1821 1822 return fmt.Errorf("unsupported device type") 1823 } 1824 1825 // AddDevice will add a device to sandbox 1826 func (s *Sandbox) AddDevice(info config.DeviceInfo) (api.Device, error) { 1827 if s.devManager == nil { 1828 return nil, fmt.Errorf("device manager isn't initialized") 1829 } 1830 1831 var err error 1832 b, err := s.devManager.NewDevice(info) 1833 if err != nil { 1834 return nil, err 1835 } 1836 defer func() { 1837 if err != nil { 1838 s.devManager.RemoveDevice(b.DeviceID()) 1839 } 1840 }() 1841 1842 if err = s.devManager.AttachDevice(b.DeviceID(), s); err != nil { 1843 return nil, err 1844 } 1845 defer func() { 1846 if err != nil { 1847 s.devManager.DetachDevice(b.DeviceID(), s) 1848 } 1849 }() 1850 1851 return b, nil 1852 } 1853 1854 // updateResources will: 1855 // - calculate the resources required for the virtual machine, and adjust the virtual machine 1856 // sizing accordingly. For a given sandbox, it will calculate the number of vCPUs required based 1857 // on the sum of container requests, plus default CPUs for the VM. Similar is done for memory. 1858 // If changes in memory or CPU are made, the VM will be updated and the agent will online the 1859 // applicable CPU and memory. 
1860 func (s *Sandbox) updateResources() error { 1861 if s == nil { 1862 return errors.New("sandbox is nil") 1863 } 1864 1865 if s.config == nil { 1866 return fmt.Errorf("sandbox config is nil") 1867 } 1868 1869 sandboxVCPUs, err := s.calculateSandboxCPUs() 1870 if err != nil { 1871 return err 1872 } 1873 // Add default vcpus for sandbox 1874 sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs 1875 1876 sandboxMemoryByte := s.calculateSandboxMemory() 1877 1878 // Add default / rsvd memory for sandbox. 1879 sandboxMemoryByte += uint64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift 1880 1881 // Update VCPUs 1882 s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs") 1883 oldCPUs, newCPUs, err := s.hypervisor.resizeVCPUs(sandboxVCPUs) 1884 if err != nil { 1885 return err 1886 } 1887 1888 // If the CPUs were increased, ask agent to online them 1889 if oldCPUs < newCPUs { 1890 vcpusAdded := newCPUs - oldCPUs 1891 if err := s.agent.onlineCPUMem(vcpusAdded, true); err != nil { 1892 return err 1893 } 1894 } 1895 s.Logger().Debugf("Sandbox CPUs: %d", newCPUs) 1896 1897 // Update Memory 1898 s.Logger().WithField("memory-sandbox-size-byte", sandboxMemoryByte).Debugf("Request to hypervisor to update memory") 1899 newMemoryMB := uint32(sandboxMemoryByte >> utils.MibToBytesShift) 1900 1901 newMemory, updatedMemoryDevice, err := s.hypervisor.resizeMemory(newMemoryMB, s.state.GuestMemoryBlockSizeMB, s.state.GuestMemoryHotplugProbe) 1902 if err != nil { 1903 return err 1904 } 1905 s.Logger().Debugf("Sandbox memory size: %d MB", newMemory) 1906 if s.state.GuestMemoryHotplugProbe && updatedMemoryDevice.addr != 0 { 1907 // notify the guest kernel about memory hot-add event, before onlining them 1908 s.Logger().Debugf("notify guest kernel memory hot-add event via probe interface, memory device located at 0x%x", updatedMemoryDevice.addr) 1909 if err := s.agent.memHotplugByProbe(updatedMemoryDevice.addr, uint32(updatedMemoryDevice.sizeMB), s.state.GuestMemoryBlockSizeMB); err != nil { 1910 return err 1911 } 1912 } 1913 if err := s.agent.onlineCPUMem(0, false); err != nil { 1914 return err 1915 } 1916 return nil 1917 } 1918 1919 func (s *Sandbox) calculateSandboxMemory() uint64 { 1920 memorySandbox := uint64(0) 1921 for _, c := range s.config.Containers { 1922 // Do not hot add resources of non-running containers again 1923 if cont, ok := s.containers[c.ID]; ok && cont.state.State == types.StateStopped { 1924 s.Logger().WithField("container-id", c.ID).Debug("Not taking into account memory resources of non-running containers") 1925 continue 1926 } 1927 1928 if m := c.Resources.Memory; m != nil && m.Limit != nil && *m.Limit > 0 { 1929 memorySandbox += uint64(*m.Limit) 1930 s.Logger().WithField("memory limit", memorySandbox).Info("Memory Sandbox + Memory Limit ") 1931 } 1932 1933 //Add hugepages memory 1934 //HugepageLimit is uint64 - https://github.com/opencontainers/runtime-spec/blob/master/specs-go/config.go#L242 1935 for _, l := range c.Resources.HugepageLimits { 1936 memorySandbox += uint64(l.Limit) 1937 } 1938 } 1939 1940 return memorySandbox 1941 } 1942 1943 func (s *Sandbox) calculateSandboxCPUs() (uint32, error) { 1944 mCPU := uint32(0) 1945 cpusetCount := int(0) 1946 1947 for _, c := range s.config.Containers { 1948 // Do not hot add resources of non-running containers again 1949 if cont, ok := s.containers[c.ID]; ok && cont.state.State == types.StateStopped { 1950 s.Logger().WithField("container-id", c.ID).Debug("Not taking into
account CPU resources of not running containers") 1951 continue 1952 } 1953 1954 if cpu := c.Resources.CPU; cpu != nil { 1955 if cpu.Period != nil && cpu.Quota != nil { 1956 mCPU += utils.CalculateMilliCPUs(*cpu.Quota, *cpu.Period) 1957 } 1958 1959 set, err := cpuset.Parse(cpu.Cpus) 1960 if err != nil { 1961 return 0, nil 1962 } 1963 cpusetCount += set.Size() 1964 } 1965 } 1966 1967 // If we aren't being constrained, then we could have two scenarios: 1968 // 1. BestEffort QoS: no proper support today in Kata. 1969 // 2. We could be constrained only by CPUSets. Check for this: 1970 if mCPU == 0 && cpusetCount > 0 { 1971 return uint32(cpusetCount), nil 1972 } 1973 1974 return utils.CalculateVCpusFromMilliCpus(mCPU), nil 1975 } 1976 1977 // GetHypervisorType is used for getting Hypervisor name currently used. 1978 // Sandbox implement DeviceReceiver interface from device/api/interface.go 1979 func (s *Sandbox) GetHypervisorType() string { 1980 return string(s.config.HypervisorType) 1981 } 1982 1983 // cgroupsUpdate will: 1984 // 1) get the v1constraints cgroup associated with the stored cgroup path 1985 // 2) (re-)add hypervisor vCPU threads to the appropriate cgroup 1986 // 3) If we are managing sandbox cgroup, update the v1constraints cgroup size 1987 func (s *Sandbox) cgroupsUpdate() error { 1988 1989 // If Kata is configured for SandboxCgroupOnly, the VMM and its processes are already 1990 // in the Kata sandbox cgroup (inherited). Check to see if sandbox cpuset needs to be 1991 // updated. 1992 if s.config.SandboxCgroupOnly { 1993 cpuset, memset, err := s.getSandboxCPUSet() 1994 if err != nil { 1995 return err 1996 } 1997 1998 if err := s.cgroupMgr.SetCPUSet(cpuset, memset); err != nil { 1999 return err 2000 } 2001 2002 return nil 2003 } 2004 2005 if s.state.CgroupPath == "" { 2006 s.Logger().Warn("sandbox's cgroup won't be updated: cgroup path is empty") 2007 return nil 2008 } 2009 2010 cgroup, err := cgroupsLoadFunc(V1Constraints, cgroups.StaticPath(s.state.CgroupPath)) 2011 if err != nil { 2012 return fmt.Errorf("Could not load cgroup %v: %v", s.state.CgroupPath, err) 2013 } 2014 2015 if err := s.constrainHypervisor(cgroup); err != nil { 2016 return err 2017 } 2018 2019 if len(s.containers) <= 1 { 2020 // nothing to update 2021 return nil 2022 } 2023 2024 resources, err := s.resources() 2025 if err != nil { 2026 return err 2027 } 2028 2029 if err := cgroup.Update(&resources); err != nil { 2030 return fmt.Errorf("Could not update sandbox cgroup path='%v' error='%v'", s.state.CgroupPath, err) 2031 } 2032 2033 return nil 2034 } 2035 2036 // cgroupsDelete will move the running processes in the sandbox cgroup 2037 // to the parent and then delete the sandbox cgroup 2038 func (s *Sandbox) cgroupsDelete() error { 2039 s.Logger().Debug("Deleting sandbox cgroup") 2040 if s.state.CgroupPath == "" { 2041 s.Logger().Warnf("sandbox cgroups path is empty") 2042 return nil 2043 } 2044 2045 var path string 2046 var cgroupSubsystems cgroups.Hierarchy 2047 2048 if s.config.SandboxCgroupOnly { 2049 return s.cgroupMgr.Destroy() 2050 } 2051 2052 cgroupSubsystems = V1NoConstraints 2053 path = cgroupNoConstraintsPath(s.state.CgroupPath) 2054 s.Logger().WithField("path", path).Debug("Deleting no constraints cgroup") 2055 2056 sandboxCgroups, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path)) 2057 if err == cgroups.ErrCgroupDeleted { 2058 // cgroup already deleted 2059 s.Logger().Warnf("cgroup already deleted: '%s'", err) 2060 return nil 2061 } 2062 2063 if err != nil { 2064 return 
fmt.Errorf("Could not load cgroups %v: %v", path, err) 2065 } 2066 2067 // move running process here, that way cgroup can be removed 2068 parent, err := parentCgroup(cgroupSubsystems, path) 2069 if err != nil { 2070 // parent cgroup doesn't exist, that means there are no process running 2071 // and the no constraints cgroup was removed. 2072 s.Logger().WithError(err).Warn("Parent cgroup doesn't exist") 2073 return nil 2074 } 2075 2076 if err := sandboxCgroups.MoveTo(parent); err != nil { 2077 // Don't fail, cgroup can be deleted 2078 s.Logger().WithError(err).Warnf("Could not move process from %s to parent cgroup", path) 2079 } 2080 2081 return sandboxCgroups.Delete() 2082 } 2083 2084 // constrainHypervisor will place the VMM and vCPU threads into cgroups. 2085 func (s *Sandbox) constrainHypervisor(cgroup cgroups.Cgroup) error { 2086 // VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set. 2087 // This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take 2088 // Kata/VMM into account, Kata may fail to boot due to being overconstrained. 2089 // If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup, and the vCPU threads into constrained 2090 // cgroup 2091 if s.config.SandboxCgroupOnly { 2092 // Kata components were moved into the sandbox-cgroup already, so VMM 2093 // will already land there as well. No need to take action 2094 return nil 2095 } 2096 2097 pids := s.hypervisor.getPids() 2098 if len(pids) == 0 || pids[0] == 0 { 2099 return fmt.Errorf("Invalid hypervisor PID: %+v", pids) 2100 } 2101 2102 // VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set. 2103 // This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take 2104 // Kata/VMM into account, Kata may fail to boot due to being overconstrained. 2105 // If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup, and the vCPU threads into constrained 2106 // cgroup 2107 // Move the VMM into cgroups without constraints, those cgroups are not yet supported. 2108 resources := &specs.LinuxResources{} 2109 path := cgroupNoConstraintsPath(s.state.CgroupPath) 2110 vmmCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources) 2111 if err != nil { 2112 return fmt.Errorf("Could not create cgroup %v: %v", path, err) 2113 } 2114 2115 for _, pid := range pids { 2116 if pid <= 0 { 2117 s.Logger().Warnf("Invalid hypervisor pid: %d", pid) 2118 continue 2119 } 2120 2121 if err := vmmCgroup.Add(cgroups.Process{Pid: pid}); err != nil { 2122 return fmt.Errorf("Could not add hypervisor PID %d to cgroup: %v", pid, err) 2123 } 2124 } 2125 2126 // when new container joins, new CPU could be hotplugged, so we 2127 // have to query fresh vcpu info from hypervisor every time. 2128 tids, err := s.hypervisor.getThreadIDs() 2129 if err != nil { 2130 return fmt.Errorf("failed to get thread ids from hypervisor: %v", err) 2131 } 2132 if len(tids.vcpus) == 0 { 2133 // If there's no tid returned from the hypervisor, this is not 2134 // a bug. It simply means there is nothing to constrain, hence 2135 // let's return without any error from here. 2136 return nil 2137 } 2138 2139 // Move vcpus (threads) into cgroups with constraints. 2140 // Move whole hypervisor process would be easier but the IO/network performance 2141 // would be over-constrained. 
2142 for _, i := range tids.vcpus { 2143 // In contrast, AddTask will write thread id to `tasks` 2144 // After this, vcpu threads are in "vcpu" sub-cgroup, other threads in 2145 // qemu will be left in parent cgroup untouched. 2146 if err := cgroup.AddTask(cgroups.Process{ 2147 Pid: i, 2148 }); err != nil { 2149 return err 2150 } 2151 } 2152 2153 return nil 2154 } 2155
2156 func (s *Sandbox) resources() (specs.LinuxResources, error) { 2157 resources := specs.LinuxResources{ 2158 CPU: s.cpuResources(), 2159 } 2160 2161 return resources, nil 2162 } 2163
2164 func (s *Sandbox) cpuResources() *specs.LinuxCPU { 2165 // Use default period and quota if they are not specified. 2166 // Container will inherit the constraints from its parent. 2167 quota := int64(0) 2168 period := uint64(0) 2169 shares := uint64(0) 2170 realtimePeriod := uint64(0) 2171 realtimeRuntime := int64(0) 2172 2173 cpu := &specs.LinuxCPU{ 2174 Quota: &quota, 2175 Period: &period, 2176 Shares: &shares, 2177 RealtimePeriod: &realtimePeriod, 2178 RealtimeRuntime: &realtimeRuntime, 2179 } 2180 2181 for _, c := range s.containers { 2182 ann := c.GetAnnotations() 2183 if ann[annotations.ContainerTypeKey] == string(PodSandbox) { 2184 // skip sandbox container 2185 continue 2186 } 2187 2188 if c.config.Resources.CPU == nil { 2189 continue 2190 } 2191 2192 if c.config.Resources.CPU.Shares != nil { 2193 shares = uint64(math.Max(float64(*c.config.Resources.CPU.Shares), float64(shares))) 2194 } 2195 2196 if c.config.Resources.CPU.Quota != nil { 2197 quota += *c.config.Resources.CPU.Quota 2198 } 2199 2200 if c.config.Resources.CPU.Period != nil { 2201 period = uint64(math.Max(float64(*c.config.Resources.CPU.Period), float64(period))) 2202 } 2203 2204 if c.config.Resources.CPU.Cpus != "" { 2205 cpu.Cpus += c.config.Resources.CPU.Cpus + "," 2206 } 2207 2208 if c.config.Resources.CPU.RealtimeRuntime != nil { 2209 realtimeRuntime += *c.config.Resources.CPU.RealtimeRuntime 2210 } 2211 2212 if c.config.Resources.CPU.RealtimePeriod != nil { 2213 realtimePeriod += *c.config.Resources.CPU.RealtimePeriod 2214 } 2215 2216 if c.config.Resources.CPU.Mems != "" { 2217 cpu.Mems += c.config.Resources.CPU.Mems + "," 2218 } 2219 } 2220 2221 cpu.Cpus = strings.Trim(cpu.Cpus, " \n\t,") 2222 2223 return validCPUResources(cpu) 2224 } 2225
2226 // setupSandboxCgroup creates and joins sandbox cgroups for the sandbox config 2227 func (s *Sandbox) setupSandboxCgroup() error { 2228 var err error 2229 spec := s.GetPatchedOCISpec() 2230 if spec == nil { 2231 return errorMissingOCISpec 2232 } 2233 2234 if spec.Linux == nil { 2235 s.Logger().WithField("sandboxid", s.id).Warning("no cgroup path provided for pod sandbox, not creating sandbox cgroup") 2236 return nil 2237 } 2238 2239 s.state.CgroupPath, err = vccgroups.ValidCgroupPath(spec.Linux.CgroupsPath, s.config.SystemdCgroup) 2240 if err != nil { 2241 return fmt.Errorf("Invalid cgroup path: %v", err) 2242 } 2243 2244 runtimePid := os.Getpid() 2245 // Add the runtime to the Kata sandbox cgroup 2246 if err = s.cgroupMgr.Add(runtimePid); err != nil { 2247 return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup: %v", runtimePid, err) 2248 } 2249 2250 // `Apply` updates manager's Cgroups and CgroupPaths, 2251 // they both need to be saved since they are used to create 2252 // or restore a cgroup manager.
2253 if s.config.Cgroups, err = s.cgroupMgr.GetCgroups(); err != nil { 2254 return fmt.Errorf("Could not get cgroup configuration: %v", err) 2255 } 2256 2257 s.state.CgroupPaths = s.cgroupMgr.GetPaths() 2258 2259 if err = s.cgroupMgr.Apply(); err != nil { 2260 return fmt.Errorf("Could not constrain cgroup: %v", err) 2261 } 2262 2263 return nil 2264 } 2265 2266 // GetPatchedOCISpec returns sandbox's OCI specification 2267 // This OCI specification was patched when the sandbox was created 2268 // by containerCapabilities(), SetEphemeralStorageType() and others 2269 // in order to support: 2270 // * capabilities 2271 // * Ephemeral storage 2272 // * k8s empty dir 2273 // If you need the original (vanilla) OCI spec, 2274 // use compatoci.GetContainerSpec() instead. 2275 func (s *Sandbox) GetPatchedOCISpec() *specs.Spec { 2276 if s.config == nil { 2277 return nil 2278 } 2279 2280 // get the container associated with the PodSandbox annotation. In Kubernetes, this 2281 // represents the pause container. In Docker, this is the container. We derive the 2282 // cgroup path from this container. 2283 for _, cConfig := range s.config.Containers { 2284 if cConfig.Annotations[annotations.ContainerTypeKey] == string(PodSandbox) { 2285 return cConfig.CustomSpec 2286 } 2287 } 2288 2289 return nil 2290 } 2291 2292 func (s *Sandbox) GetOOMEvent() (string, error) { 2293 return s.agent.getOOMEvent() 2294 } 2295 2296 // getSandboxCPUSet returns the union of each of the sandbox's containers' CPU sets' 2297 // cpus and mems as a string in canonical linux CPU/mems list format 2298 func (s *Sandbox) getSandboxCPUSet() (string, string, error) { 2299 if s.config == nil { 2300 return "", "", nil 2301 } 2302 2303 cpuResult := cpuset.NewCPUSet() 2304 memResult := cpuset.NewCPUSet() 2305 for _, ctr := range s.config.Containers { 2306 if ctr.Resources.CPU != nil { 2307 currCPUSet, err := cpuset.Parse(ctr.Resources.CPU.Cpus) 2308 if err != nil { 2309 return "", "", fmt.Errorf("unable to parse CPUset.cpus for container %s: %v", ctr.ID, err) 2310 } 2311 cpuResult = cpuResult.Union(currCPUSet) 2312 2313 currMemSet, err := cpuset.Parse(ctr.Resources.CPU.Mems) 2314 if err != nil { 2315 return "", "", fmt.Errorf("unable to parse CPUset.mems for container %s: %v", ctr.ID, err) 2316 } 2317 memResult = memResult.Union(currMemSet) 2318 } 2319 } 2320 2321 return cpuResult.String(), memResult.String(), nil 2322 } 2323 2324 // GetSandboxBlockOffset returns an offset w.r.t. the sandbox block index, to be 2325 // used when determining a virtio-block drive name. An offset may be present if 2326 // specific drive names are reserved, e.g. for a sandbox rootfs, but not 2327 // included in the BlockIndexMap. 2328 func (s *Sandbox) GetSandboxBlockOffset() int { 2329 return s.hypervisor.getVirtDriveOffset() 2330 }
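
The sizing performed by updateResources, calculateSandboxCPUs and calculateSandboxMemory boils down to: vCPUs = default vCPUs + ceil(sum of quota/period across running containers), or the cpuset size when only cpusets constrain the pod, and memory = default VM memory + the sum of container memory and hugepage limits. The standalone sketch below reproduces that arithmetic with local helpers; it assumes utils.CalculateMilliCPUs is the conventional quota*1000/period, that utils.CalculateVCpusFromMilliCpus rounds up to whole vCPUs, and that utils.MibToBytesShift is 20. None of the helper names below belong to virtcontainers.

    package main

    import (
        "fmt"
        "math"
    )

    // milliCPUs mirrors the usual quota/period conversion: one CPU == 1000 milli-CPUs.
    func milliCPUs(quota int64, period uint64) uint32 {
        if period == 0 {
            return 0
        }
        return uint32((quota * 1000) / int64(period))
    }

    // vCPUsFromMilliCPUs rounds up so that a fractional CPU request still gets a vCPU.
    func vCPUsFromMilliCPUs(m uint32) uint32 {
        return uint32(math.Ceil(float64(m) / 1000.0))
    }

    func main() {
        // Hypothetical running containers: 1.5 CPUs and 0.25 CPUs worth of quota.
        containers := []struct {
            quota       int64
            period      uint64
            memoryLimit uint64 // bytes
        }{
            {quota: 150000, period: 100000, memoryLimit: 256 << 20},
            {quota: 25000, period: 100000, memoryLimit: 128 << 20},
        }

        const defaultVCPUs = uint32(1)        // stands in for hypervisorConfig().NumVCPUs
        const defaultMemoryMiB = uint64(2048) // stands in for hypervisorConfig().MemorySize

        m := uint32(0)
        memBytes := uint64(0)
        for _, c := range containers {
            m += milliCPUs(c.quota, c.period)
            memBytes += c.memoryLimit
        }

        vcpus := defaultVCPUs + vCPUsFromMilliCPUs(m) // 1 + ceil(1750/1000) = 3
        memMiB := defaultMemoryMiB + (memBytes >> 20) // 2048 + 384 = 2432, assuming a 20-bit MiB shift
        fmt.Printf("resize VM to %d vCPUs, %d MiB\n", vcpus, memMiB)
    }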
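
constrainHypervisor relies on the difference between containerd/cgroups Add, which writes a PID to cgroup.procs and so moves the whole process, and AddTask, which writes a single thread ID to tasks; that is why only the vCPU threads end up CPU-constrained while the VMM's I/O threads stay in the unconstrained cgroup. A minimal sketch of that split, assuming the vendored containerd/cgroups exposes the v1 New/Add/AddTask API used in this file; the path and IDs are made up, and it needs root on a cgroup v1 host.

    package main

    import (
        "log"

        "github.com/containerd/cgroups"
        specs "github.com/opencontainers/runtime-spec/specs-go"
    )

    func main() {
        // Constrained sandbox cgroup (hypothetical path).
        constrained, err := cgroups.New(cgroups.V1, cgroups.StaticPath("/kata_example"), &specs.LinuxResources{})
        if err != nil {
            log.Fatal(err)
        }

        // Unconstrained sibling for the rest of the VMM, mirroring cgroupNoConstraintsPath.
        unconstrained, err := cgroups.New(cgroups.V1, cgroups.StaticPath("/kata_example_no_constraints"), &specs.LinuxResources{})
        if err != nil {
            log.Fatal(err)
        }

        vmmPid := 12345                 // hypothetical hypervisor PID
        vcpuTids := []int{12346, 12347} // hypothetical vCPU thread IDs

        // Add writes to cgroup.procs: the whole VMM process moves, threads included.
        if err := unconstrained.Add(cgroups.Process{Pid: vmmPid}); err != nil {
            log.Fatal(err)
        }

        // AddTask writes to tasks: only the named threads move, so CPU limits land
        // on the vCPU threads while the remaining VMM threads stay unconstrained.
        for _, tid := range vcpuTids {
            if err := constrained.AddTask(cgroups.Process{Pid: tid}); err != nil {
                log.Fatal(err)
            }
        }
    }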
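
cpuResources folds the per-container specs.LinuxCPU values into a single sandbox-level constraint: shares and period take the maximum across containers, quota and the realtime fields are summed, and the cpus/mems list strings are concatenated and trimmed. A small worked example with hypothetical numbers:

    package main

    import (
        "fmt"
        "strings"

        specs "github.com/opencontainers/runtime-spec/specs-go"
    )

    func main() {
        quota := int64(0)
        period := uint64(0)
        shares := uint64(0)
        cpu := &specs.LinuxCPU{Quota: &quota, Period: &period, Shares: &shares}

        // Hypothetical per-container CPU settings.
        type ctrCPU struct {
            shares uint64
            quota  int64
            period uint64
            cpus   string
        }
        containers := []ctrCPU{
            {shares: 512, quota: 50000, period: 100000, cpus: "0-1"},
            {shares: 1024, quota: 100000, period: 100000, cpus: "2"},
        }

        for _, c := range containers {
            if c.shares > shares {
                shares = c.shares // maximum across containers
            }
            quota += c.quota // summed
            if c.period > period {
                period = c.period // maximum across containers
            }
            cpu.Cpus += c.cpus + ","
        }
        cpu.Cpus = strings.Trim(cpu.Cpus, " \n\t,")

        // Expected: shares=1024 quota=150000 period=100000 cpus="0-1,2"
        fmt.Printf("shares=%d quota=%d period=%d cpus=%q\n", *cpu.Shares, *cpu.Quota, *cpu.Period, cpu.Cpus)
    }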
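
getSandboxCPUSet builds the sandbox-level cpuset by parsing each container's Linux list-format string with the kubelet cpuset package and taking the union; a short usage sketch with hypothetical values follows.

    package main

    import (
        "fmt"

        "k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
    )

    func main() {
        // Hypothetical per-container pinning, as found in ctr.Resources.CPU.Cpus.
        a, err := cpuset.Parse("0-2")
        if err != nil {
            panic(err)
        }
        b, err := cpuset.Parse("2,5-6")
        if err != nil {
            panic(err)
        }

        // Union the sets the same way getSandboxCPUSet accumulates cpuResult.
        result := cpuset.NewCPUSet().Union(a).Union(b)

        // String() renders the canonical Linux list format, e.g. "0-2,5-6".
        fmt.Println(result.String(), result.Size())
    }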