github.com/kata-containers/runtime@v0.0.0-20210505125100-04f29832a923/virtcontainers/pkg/cgroups/manager.go (about) 1 // Copyright (c) 2020 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 package cgroups 7 8 import ( 9 "bufio" 10 "fmt" 11 "io/ioutil" 12 "os" 13 "path/filepath" 14 "strconv" 15 "strings" 16 "sync" 17 18 "github.com/kata-containers/runtime/virtcontainers/pkg/rootless" 19 libcontcgroups "github.com/opencontainers/runc/libcontainer/cgroups" 20 libcontcgroupsfs "github.com/opencontainers/runc/libcontainer/cgroups/fs" 21 libcontcgroupssystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd" 22 "github.com/opencontainers/runc/libcontainer/configs" 23 "github.com/opencontainers/runc/libcontainer/specconv" 24 "github.com/opencontainers/runtime-spec/specs-go" 25 "github.com/sirupsen/logrus" 26 ) 27 28 type Config struct { 29 // Cgroups specifies specific cgroup settings for the various subsystems that the container is 30 // placed into to limit the resources the container has available 31 // If nil, New() will create one. 32 Cgroups *configs.Cgroup 33 34 // CgroupPaths contains paths to all the cgroups setup for a container. Key is cgroup subsystem name 35 // with the value as the path. 36 CgroupPaths map[string]string 37 38 // Resources represents the runtime resource constraints 39 Resources specs.LinuxResources 40 41 // CgroupPath is the OCI spec cgroup path 42 CgroupPath string 43 } 44 45 type Manager struct { 46 sync.Mutex 47 mgr libcontcgroups.Manager 48 } 49 50 const ( 51 // file in the cgroup that contains the pids 52 cgroupProcs = "cgroup.procs" 53 ) 54 55 var ( 56 cgroupsLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups") 57 ) 58 59 // SetLogger sets up a logger for this pkg 60 func SetLogger(logger *logrus.Entry) { 61 fields := cgroupsLogger.Data 62 63 cgroupsLogger = logger.WithFields(fields) 64 } 65 66 // returns the list of devices that a hypervisor may need 67 func hypervisorDevices() []specs.LinuxDeviceCgroup { 68 devices := []specs.LinuxDeviceCgroup{} 69 70 // Processes running in a device-cgroup are constrained, they have acccess 71 // only to the devices listed in the devices.list file. 72 // In order to run Virtual Machines and create virtqueues, hypervisors 73 // need access to certain character devices in the host, like kvm and vhost-net. 74 hypervisorDevices := []string{ 75 "/dev/kvm", // To run virtual machines 76 "/dev/vhost-net", // To create virtqueues 77 "/dev/vfio/vfio", // To access VFIO devices 78 } 79 80 for _, device := range hypervisorDevices { 81 ldevice, err := DeviceToLinuxDevice(device) 82 if err != nil { 83 cgroupsLogger.WithError(err).Warnf("Could not get device information") 84 continue 85 } 86 devices = append(devices, ldevice) 87 } 88 89 return devices 90 } 91 92 // New creates a new CgroupManager 93 func New(config *Config) (*Manager, error) { 94 var err error 95 96 devices := config.Resources.Devices 97 devices = append(devices, hypervisorDevices()...) 98 // Do not modify original devices 99 config.Resources.Devices = devices 100 101 newSpec := specs.Spec{ 102 Linux: &specs.Linux{ 103 Resources: &config.Resources, 104 }, 105 } 106 107 rootless := rootless.IsRootless() 108 109 cgroups := config.Cgroups 110 cgroupPaths := config.CgroupPaths 111 112 // determine if we are utilizing systemd managed cgroups based on the path provided 113 useSystemdCgroup := IsSystemdCgroup(config.CgroupPath) 114 115 // Create a new cgroup if the current one is nil 116 // this cgroups must be saved later 117 if cgroups == nil { 118 if config.CgroupPath == "" && !rootless { 119 cgroupsLogger.Warn("cgroups have not been created and cgroup path is empty") 120 } 121 122 newSpec.Linux.CgroupsPath, err = ValidCgroupPath(config.CgroupPath, useSystemdCgroup) 123 if err != nil { 124 return nil, fmt.Errorf("Invalid cgroup path: %v", err) 125 } 126 127 if cgroups, err = specconv.CreateCgroupConfig(&specconv.CreateOpts{ 128 // cgroup name is taken from spec 129 CgroupName: "", 130 UseSystemdCgroup: useSystemdCgroup, 131 Spec: &newSpec, 132 RootlessCgroups: rootless, 133 }); err != nil { 134 return nil, fmt.Errorf("Could not create cgroup config: %v", err) 135 } 136 } 137 138 // Set cgroupPaths to nil when the map is empty, it can and will be 139 // populated by `Manager.Apply()` when the runtime or any other process 140 // is moved to the cgroup. 141 if len(cgroupPaths) == 0 { 142 cgroupPaths = nil 143 } 144 145 if useSystemdCgroup { 146 systemdCgroupFunc, err := libcontcgroupssystemd.NewSystemdCgroupsManager() 147 if err != nil { 148 return nil, fmt.Errorf("Could not create systemd cgroup manager: %v", err) 149 } 150 libcontcgroupssystemd.UseSystemd() 151 return &Manager{ 152 mgr: systemdCgroupFunc(cgroups, cgroupPaths), 153 }, nil 154 } 155 156 return &Manager{ 157 mgr: &libcontcgroupsfs.Manager{ 158 Cgroups: cgroups, 159 Rootless: rootless, 160 Paths: cgroupPaths, 161 }, 162 }, nil 163 } 164 165 // read all the pids in cgroupPath 166 func readPids(cgroupPath string) ([]int, error) { 167 pids := []int{} 168 f, err := os.Open(filepath.Join(cgroupPath, cgroupProcs)) 169 if err != nil { 170 return nil, err 171 } 172 defer f.Close() 173 buf := bufio.NewScanner(f) 174 175 for buf.Scan() { 176 if t := buf.Text(); t != "" { 177 pid, err := strconv.Atoi(t) 178 if err != nil { 179 return nil, err 180 } 181 pids = append(pids, pid) 182 } 183 } 184 return pids, nil 185 } 186 187 // write the pids into cgroup.procs 188 func writePids(pids []int, cgroupPath string) error { 189 cgroupProcsPath := filepath.Join(cgroupPath, cgroupProcs) 190 for _, pid := range pids { 191 if err := ioutil.WriteFile(cgroupProcsPath, 192 []byte(strconv.Itoa(pid)), 193 os.FileMode(0), 194 ); err != nil { 195 return err 196 } 197 } 198 return nil 199 } 200 201 func (m *Manager) logger() *logrus.Entry { 202 return cgroupsLogger.WithField("source", "cgroup-manager") 203 } 204 205 // move all the processes in the current cgroup to the parent 206 func (m *Manager) moveToParent() error { 207 m.Lock() 208 defer m.Unlock() 209 for _, cgroupPath := range m.mgr.GetPaths() { 210 211 pids, err := readPids(cgroupPath) 212 // possible that the cgroupPath doesn't exist. If so, skip: 213 if os.IsNotExist(err) { 214 // The cgroup is not present on the filesystem: no pids to move. The systemd cgroup 215 // manager lists all of the subsystems, including those that are not actually being managed. 216 continue 217 } 218 if err != nil { 219 return err 220 } 221 222 if len(pids) == 0 { 223 // no pids in this cgroup 224 continue 225 } 226 227 cgroupParentPath := filepath.Dir(filepath.Clean(cgroupPath)) 228 if err = writePids(pids, cgroupParentPath); err != nil { 229 if !strings.Contains(err.Error(), "no such process") { 230 return err 231 } 232 } 233 } 234 return nil 235 } 236 237 // Add pid to cgroups 238 func (m *Manager) Add(pid int) error { 239 if rootless.IsRootless() { 240 m.logger().Debug("Unable to setup add pids to cgroup: running rootless") 241 return nil 242 } 243 244 m.Lock() 245 defer m.Unlock() 246 return m.mgr.Apply(pid) 247 } 248 249 // Apply constraints 250 func (m *Manager) Apply() error { 251 if rootless.IsRootless() { 252 m.logger().Debug("Unable to apply constraints: running rootless") 253 return nil 254 } 255 256 cgroups, err := m.GetCgroups() 257 if err != nil { 258 return err 259 } 260 261 m.Lock() 262 defer m.Unlock() 263 return m.mgr.Set(&configs.Config{ 264 Cgroups: cgroups, 265 }) 266 } 267 268 func (m *Manager) GetCgroups() (*configs.Cgroup, error) { 269 m.Lock() 270 defer m.Unlock() 271 return m.mgr.GetCgroups() 272 } 273 274 func (m *Manager) GetPaths() map[string]string { 275 m.Lock() 276 defer m.Unlock() 277 return m.mgr.GetPaths() 278 } 279 280 func (m *Manager) Destroy() error { 281 // cgroup can't be destroyed if it contains running processes 282 if err := m.moveToParent(); err != nil { 283 // If the process migration to the parent cgroup fails, then 284 // we expect the Destroy to fail as well. Let's log an error here 285 // and attempt to execute the Destroy still to help cleanup the hosts' FS. 286 m.logger().WithError(err).Error("Could not move processes into parent cgroup") 287 } 288 289 m.Lock() 290 defer m.Unlock() 291 return m.mgr.Destroy() 292 } 293 294 // AddDevice adds a device to the device cgroup 295 func (m *Manager) AddDevice(device string) error { 296 cgroups, err := m.GetCgroups() 297 if err != nil { 298 return err 299 } 300 301 ld, err := DeviceToCgroupDevice(device) 302 if err != nil { 303 return err 304 } 305 306 m.Lock() 307 cgroups.Devices = append(cgroups.Devices, ld) 308 m.Unlock() 309 310 return m.Apply() 311 } 312 313 // RemoceDevice removed a device from the device cgroup 314 func (m *Manager) RemoveDevice(device string) error { 315 cgroups, err := m.GetCgroups() 316 if err != nil { 317 return err 318 } 319 320 m.Lock() 321 for i, d := range cgroups.Devices { 322 if d.Path == device { 323 cgroups.Devices = append(cgroups.Devices[:i], cgroups.Devices[i+1:]...) 324 m.Unlock() 325 return m.Apply() 326 } 327 } 328 m.Unlock() 329 return fmt.Errorf("device %v not found in the cgroup", device) 330 } 331 332 func (m *Manager) SetCPUSet(cpuset, memset string) error { 333 cgroups, err := m.GetCgroups() 334 if err != nil { 335 return err 336 } 337 338 m.Lock() 339 cgroups.CpusetCpus = cpuset 340 cgroups.CpusetMems = memset 341 m.Unlock() 342 343 return m.Apply() 344 }