github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/lib/cgutil/cpuset_manager_v1.go (about) 1 //go:build linux 2 3 package cgutil 4 5 import ( 6 "context" 7 "fmt" 8 "io/ioutil" 9 "os" 10 "path/filepath" 11 "strings" 12 "sync" 13 "time" 14 15 "github.com/hashicorp/go-hclog" 16 "github.com/hashicorp/nomad/helper" 17 "github.com/hashicorp/nomad/lib/cpuset" 18 "github.com/hashicorp/nomad/nomad/structs" 19 "github.com/opencontainers/runc/libcontainer/cgroups" 20 "github.com/opencontainers/runc/libcontainer/cgroups/fs" 21 "github.com/opencontainers/runc/libcontainer/configs" 22 "golang.org/x/sys/unix" 23 ) 24 25 const ( 26 DefaultCgroupV1Parent = "/nomad" 27 SharedCpusetCgroupName = "shared" 28 ReservedCpusetCgroupName = "reserved" 29 ) 30 31 // NewCpusetManagerV1 creates a CpusetManager compatible with cgroups.v1 32 func NewCpusetManagerV1(cgroupParent string, _ []uint16, logger hclog.Logger) CpusetManager { 33 if cgroupParent == "" { 34 cgroupParent = DefaultCgroupV1Parent 35 } 36 37 cgroupParentPath, err := GetCgroupPathHelperV1("cpuset", cgroupParent) 38 if err != nil { 39 logger.Warn("failed to get cgroup path; disable cpuset management", "error", err) 40 return new(NoopCpusetManager) 41 } 42 43 // ensures that shared cpuset exists and that the cpuset values are copied from the parent if created 44 if err = cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil { 45 logger.Warn("failed to ensure cgroup parent exists; disable cpuset management", "error", err) 46 return new(NoopCpusetManager) 47 } 48 49 parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath) 50 if err != nil { 51 logger.Warn("failed to detect parent cpuset settings; disable cpuset management", "error", err) 52 return new(NoopCpusetManager) 53 } 54 55 parentCpuset, err := cpuset.Parse(parentCpus) 56 if err != nil { 57 logger.Warn("failed to parse parent cpuset.cpus setting; disable cpuset management", "error", err) 58 return new(NoopCpusetManager) 59 } 60 61 // ensure the reserved cpuset exists, but only copy the mems from the parent if creating the cgroup 62 if err = os.Mkdir(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), 0755); err != nil { 63 logger.Warn("failed to ensure reserved cpuset.cpus interface exists; disable cpuset management", "error", err) 64 return new(NoopCpusetManager) 65 } 66 67 if err = cgroups.WriteFile(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), "cpuset.mems", parentMems); err != nil { 68 logger.Warn("failed to ensure reserved cpuset.mems interface exists; disable cpuset management", "error", err) 69 return new(NoopCpusetManager) 70 } 71 72 return &cpusetManagerV1{ 73 parentCpuset: parentCpuset, 74 cgroupParent: cgroupParent, 75 cgroupParentPath: cgroupParentPath, 76 cgroupInfo: map[string]allocTaskCgroupInfo{}, 77 logger: logger, 78 } 79 } 80 81 var ( 82 cpusetReconcileInterval = 30 * time.Second 83 ) 84 85 type cpusetManagerV1 struct { 86 // cgroupParent relative to the cgroup root. ex. '/nomad' 87 cgroupParent string 88 // cgroupParentPath is the absolute path to the cgroup parent. 89 cgroupParentPath string 90 91 parentCpuset cpuset.CPUSet 92 93 // all exported functions are synchronized 94 mu sync.Mutex 95 96 cgroupInfo map[string]allocTaskCgroupInfo 97 98 doneCh chan struct{} 99 signalCh chan struct{} 100 logger hclog.Logger 101 } 102 103 func (c *cpusetManagerV1) AddAlloc(alloc *structs.Allocation) { 104 if alloc == nil || alloc.AllocatedResources == nil { 105 return 106 } 107 allocInfo := allocTaskCgroupInfo{} 108 for task, resources := range alloc.AllocatedResources.Tasks { 109 taskCpuset := cpuset.New(resources.Cpu.ReservedCores...) 110 cgroupPath := filepath.Join(c.cgroupParentPath, SharedCpusetCgroupName) 111 relativeCgroupPath := filepath.Join(c.cgroupParent, SharedCpusetCgroupName) 112 if taskCpuset.Size() > 0 { 113 cgroupPath, relativeCgroupPath = c.getCgroupPathsForTask(alloc.ID, task) 114 } 115 allocInfo[task] = &TaskCgroupInfo{ 116 CgroupPath: cgroupPath, 117 RelativeCgroupPath: relativeCgroupPath, 118 Cpuset: taskCpuset, 119 } 120 } 121 c.mu.Lock() 122 c.cgroupInfo[alloc.ID] = allocInfo 123 c.mu.Unlock() 124 go c.signalReconcile() 125 } 126 127 func (c *cpusetManagerV1) RemoveAlloc(allocID string) { 128 c.mu.Lock() 129 delete(c.cgroupInfo, allocID) 130 c.mu.Unlock() 131 go c.signalReconcile() 132 } 133 134 func (c *cpusetManagerV1) CgroupPathFor(allocID, task string) CgroupPathGetter { 135 return func(ctx context.Context) (string, error) { 136 c.mu.Lock() 137 allocInfo, ok := c.cgroupInfo[allocID] 138 if !ok { 139 c.mu.Unlock() 140 return "", fmt.Errorf("alloc not found for id %q", allocID) 141 } 142 143 taskInfo, ok := allocInfo[task] 144 c.mu.Unlock() 145 if !ok { 146 return "", fmt.Errorf("task %q not found", task) 147 } 148 149 timer, stop := helper.NewSafeTimer(0) 150 defer stop() 151 152 for { 153 154 if taskInfo.Error != nil { 155 break 156 } 157 158 if _, err := os.Stat(taskInfo.CgroupPath); os.IsNotExist(err) { 159 select { 160 case <-ctx.Done(): 161 return taskInfo.CgroupPath, ctx.Err() 162 case <-timer.C: 163 timer.Reset(100 * time.Millisecond) 164 continue 165 } 166 } 167 break 168 } 169 170 return taskInfo.CgroupPath, taskInfo.Error 171 } 172 173 } 174 175 // task name -> task cgroup info 176 type allocTaskCgroupInfo map[string]*TaskCgroupInfo 177 178 // Init checks that the cgroup parent and expected child cgroups have been created 179 // If the cgroup parent is set to /nomad then this will ensure that the /nomad/shared 180 // cgroup is initialized. 181 func (c *cpusetManagerV1) Init() { 182 c.doneCh = make(chan struct{}) 183 c.signalCh = make(chan struct{}) 184 c.logger.Info("initialized cpuset cgroup manager", "parent", c.cgroupParent, "cpuset", c.parentCpuset.String()) 185 go c.reconcileLoop() 186 } 187 188 func (c *cpusetManagerV1) reconcileLoop() { 189 timer := time.NewTimer(0) 190 if !timer.Stop() { 191 <-timer.C 192 } 193 defer timer.Stop() 194 195 for { 196 select { 197 case <-c.doneCh: 198 c.logger.Debug("shutting down reconcile loop") 199 return 200 case <-c.signalCh: 201 timer.Reset(500 * time.Millisecond) 202 case <-timer.C: 203 c.reconcileCpusets() 204 timer.Reset(cpusetReconcileInterval) 205 } 206 } 207 } 208 209 func (c *cpusetManagerV1) reconcileCpusets() { 210 c.mu.Lock() 211 defer c.mu.Unlock() 212 sharedCpuset := cpuset.New(c.parentCpuset.ToSlice()...) 213 reservedCpuset := cpuset.New() 214 taskCpusets := map[string]*TaskCgroupInfo{} 215 for _, alloc := range c.cgroupInfo { 216 for _, task := range alloc { 217 if task.Cpuset.Size() == 0 { 218 continue 219 } 220 sharedCpuset = sharedCpuset.Difference(task.Cpuset) 221 reservedCpuset = reservedCpuset.Union(task.Cpuset) 222 taskCpusets[task.CgroupPath] = task 223 } 224 } 225 226 // look for reserved cpusets which we don't know about and remove 227 files, err := ioutil.ReadDir(c.reservedCpusetPath()) 228 if err != nil { 229 c.logger.Error("failed to list files in reserved cgroup path during reconciliation", "path", c.reservedCpusetPath(), "error", err) 230 } 231 for _, f := range files { 232 if !f.IsDir() { 233 continue 234 } 235 path := filepath.Join(c.reservedCpusetPath(), f.Name()) 236 if _, ok := taskCpusets[path]; ok { 237 continue 238 } 239 c.logger.Debug("removing reserved cpuset cgroup", "path", path) 240 err := cgroups.RemovePaths(map[string]string{"cpuset": path}) 241 if err != nil { 242 c.logger.Error("removal of existing cpuset cgroup failed", "path", path, "error", err) 243 } 244 } 245 246 if err := c.setCgroupCpusetCPUs(c.sharedCpusetPath(), sharedCpuset.String()); err != nil { 247 c.logger.Error("could not write shared cpuset.cpus", "path", c.sharedCpusetPath(), "cpuset.cpus", sharedCpuset.String(), "error", err) 248 } 249 if err := c.setCgroupCpusetCPUs(c.reservedCpusetPath(), reservedCpuset.String()); err != nil { 250 c.logger.Error("could not write reserved cpuset.cpus", "path", c.reservedCpusetPath(), "cpuset.cpus", reservedCpuset.String(), "error", err) 251 } 252 for _, info := range taskCpusets { 253 if err := os.Mkdir(info.CgroupPath, 0755); err != nil && !os.IsExist(err) { 254 c.logger.Error("failed to create new cgroup path for task", "path", info.CgroupPath, "error", err) 255 info.Error = err 256 continue 257 } 258 259 // copy cpuset.mems from parent 260 _, parentMems, err := getCpusetSubsystemSettingsV1(filepath.Dir(info.CgroupPath)) 261 if err != nil { 262 c.logger.Error("failed to read parent cgroup settings for task", "path", info.CgroupPath, "error", err) 263 info.Error = err 264 continue 265 } 266 if err := cgroups.WriteFile(info.CgroupPath, "cpuset.mems", parentMems); err != nil { 267 c.logger.Error("failed to write cgroup cpuset.mems setting for task", "path", info.CgroupPath, "mems", parentMems, "error", err) 268 info.Error = err 269 continue 270 } 271 if err := c.setCgroupCpusetCPUs(info.CgroupPath, info.Cpuset.String()); err != nil { 272 c.logger.Error("failed to write cgroup cpuset.cpus settings for task", "path", info.CgroupPath, "cpus", info.Cpuset.String(), "error", err) 273 info.Error = err 274 continue 275 } 276 } 277 } 278 279 // setCgroupCpusetCPUs will compare an existing cpuset.cpus value with an expected value, overwriting the existing if different 280 // must hold a lock on cpusetManagerV1.mu before calling 281 func (_ *cpusetManagerV1) setCgroupCpusetCPUs(path, cpus string) error { 282 currentCpusRaw, err := cgroups.ReadFile(path, "cpuset.cpus") 283 if err != nil { 284 return err 285 } 286 287 if cpus != strings.TrimSpace(currentCpusRaw) { 288 if err := cgroups.WriteFile(path, "cpuset.cpus", cpus); err != nil { 289 return err 290 } 291 } 292 return nil 293 } 294 295 func (c *cpusetManagerV1) signalReconcile() { 296 select { 297 case c.signalCh <- struct{}{}: 298 case <-c.doneCh: 299 } 300 } 301 302 func (c *cpusetManagerV1) getCgroupPathsForTask(allocID, task string) (absolute, relative string) { 303 return filepath.Join(c.reservedCpusetPath(), fmt.Sprintf("%s-%s", allocID, task)), 304 filepath.Join(c.cgroupParent, ReservedCpusetCgroupName, fmt.Sprintf("%s-%s", allocID, task)) 305 } 306 307 func (c *cpusetManagerV1) sharedCpusetPath() string { 308 return filepath.Join(c.cgroupParentPath, SharedCpusetCgroupName) 309 } 310 311 func (c *cpusetManagerV1) reservedCpusetPath() string { 312 return filepath.Join(c.cgroupParentPath, ReservedCpusetCgroupName) 313 } 314 315 func getCPUsFromCgroupV1(group string) ([]uint16, error) { 316 cgroupPath, err := GetCgroupPathHelperV1("cpuset", group) 317 if err != nil { 318 return nil, err 319 } 320 321 cgroup := &configs.Cgroup{ 322 Path: group, 323 Resources: new(configs.Resources), 324 } 325 326 paths := map[string]string{ 327 "cpuset": cgroupPath, 328 } 329 330 man, err := fs.NewManager(cgroup, paths) 331 if err != nil { 332 return nil, err 333 } 334 335 stats, err := man.GetStats() 336 if err != nil { 337 return nil, err 338 } 339 340 return stats.CPUSetStats.CPUs, nil 341 } 342 343 // cpusetEnsureParentV1 makes sure that the parent directories of current 344 // are created and populated with the proper cpus and mems files copied 345 // from their respective parent. It does that recursively, starting from 346 // the top of the cpuset hierarchy (i.e. cpuset cgroup mount point). 347 func cpusetEnsureParentV1(current string) error { 348 var st unix.Statfs_t 349 350 parent := filepath.Dir(current) 351 err := unix.Statfs(parent, &st) 352 if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC { 353 return nil 354 } 355 // Treat non-existing directory as cgroupfs as it will be created, 356 // and the root cpuset directory obviously exists. 357 if err != nil && err != unix.ENOENT { 358 return &os.PathError{Op: "statfs", Path: parent, Err: err} 359 } 360 361 if err := cpusetEnsureParentV1(parent); err != nil { 362 return err 363 } 364 if err := os.Mkdir(current, 0755); err != nil && !os.IsExist(err) { 365 return err 366 } 367 return cpusetCopyIfNeededV1(current, parent) 368 } 369 370 // cpusetCopyIfNeededV1 copies the cpuset.cpus and cpuset.mems from the parent 371 // directory to the current directory if the file's contents are 0 372 func cpusetCopyIfNeededV1(current, parent string) error { 373 currentCpus, currentMems, err := getCpusetSubsystemSettingsV1(current) 374 if err != nil { 375 return err 376 } 377 parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(parent) 378 if err != nil { 379 return err 380 } 381 382 if isEmptyCpusetV1(currentCpus) { 383 if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil { 384 return err 385 } 386 } 387 if isEmptyCpusetV1(currentMems) { 388 if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil { 389 return err 390 } 391 } 392 return nil 393 } 394 395 func getCpusetSubsystemSettingsV1(parent string) (cpus, mems string, err error) { 396 if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil { 397 return 398 } 399 if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil { 400 return 401 } 402 return cpus, mems, nil 403 } 404 405 func isEmptyCpusetV1(str string) bool { 406 return str == "" || str == "\n" 407 } 408 409 func GetCgroupPathHelperV1(subsystem, cgroup string) (string, error) { 410 mnt, root, err := cgroups.FindCgroupMountpointAndRoot("", subsystem) 411 if err != nil { 412 return "", err 413 } 414 415 // This is needed for nested containers, because in /proc/self/cgroup we 416 // see paths from host, which don't exist in container. 417 relCgroup, err := filepath.Rel(root, cgroup) 418 if err != nil { 419 return "", err 420 } 421 422 result := filepath.Join(mnt, relCgroup) 423 return result, nil 424 }