github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/drivers/docker/reconcile_cpuset.go (about) 1 //go:build linux 2 3 package docker 4 5 import ( 6 "context" 7 "fmt" 8 "path/filepath" 9 "sync" 10 "time" 11 12 "github.com/hashicorp/go-hclog" 13 "github.com/hashicorp/nomad/client/lib/cgutil" 14 "github.com/hashicorp/nomad/helper" 15 ) 16 17 const ( 18 cpusetReconcileInterval = 1 * time.Second 19 ) 20 21 type CpusetFixer interface { 22 Start() 23 } 24 25 // cpusetFixer adjusts the cpuset.cpus cgroup value to the assigned value by Nomad. 26 // 27 // Due to Docker not allowing the configuration of the full cgroup path, we must 28 // manually fix the cpuset values for all docker containers continuously, as the 29 // values will change as tasks of any driver using reserved cores are started and 30 // stopped, changing the size of the remaining shared cpu pool. 31 // 32 // The exec/java, podman, and containerd runtimes let you specify the cgroup path, 33 // making use of the cgroup Nomad creates and manages on behalf of the task. 34 type cpusetFixer struct { 35 ctx context.Context 36 logger hclog.Logger 37 interval time.Duration 38 once sync.Once 39 tasks func() map[coordinate]struct{} 40 } 41 42 func newCpusetFixer(d *Driver) CpusetFixer { 43 return &cpusetFixer{ 44 interval: cpusetReconcileInterval, 45 ctx: d.ctx, 46 logger: d.logger, 47 tasks: d.trackedTasks, 48 } 49 } 50 51 // Start will start the background cpuset reconciliation until the cf context is 52 // cancelled for shutdown. 53 // 54 // Only runs if cgroups.v2 is in use. 55 func (cf *cpusetFixer) Start() { 56 cf.once.Do(func() { 57 if cgutil.UseV2 { 58 go cf.loop() 59 } 60 }) 61 } 62 63 func (cf *cpusetFixer) loop() { 64 timer, cancel := helper.NewSafeTimer(0) 65 defer cancel() 66 67 for { 68 select { 69 case <-cf.ctx.Done(): 70 return 71 case <-timer.C: 72 timer.Stop() 73 cf.apply() 74 timer.Reset(cf.interval) 75 } 76 } 77 } 78 79 func (cf *cpusetFixer) apply() { 80 coordinates := cf.tasks() 81 for c := range coordinates { 82 cf.fix(c) 83 } 84 } 85 86 func (cf *cpusetFixer) fix(c coordinate) { 87 source := c.NomadCgroup() 88 destination := c.DockerCgroup() 89 if err := cgutil.CopyCpuset(source, destination); err != nil { 90 cf.logger.Debug("failed to copy cpuset", "error", err) 91 } 92 } 93 94 type coordinate struct { 95 containerID string 96 allocID string 97 task string 98 path string 99 } 100 101 func (c coordinate) NomadCgroup() string { 102 parent, _ := cgutil.SplitPath(c.path) 103 return filepath.Join(cgutil.CgroupRoot, parent, cgutil.CgroupScope(c.allocID, c.task)) 104 } 105 106 func (c coordinate) DockerCgroup() string { 107 parent, _ := cgutil.SplitPath(c.path) 108 return filepath.Join(cgutil.CgroupRoot, parent, fmt.Sprintf("docker-%s.scope", c.containerID)) 109 } 110 111 func (d *Driver) trackedTasks() map[coordinate]struct{} { 112 d.tasks.lock.RLock() 113 defer d.tasks.lock.RUnlock() 114 115 m := make(map[coordinate]struct{}, len(d.tasks.store)) 116 for _, h := range d.tasks.store { 117 m[coordinate{ 118 containerID: h.containerID, 119 allocID: h.task.AllocID, 120 task: h.task.Name, 121 path: h.task.Resources.LinuxResources.CpusetCgroupPath, 122 }] = struct{}{} 123 } 124 return m 125 }