github.com/kubewharf/katalyst-core@v0.5.3/pkg/util/cgroup/manager/cgroup.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package manager 18 19 import ( 20 "context" 21 "fmt" 22 "io/fs" 23 "math" 24 "os/exec" 25 "path/filepath" 26 "time" 27 28 "github.com/kubewharf/katalyst-core/pkg/metrics" 29 "github.com/kubewharf/katalyst-core/pkg/util/asyncworker" 30 "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" 31 "github.com/kubewharf/katalyst-core/pkg/util/general" 32 ) 33 34 func ApplyMemoryWithRelativePath(relCgroupPath string, data *common.MemoryData) error { 35 if data == nil { 36 return fmt.Errorf("ApplyMemoryWithRelativePath with nil cgroup data") 37 } 38 39 absCgroupPath := common.GetAbsCgroupPath("memory", relCgroupPath) 40 return GetManager().ApplyMemory(absCgroupPath, data) 41 } 42 43 func ApplyCPUWithRelativePath(relCgroupPath string, data *common.CPUData) error { 44 if data == nil { 45 return fmt.Errorf("ApplyCPUWithRelativePath with nil cgroup data") 46 } 47 48 absCgroupPath := common.GetAbsCgroupPath("cpu", relCgroupPath) 49 return GetManager().ApplyCPU(absCgroupPath, data) 50 } 51 52 func ApplyCPUSetWithRelativePath(relCgroupPath string, data *common.CPUSetData) error { 53 if data == nil { 54 return fmt.Errorf("ApplyCPUSetForContainer with nil cgroup data") 55 } 56 57 absCgroupPath := common.GetAbsCgroupPath("cpuset", relCgroupPath) 58 return GetManager().ApplyCPUSet(absCgroupPath, data) 59 } 60 61 func ApplyCPUSetWithAbsolutePath(absCgroupPath string, data *common.CPUSetData) error { 62 if data == nil { 63 return fmt.Errorf("ApplyCPUSetWithAbsolutePath with nil cgroup data") 64 } 65 66 return GetManager().ApplyCPUSet(absCgroupPath, data) 67 } 68 69 func ApplyCPUSetForContainer(podUID, containerId string, data *common.CPUSetData) error { 70 if data == nil { 71 return fmt.Errorf("ApplyCPUSetForContainer with nil cgroup data") 72 } 73 74 cpusetAbsCGPath, err := common.GetContainerAbsCgroupPath(common.CgroupSubsysCPUSet, podUID, containerId) 75 if err != nil { 76 return fmt.Errorf("GetContainerAbsCgroupPath failed with error: %v", err) 77 } 78 79 return ApplyCPUSetWithAbsolutePath(cpusetAbsCGPath, data) 80 } 81 82 func ApplyNetClsWithRelativePath(relCgroupPath string, data *common.NetClsData) error { 83 if data == nil { 84 return fmt.Errorf("ApplyNetClsWithRelativePath with nil cgroup data") 85 } 86 87 absCgroupPath := common.GetAbsCgroupPath("net_cls", relCgroupPath) 88 return GetManager().ApplyNetCls(absCgroupPath, data) 89 } 90 91 func ApplyNetClsWithAbsolutePath(absCgroupPath string, data *common.NetClsData) error { 92 if data == nil { 93 return fmt.Errorf("ApplyNetClsWithRelativePath with nil cgroup data") 94 } 95 96 return GetManager().ApplyNetCls(absCgroupPath, data) 97 } 98 99 // ApplyNetClsForContainer applies the net_cls config for a container. 100 func ApplyNetClsForContainer(podUID, containerId string, data *common.NetClsData) error { 101 if data == nil { 102 return fmt.Errorf("ApplyNetClass with nil cgroup data") 103 } 104 105 netClsAbsCGPath, err := common.GetContainerAbsCgroupPath(common.CgroupSubsysNetCls, podUID, containerId) 106 if err != nil { 107 return fmt.Errorf("GetContainerAbsCgroupPath failed with error: %v", err) 108 } 109 110 return ApplyNetClsWithAbsolutePath(netClsAbsCGPath, data) 111 } 112 113 func ApplyIOCostQoSWithRelativePath(relCgroupPath string, devID string, data *common.IOCostQoSData) error { 114 if data == nil { 115 return fmt.Errorf("ApplyIOCostQoSWithRelativePath with nil cgroup data") 116 } 117 118 absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath) 119 return ApplyIOCostQoSWithAbsolutePath(absCgroupPath, devID, data) 120 } 121 122 func ApplyIOCostQoSWithAbsolutePath(absCgroupPath string, devID string, data *common.IOCostQoSData) error { 123 if data == nil { 124 return fmt.Errorf("ApplyIOCostQoSWithAbsolutePath with nil cgroup data") 125 } 126 127 return GetManager().ApplyIOCostQoS(absCgroupPath, devID, data) 128 } 129 130 func ApplyIOCostModelWithRelativePath(relCgroupPath string, devID string, data *common.IOCostModelData) error { 131 if data == nil { 132 return fmt.Errorf("ApplyIOCostModelWithRelativePath with nil cgroup data") 133 } 134 135 absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath) 136 return ApplyIOCostModelWithAbsolutePath(absCgroupPath, devID, data) 137 } 138 139 func ApplyIOCostModelWithAbsolutePath(absCgroupPath string, devID string, data *common.IOCostModelData) error { 140 if data == nil { 141 return fmt.Errorf("ApplyIOCostModelWithAbsolutePath with nil cgroup data") 142 } 143 144 return GetManager().ApplyIOCostModel(absCgroupPath, devID, data) 145 } 146 147 func ApplyIOWeightWithRelativePath(relCgroupPath string, devID string, weight uint64) error { 148 absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath) 149 return ApplyIOWeightWithAbsolutePath(absCgroupPath, devID, weight) 150 } 151 152 func ApplyIOWeightWithAbsolutePath(absCgroupPath string, devID string, weight uint64) error { 153 return GetManager().ApplyIOWeight(absCgroupPath, devID, weight) 154 } 155 156 func ApplyUnifiedDataWithAbsolutePath(absCgroupPath, cgroupFileName, data string) error { 157 return GetManager().ApplyUnifiedData(absCgroupPath, cgroupFileName, data) 158 } 159 160 // ApplyUnifiedDataForContainer applies the data to cgroupFileName in subsys for a container. 161 func ApplyUnifiedDataForContainer(podUID, containerId, subsys, cgroupFileName, data string) error { 162 absCgroupPath, err := common.GetContainerAbsCgroupPath(subsys, podUID, containerId) 163 if err != nil { 164 return fmt.Errorf("GetContainerAbsCgroupPath failed with error: %v", err) 165 } 166 167 return ApplyUnifiedDataWithAbsolutePath(absCgroupPath, cgroupFileName, data) 168 } 169 170 func GetMemoryWithRelativePath(relCgroupPath string) (*common.MemoryStats, error) { 171 absCgroupPath := common.GetAbsCgroupPath("memory", relCgroupPath) 172 return GetManager().GetMemory(absCgroupPath) 173 } 174 175 func GetMemoryWithAbsolutePath(absCgroupPath string) (*common.MemoryStats, error) { 176 return GetManager().GetMemory(absCgroupPath) 177 } 178 179 func GetIOCostQoSWithRelativePath(relCgroupPath string) (map[string]*common.IOCostQoSData, error) { 180 absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath) 181 return GetIOCostQoSWithAbsolutePath(absCgroupPath) 182 } 183 184 func GetIOCostQoSWithAbsolutePath(absCgroupPath string) (map[string]*common.IOCostQoSData, error) { 185 return GetManager().GetIOCostQoS(absCgroupPath) 186 } 187 188 func GetIOCostModelWithRelativePath(relCgroupPath string) (map[string]*common.IOCostModelData, error) { 189 absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath) 190 return GetIOCostModelWithAbsolutePath(absCgroupPath) 191 } 192 193 func GetIOCostModelWithAbsolutePath(absCgroupPath string) (map[string]*common.IOCostModelData, error) { 194 return GetManager().GetIOCostModel(absCgroupPath) 195 } 196 197 func GetDeviceIOWeightWithRelativePath(relCgroupPath, devID string) (uint64, bool, error) { 198 absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath) 199 return GetDeviceIOWeightWithAbsolutePath(absCgroupPath, devID) 200 } 201 202 func GetDeviceIOWeightWithAbsolutePath(absCgroupPath, devID string) (uint64, bool, error) { 203 return GetManager().GetDeviceIOWeight(absCgroupPath, devID) 204 } 205 206 func GetIOStatWithRelativePath(relCgroupPath string) (map[string]map[string]string, error) { 207 absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath) 208 return GetIOStatWithAbsolutePath(absCgroupPath) 209 } 210 211 func GetIOStatWithAbsolutePath(absCgroupPath string) (map[string]map[string]string, error) { 212 return GetManager().GetIOStat(absCgroupPath) 213 } 214 215 func GetCPUWithRelativePath(relCgroupPath string) (*common.CPUStats, error) { 216 absCgroupPath := common.GetAbsCgroupPath("cpu", relCgroupPath) 217 return GetManager().GetCPU(absCgroupPath) 218 } 219 220 func GetCPUSetWithAbsolutePath(absCgroupPath string) (*common.CPUSetStats, error) { 221 return GetManager().GetCPUSet(absCgroupPath) 222 } 223 224 func GetCPUSetWithRelativePath(relCgroupPath string) (*common.CPUSetStats, error) { 225 absCgroupPath := common.GetAbsCgroupPath("cpuset", relCgroupPath) 226 return GetManager().GetCPUSet(absCgroupPath) 227 } 228 229 func GetMetricsWithRelativePath(relCgroupPath string, subsystems map[string]struct{}) (*common.CgroupMetrics, error) { 230 return GetManager().GetMetrics(relCgroupPath, subsystems) 231 } 232 233 func GetPidsWithRelativePath(relCgroupPath string) ([]string, error) { 234 absCgroupPath := common.GetAbsCgroupPath(common.DefaultSelectedSubsys, relCgroupPath) 235 return GetManager().GetPids(absCgroupPath) 236 } 237 238 func GetPidsWithAbsolutePath(absCgroupPath string) ([]string, error) { 239 return GetManager().GetPids(absCgroupPath) 240 } 241 242 func GetTasksWithRelativePath(cgroupPath, subsys string) ([]string, error) { 243 absCgroupPath := common.GetAbsCgroupPath(subsys, cgroupPath) 244 return GetManager().GetTasks(absCgroupPath) 245 } 246 247 func GetTasksWithAbsolutePath(absCgroupPath string) ([]string, error) { 248 return GetManager().GetTasks(absCgroupPath) 249 } 250 251 func GetCPUSetForContainer(podUID, containerId string) (*common.CPUSetStats, error) { 252 cpusetAbsCGPath, err := common.GetContainerAbsCgroupPath(common.CgroupSubsysCPUSet, podUID, containerId) 253 if err != nil { 254 return nil, fmt.Errorf("GetContainerAbsCgroupPath failed with error: %v", err) 255 } 256 257 return GetCPUSetWithAbsolutePath(cpusetAbsCGPath) 258 } 259 260 func DropCacheWithTimeoutForContainer(ctx context.Context, podUID, containerId string, timeoutSecs int, nbytes int64) error { 261 memoryAbsCGPath, err := common.GetContainerAbsCgroupPath(common.CgroupSubsysMemory, podUID, containerId) 262 if err != nil { 263 return fmt.Errorf("GetContainerAbsCgroupPath failed with error: %v", err) 264 } 265 266 err = DropCacheWithTimeoutAndAbsCGPath(timeoutSecs, memoryAbsCGPath, nbytes) 267 _ = asyncworker.EmitAsyncedMetrics(ctx, metrics.ConvertMapToTags(map[string]string{ 268 "podUID": podUID, 269 "containerID": containerId, 270 "succeeded": fmt.Sprintf("%v", err == nil), 271 })...) 272 return err 273 } 274 275 func DropCacheWithTimeoutAndAbsCGPath(timeoutSecs int, absCgroupPath string, nbytes int64) error { 276 startTime := time.Now() 277 278 var cmd string 279 if common.CheckCgroup2UnifiedMode() { 280 if nbytes == 0 { 281 general.Infof("[DropCacheWithTimeoutAndAbsCGPath] skip drop cache on %s since nbytes is zero", absCgroupPath) 282 return nil 283 } 284 // cgv2 285 cmd = fmt.Sprintf("timeout %d echo %d > %s", timeoutSecs, nbytes, filepath.Join(absCgroupPath, "memory.reclaim")) 286 } else { 287 // cgv1 288 cmd = fmt.Sprintf("timeout %d echo 0 > %s", timeoutSecs, filepath.Join(absCgroupPath, "memory.force_empty")) 289 } 290 291 _, err := exec.Command("bash", "-c", cmd).Output() 292 293 delta := time.Since(startTime).Seconds() 294 general.Infof("[DropCacheWithTimeoutAndAbsCGPath] it takes %v to do \"%s\" on cgroup: %s", delta, cmd, absCgroupPath) 295 296 // if this command timeout, a none-nil error will be returned, 297 // but we should return error iff error returns without timeout 298 if err != nil && int(delta) < timeoutSecs { 299 return err 300 } 301 302 return nil 303 } 304 305 func SetExtraCGMemLimitWithTimeoutAndRelCGPath(ctx context.Context, relCgroupPath string, timeoutSecs int, nbytes int64) error { 306 memoryAbsCGPath := common.GetAbsCgroupPath(common.CgroupSubsysMemory, relCgroupPath) 307 308 err := SetExtraCGMemLimitWithTimeoutAndAbsCGPath(timeoutSecs, memoryAbsCGPath, nbytes) 309 _ = asyncworker.EmitAsyncedMetrics(ctx, metrics.ConvertMapToTags(map[string]string{ 310 "relCgroupPath": relCgroupPath, 311 "succeeded": fmt.Sprintf("%v", err == nil), 312 })...) 313 return err 314 } 315 316 func SetExtraCGMemLimitWithTimeoutAndAbsCGPath(timeoutSecs int, absCgroupPath string, nbytes int64) error { 317 if nbytes == 0 { 318 return fmt.Errorf("invalid memory limit nbytes: %d", nbytes) 319 } 320 321 startTime := time.Now() 322 323 var interfacePath string 324 if common.CheckCgroup2UnifiedMode() { 325 if nbytes == 0 { 326 general.Infof("[SetExtraCGMemLimitWithTimeoutAndAbsCGPath] skip drop cache on %s since nbytes is zero", absCgroupPath) 327 return nil 328 } 329 // cgv2 330 interfacePath = filepath.Join(absCgroupPath, "memory.max") 331 } else { 332 // cgv1 333 interfacePath = filepath.Join(absCgroupPath, "memory.limit_in_bytes") 334 } 335 336 cmd := fmt.Sprintf("timeout %d echo %d > %s", timeoutSecs, nbytes, interfacePath) 337 338 _, err := exec.Command("bash", "-c", cmd).Output() 339 340 delta := time.Since(startTime).Seconds() 341 general.Infof("[SetExtraCGMemLimitWithTimeoutAndAbsCGPath] it takes %v to do \"%s\" on cgroup: %s", delta, cmd, absCgroupPath) 342 343 // if this command timeout, a none-nil error will be returned, 344 // but we should return error iff error returns without timeout 345 if err != nil && int(delta) < timeoutSecs { 346 return err 347 } 348 349 return nil 350 } 351 352 func SetSwapMaxWithAbsolutePathToParentCgroupRecursive(absCgroupPath string) error { 353 if !common.CheckCgroup2UnifiedMode() { 354 general.Infof("[SetSwapMaxWithAbsolutePathToParentCgroupRecursive] is not supported on cgroupv1") 355 return nil 356 } 357 general.Infof("[SetSwapMaxWithAbsolutePathToParentCgroupRecursive] on cgroup: %s", absCgroupPath) 358 swapMaxData := &common.MemoryData{SwapMaxInBytes: math.MaxInt64} 359 err := GetManager().ApplyMemory(absCgroupPath, swapMaxData) 360 if err != nil { 361 return err 362 } 363 364 parentDir := filepath.Dir(absCgroupPath) 365 if parentDir != absCgroupPath && parentDir != common.GetCgroupRootPath(common.CgroupSubsysMemory) { 366 err = SetSwapMaxWithAbsolutePathToParentCgroupRecursive(parentDir) 367 if err != nil { 368 return err 369 } 370 } 371 return nil 372 } 373 374 func SetSwapMaxWithAbsolutePathRecursive(absCgroupPath string) error { 375 if !common.CheckCgroup2UnifiedMode() { 376 general.Infof("[SetSwapMaxWithAbsolutePathRecursive] is not supported on cgroupv1") 377 return nil 378 } 379 380 general.Infof("[SetSwapMaxWithAbsolutePathRecursive] on cgroup: %s", absCgroupPath) 381 382 // set swap max to parent cgroups recursively 383 if err := SetSwapMaxWithAbsolutePathToParentCgroupRecursive(filepath.Dir(absCgroupPath)); err != nil { 384 return err 385 } 386 387 // set swap max to sub cgroups recursively 388 err := filepath.Walk(absCgroupPath, func(path string, info fs.FileInfo, err error) error { 389 if err != nil { 390 general.Infof("prevent panic by handling failure accessing a path: %s, err: %v", path, err) 391 return err 392 } 393 if info.IsDir() { 394 memStats, err := GetMemoryWithAbsolutePath(path) 395 if err != nil { 396 return filepath.SkipDir 397 } 398 var diff int64 = math.MaxInt64 399 if memStats.Limit-memStats.Usage < uint64(diff) { 400 diff = int64(memStats.Limit - memStats.Usage) 401 } 402 swapMaxData := &common.MemoryData{SwapMaxInBytes: diff} 403 err = GetManager().ApplyMemory(path, swapMaxData) 404 if err != nil { 405 return filepath.SkipDir 406 } 407 } 408 return nil 409 }) 410 if err != nil { 411 general.Infof("error walking the path: %s, err: %v", absCgroupPath, err) 412 return err 413 } 414 return nil 415 } 416 417 func DisableSwapMaxWithAbsolutePathRecursive(absCgroupPath string) error { 418 if !common.CheckCgroup2UnifiedMode() { 419 general.Infof("[DisableSwapMaxWithAbsolutePathRecursive] is not supported on cgroupv1") 420 return nil 421 } 422 general.Infof("[DisableSwapMaxWithAbsolutePathRecursive] on cgroup: %s", absCgroupPath) 423 // disable swap to sub cgroups recursively 424 err := filepath.Walk(absCgroupPath, func(path string, info fs.FileInfo, err error) error { 425 if err != nil { 426 general.Infof("prevent panic by handling failure accessing a path: %s, err: %v", path, err) 427 return err 428 } 429 if info.IsDir() { 430 swapMaxData := &common.MemoryData{SwapMaxInBytes: -1} 431 err = GetManager().ApplyMemory(path, swapMaxData) 432 if err != nil { 433 return filepath.SkipDir 434 } 435 } 436 return nil 437 }) 438 if err != nil { 439 general.Infof("error walking the path: %s, err: %v ", absCgroupPath, err) 440 return err 441 } 442 return nil 443 } 444 445 func MemoryOffloadingWithAbsolutePath(ctx context.Context, absCgroupPath string, nbytes int64) error { 446 startTime := time.Now() 447 448 var cmd string 449 if common.CheckCgroup2UnifiedMode() { 450 if nbytes <= 0 { 451 general.Infof("[MemoryOffloadingWithAbsolutePath] skip memory reclaim on %s since nbytes is not valid", absCgroupPath) 452 return nil 453 } 454 // cgv2 455 cmd = fmt.Sprintf("echo %d > %s", nbytes, filepath.Join(absCgroupPath, "memory.reclaim")) 456 } else { 457 // cgv1 458 general.Infof("[MemoryOffloadingWithAbsolutePath] is not supported on cgroupv1") 459 return nil 460 } 461 462 _, err := exec.Command("bash", "-c", cmd).Output() 463 464 _ = asyncworker.EmitAsyncedMetrics(ctx, metrics.ConvertMapToTags(map[string]string{ 465 "absCGPath": absCgroupPath, 466 "succeeded": fmt.Sprintf("%v", err == nil), 467 })...) 468 delta := time.Since(startTime).Seconds() 469 general.Infof("[MemoryOffloadingWithAbsolutePath] it takes %v to do \"%s\" on cgroup: %s", delta, cmd, absCgroupPath) 470 471 return err 472 }