github.com/kubewharf/katalyst-core@v0.5.3/pkg/util/cgroup/manager/cgroup.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package manager
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"io/fs"
    23  	"math"
    24  	"os/exec"
    25  	"path/filepath"
    26  	"time"
    27  
    28  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    29  	"github.com/kubewharf/katalyst-core/pkg/util/asyncworker"
    30  	"github.com/kubewharf/katalyst-core/pkg/util/cgroup/common"
    31  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    32  )
    33  
    34  func ApplyMemoryWithRelativePath(relCgroupPath string, data *common.MemoryData) error {
    35  	if data == nil {
    36  		return fmt.Errorf("ApplyMemoryWithRelativePath with nil cgroup data")
    37  	}
    38  
    39  	absCgroupPath := common.GetAbsCgroupPath("memory", relCgroupPath)
    40  	return GetManager().ApplyMemory(absCgroupPath, data)
    41  }
    42  
    43  func ApplyCPUWithRelativePath(relCgroupPath string, data *common.CPUData) error {
    44  	if data == nil {
    45  		return fmt.Errorf("ApplyCPUWithRelativePath with nil cgroup data")
    46  	}
    47  
    48  	absCgroupPath := common.GetAbsCgroupPath("cpu", relCgroupPath)
    49  	return GetManager().ApplyCPU(absCgroupPath, data)
    50  }
    51  
    52  func ApplyCPUSetWithRelativePath(relCgroupPath string, data *common.CPUSetData) error {
    53  	if data == nil {
    54  		return fmt.Errorf("ApplyCPUSetForContainer with nil cgroup data")
    55  	}
    56  
    57  	absCgroupPath := common.GetAbsCgroupPath("cpuset", relCgroupPath)
    58  	return GetManager().ApplyCPUSet(absCgroupPath, data)
    59  }
    60  
    61  func ApplyCPUSetWithAbsolutePath(absCgroupPath string, data *common.CPUSetData) error {
    62  	if data == nil {
    63  		return fmt.Errorf("ApplyCPUSetWithAbsolutePath with nil cgroup data")
    64  	}
    65  
    66  	return GetManager().ApplyCPUSet(absCgroupPath, data)
    67  }
    68  
    69  func ApplyCPUSetForContainer(podUID, containerId string, data *common.CPUSetData) error {
    70  	if data == nil {
    71  		return fmt.Errorf("ApplyCPUSetForContainer with nil cgroup data")
    72  	}
    73  
    74  	cpusetAbsCGPath, err := common.GetContainerAbsCgroupPath(common.CgroupSubsysCPUSet, podUID, containerId)
    75  	if err != nil {
    76  		return fmt.Errorf("GetContainerAbsCgroupPath failed with error: %v", err)
    77  	}
    78  
    79  	return ApplyCPUSetWithAbsolutePath(cpusetAbsCGPath, data)
    80  }
    81  
    82  func ApplyNetClsWithRelativePath(relCgroupPath string, data *common.NetClsData) error {
    83  	if data == nil {
    84  		return fmt.Errorf("ApplyNetClsWithRelativePath with nil cgroup data")
    85  	}
    86  
    87  	absCgroupPath := common.GetAbsCgroupPath("net_cls", relCgroupPath)
    88  	return GetManager().ApplyNetCls(absCgroupPath, data)
    89  }
    90  
    91  func ApplyNetClsWithAbsolutePath(absCgroupPath string, data *common.NetClsData) error {
    92  	if data == nil {
    93  		return fmt.Errorf("ApplyNetClsWithRelativePath with nil cgroup data")
    94  	}
    95  
    96  	return GetManager().ApplyNetCls(absCgroupPath, data)
    97  }
    98  
    99  // ApplyNetClsForContainer applies the net_cls config for a container.
   100  func ApplyNetClsForContainer(podUID, containerId string, data *common.NetClsData) error {
   101  	if data == nil {
   102  		return fmt.Errorf("ApplyNetClass with nil cgroup data")
   103  	}
   104  
   105  	netClsAbsCGPath, err := common.GetContainerAbsCgroupPath(common.CgroupSubsysNetCls, podUID, containerId)
   106  	if err != nil {
   107  		return fmt.Errorf("GetContainerAbsCgroupPath failed with error: %v", err)
   108  	}
   109  
   110  	return ApplyNetClsWithAbsolutePath(netClsAbsCGPath, data)
   111  }
   112  
   113  func ApplyIOCostQoSWithRelativePath(relCgroupPath string, devID string, data *common.IOCostQoSData) error {
   114  	if data == nil {
   115  		return fmt.Errorf("ApplyIOCostQoSWithRelativePath with nil cgroup data")
   116  	}
   117  
   118  	absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath)
   119  	return ApplyIOCostQoSWithAbsolutePath(absCgroupPath, devID, data)
   120  }
   121  
   122  func ApplyIOCostQoSWithAbsolutePath(absCgroupPath string, devID string, data *common.IOCostQoSData) error {
   123  	if data == nil {
   124  		return fmt.Errorf("ApplyIOCostQoSWithAbsolutePath with nil cgroup data")
   125  	}
   126  
   127  	return GetManager().ApplyIOCostQoS(absCgroupPath, devID, data)
   128  }
   129  
   130  func ApplyIOCostModelWithRelativePath(relCgroupPath string, devID string, data *common.IOCostModelData) error {
   131  	if data == nil {
   132  		return fmt.Errorf("ApplyIOCostModelWithRelativePath with nil cgroup data")
   133  	}
   134  
   135  	absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath)
   136  	return ApplyIOCostModelWithAbsolutePath(absCgroupPath, devID, data)
   137  }
   138  
   139  func ApplyIOCostModelWithAbsolutePath(absCgroupPath string, devID string, data *common.IOCostModelData) error {
   140  	if data == nil {
   141  		return fmt.Errorf("ApplyIOCostModelWithAbsolutePath with nil cgroup data")
   142  	}
   143  
   144  	return GetManager().ApplyIOCostModel(absCgroupPath, devID, data)
   145  }
   146  
   147  func ApplyIOWeightWithRelativePath(relCgroupPath string, devID string, weight uint64) error {
   148  	absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath)
   149  	return ApplyIOWeightWithAbsolutePath(absCgroupPath, devID, weight)
   150  }
   151  
   152  func ApplyIOWeightWithAbsolutePath(absCgroupPath string, devID string, weight uint64) error {
   153  	return GetManager().ApplyIOWeight(absCgroupPath, devID, weight)
   154  }
   155  
   156  func ApplyUnifiedDataWithAbsolutePath(absCgroupPath, cgroupFileName, data string) error {
   157  	return GetManager().ApplyUnifiedData(absCgroupPath, cgroupFileName, data)
   158  }
   159  
   160  // ApplyUnifiedDataForContainer applies the data to cgroupFileName in subsys for a container.
   161  func ApplyUnifiedDataForContainer(podUID, containerId, subsys, cgroupFileName, data string) error {
   162  	absCgroupPath, err := common.GetContainerAbsCgroupPath(subsys, podUID, containerId)
   163  	if err != nil {
   164  		return fmt.Errorf("GetContainerAbsCgroupPath failed with error: %v", err)
   165  	}
   166  
   167  	return ApplyUnifiedDataWithAbsolutePath(absCgroupPath, cgroupFileName, data)
   168  }
   169  
   170  func GetMemoryWithRelativePath(relCgroupPath string) (*common.MemoryStats, error) {
   171  	absCgroupPath := common.GetAbsCgroupPath("memory", relCgroupPath)
   172  	return GetManager().GetMemory(absCgroupPath)
   173  }
   174  
   175  func GetMemoryWithAbsolutePath(absCgroupPath string) (*common.MemoryStats, error) {
   176  	return GetManager().GetMemory(absCgroupPath)
   177  }
   178  
   179  func GetIOCostQoSWithRelativePath(relCgroupPath string) (map[string]*common.IOCostQoSData, error) {
   180  	absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath)
   181  	return GetIOCostQoSWithAbsolutePath(absCgroupPath)
   182  }
   183  
   184  func GetIOCostQoSWithAbsolutePath(absCgroupPath string) (map[string]*common.IOCostQoSData, error) {
   185  	return GetManager().GetIOCostQoS(absCgroupPath)
   186  }
   187  
   188  func GetIOCostModelWithRelativePath(relCgroupPath string) (map[string]*common.IOCostModelData, error) {
   189  	absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath)
   190  	return GetIOCostModelWithAbsolutePath(absCgroupPath)
   191  }
   192  
   193  func GetIOCostModelWithAbsolutePath(absCgroupPath string) (map[string]*common.IOCostModelData, error) {
   194  	return GetManager().GetIOCostModel(absCgroupPath)
   195  }
   196  
   197  func GetDeviceIOWeightWithRelativePath(relCgroupPath, devID string) (uint64, bool, error) {
   198  	absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath)
   199  	return GetDeviceIOWeightWithAbsolutePath(absCgroupPath, devID)
   200  }
   201  
   202  func GetDeviceIOWeightWithAbsolutePath(absCgroupPath, devID string) (uint64, bool, error) {
   203  	return GetManager().GetDeviceIOWeight(absCgroupPath, devID)
   204  }
   205  
   206  func GetIOStatWithRelativePath(relCgroupPath string) (map[string]map[string]string, error) {
   207  	absCgroupPath := common.GetAbsCgroupPath(common.CgroupSubsysIO, relCgroupPath)
   208  	return GetIOStatWithAbsolutePath(absCgroupPath)
   209  }
   210  
   211  func GetIOStatWithAbsolutePath(absCgroupPath string) (map[string]map[string]string, error) {
   212  	return GetManager().GetIOStat(absCgroupPath)
   213  }
   214  
   215  func GetCPUWithRelativePath(relCgroupPath string) (*common.CPUStats, error) {
   216  	absCgroupPath := common.GetAbsCgroupPath("cpu", relCgroupPath)
   217  	return GetManager().GetCPU(absCgroupPath)
   218  }
   219  
   220  func GetCPUSetWithAbsolutePath(absCgroupPath string) (*common.CPUSetStats, error) {
   221  	return GetManager().GetCPUSet(absCgroupPath)
   222  }
   223  
   224  func GetCPUSetWithRelativePath(relCgroupPath string) (*common.CPUSetStats, error) {
   225  	absCgroupPath := common.GetAbsCgroupPath("cpuset", relCgroupPath)
   226  	return GetManager().GetCPUSet(absCgroupPath)
   227  }
   228  
   229  func GetMetricsWithRelativePath(relCgroupPath string, subsystems map[string]struct{}) (*common.CgroupMetrics, error) {
   230  	return GetManager().GetMetrics(relCgroupPath, subsystems)
   231  }
   232  
   233  func GetPidsWithRelativePath(relCgroupPath string) ([]string, error) {
   234  	absCgroupPath := common.GetAbsCgroupPath(common.DefaultSelectedSubsys, relCgroupPath)
   235  	return GetManager().GetPids(absCgroupPath)
   236  }
   237  
   238  func GetPidsWithAbsolutePath(absCgroupPath string) ([]string, error) {
   239  	return GetManager().GetPids(absCgroupPath)
   240  }
   241  
   242  func GetTasksWithRelativePath(cgroupPath, subsys string) ([]string, error) {
   243  	absCgroupPath := common.GetAbsCgroupPath(subsys, cgroupPath)
   244  	return GetManager().GetTasks(absCgroupPath)
   245  }
   246  
   247  func GetTasksWithAbsolutePath(absCgroupPath string) ([]string, error) {
   248  	return GetManager().GetTasks(absCgroupPath)
   249  }
   250  
   251  func GetCPUSetForContainer(podUID, containerId string) (*common.CPUSetStats, error) {
   252  	cpusetAbsCGPath, err := common.GetContainerAbsCgroupPath(common.CgroupSubsysCPUSet, podUID, containerId)
   253  	if err != nil {
   254  		return nil, fmt.Errorf("GetContainerAbsCgroupPath failed with error: %v", err)
   255  	}
   256  
   257  	return GetCPUSetWithAbsolutePath(cpusetAbsCGPath)
   258  }
   259  
   260  func DropCacheWithTimeoutForContainer(ctx context.Context, podUID, containerId string, timeoutSecs int, nbytes int64) error {
   261  	memoryAbsCGPath, err := common.GetContainerAbsCgroupPath(common.CgroupSubsysMemory, podUID, containerId)
   262  	if err != nil {
   263  		return fmt.Errorf("GetContainerAbsCgroupPath failed with error: %v", err)
   264  	}
   265  
   266  	err = DropCacheWithTimeoutAndAbsCGPath(timeoutSecs, memoryAbsCGPath, nbytes)
   267  	_ = asyncworker.EmitAsyncedMetrics(ctx, metrics.ConvertMapToTags(map[string]string{
   268  		"podUID":      podUID,
   269  		"containerID": containerId,
   270  		"succeeded":   fmt.Sprintf("%v", err == nil),
   271  	})...)
   272  	return err
   273  }
   274  
   275  func DropCacheWithTimeoutAndAbsCGPath(timeoutSecs int, absCgroupPath string, nbytes int64) error {
   276  	startTime := time.Now()
   277  
   278  	var cmd string
   279  	if common.CheckCgroup2UnifiedMode() {
   280  		if nbytes == 0 {
   281  			general.Infof("[DropCacheWithTimeoutAndAbsCGPath] skip drop cache on %s since nbytes is zero", absCgroupPath)
   282  			return nil
   283  		}
   284  		// cgv2
   285  		cmd = fmt.Sprintf("timeout %d echo %d > %s", timeoutSecs, nbytes, filepath.Join(absCgroupPath, "memory.reclaim"))
   286  	} else {
   287  		// cgv1
   288  		cmd = fmt.Sprintf("timeout %d echo 0 > %s", timeoutSecs, filepath.Join(absCgroupPath, "memory.force_empty"))
   289  	}
   290  
   291  	_, err := exec.Command("bash", "-c", cmd).Output()
   292  
   293  	delta := time.Since(startTime).Seconds()
   294  	general.Infof("[DropCacheWithTimeoutAndAbsCGPath] it takes %v to do \"%s\" on cgroup: %s", delta, cmd, absCgroupPath)
   295  
   296  	// if this command timeout, a none-nil error will be returned,
   297  	// but we should return error iff error returns without timeout
   298  	if err != nil && int(delta) < timeoutSecs {
   299  		return err
   300  	}
   301  
   302  	return nil
   303  }
   304  
   305  func SetExtraCGMemLimitWithTimeoutAndRelCGPath(ctx context.Context, relCgroupPath string, timeoutSecs int, nbytes int64) error {
   306  	memoryAbsCGPath := common.GetAbsCgroupPath(common.CgroupSubsysMemory, relCgroupPath)
   307  
   308  	err := SetExtraCGMemLimitWithTimeoutAndAbsCGPath(timeoutSecs, memoryAbsCGPath, nbytes)
   309  	_ = asyncworker.EmitAsyncedMetrics(ctx, metrics.ConvertMapToTags(map[string]string{
   310  		"relCgroupPath": relCgroupPath,
   311  		"succeeded":     fmt.Sprintf("%v", err == nil),
   312  	})...)
   313  	return err
   314  }
   315  
   316  func SetExtraCGMemLimitWithTimeoutAndAbsCGPath(timeoutSecs int, absCgroupPath string, nbytes int64) error {
   317  	if nbytes == 0 {
   318  		return fmt.Errorf("invalid memory limit nbytes: %d", nbytes)
   319  	}
   320  
   321  	startTime := time.Now()
   322  
   323  	var interfacePath string
   324  	if common.CheckCgroup2UnifiedMode() {
   325  		if nbytes == 0 {
   326  			general.Infof("[SetExtraCGMemLimitWithTimeoutAndAbsCGPath] skip drop cache on %s since nbytes is zero", absCgroupPath)
   327  			return nil
   328  		}
   329  		// cgv2
   330  		interfacePath = filepath.Join(absCgroupPath, "memory.max")
   331  	} else {
   332  		// cgv1
   333  		interfacePath = filepath.Join(absCgroupPath, "memory.limit_in_bytes")
   334  	}
   335  
   336  	cmd := fmt.Sprintf("timeout %d echo %d > %s", timeoutSecs, nbytes, interfacePath)
   337  
   338  	_, err := exec.Command("bash", "-c", cmd).Output()
   339  
   340  	delta := time.Since(startTime).Seconds()
   341  	general.Infof("[SetExtraCGMemLimitWithTimeoutAndAbsCGPath] it takes %v to do \"%s\" on cgroup: %s", delta, cmd, absCgroupPath)
   342  
   343  	// if this command timeout, a none-nil error will be returned,
   344  	// but we should return error iff error returns without timeout
   345  	if err != nil && int(delta) < timeoutSecs {
   346  		return err
   347  	}
   348  
   349  	return nil
   350  }
   351  
   352  func SetSwapMaxWithAbsolutePathToParentCgroupRecursive(absCgroupPath string) error {
   353  	if !common.CheckCgroup2UnifiedMode() {
   354  		general.Infof("[SetSwapMaxWithAbsolutePathToParentCgroupRecursive] is not supported on cgroupv1")
   355  		return nil
   356  	}
   357  	general.Infof("[SetSwapMaxWithAbsolutePathToParentCgroupRecursive] on cgroup: %s", absCgroupPath)
   358  	swapMaxData := &common.MemoryData{SwapMaxInBytes: math.MaxInt64}
   359  	err := GetManager().ApplyMemory(absCgroupPath, swapMaxData)
   360  	if err != nil {
   361  		return err
   362  	}
   363  
   364  	parentDir := filepath.Dir(absCgroupPath)
   365  	if parentDir != absCgroupPath && parentDir != common.GetCgroupRootPath(common.CgroupSubsysMemory) {
   366  		err = SetSwapMaxWithAbsolutePathToParentCgroupRecursive(parentDir)
   367  		if err != nil {
   368  			return err
   369  		}
   370  	}
   371  	return nil
   372  }
   373  
   374  func SetSwapMaxWithAbsolutePathRecursive(absCgroupPath string) error {
   375  	if !common.CheckCgroup2UnifiedMode() {
   376  		general.Infof("[SetSwapMaxWithAbsolutePathRecursive] is not supported on cgroupv1")
   377  		return nil
   378  	}
   379  
   380  	general.Infof("[SetSwapMaxWithAbsolutePathRecursive] on cgroup: %s", absCgroupPath)
   381  
   382  	// set swap max to parent cgroups recursively
   383  	if err := SetSwapMaxWithAbsolutePathToParentCgroupRecursive(filepath.Dir(absCgroupPath)); err != nil {
   384  		return err
   385  	}
   386  
   387  	// set swap max to sub cgroups recursively
   388  	err := filepath.Walk(absCgroupPath, func(path string, info fs.FileInfo, err error) error {
   389  		if err != nil {
   390  			general.Infof("prevent panic by handling failure accessing a path: %s, err: %v", path, err)
   391  			return err
   392  		}
   393  		if info.IsDir() {
   394  			memStats, err := GetMemoryWithAbsolutePath(path)
   395  			if err != nil {
   396  				return filepath.SkipDir
   397  			}
   398  			var diff int64 = math.MaxInt64
   399  			if memStats.Limit-memStats.Usage < uint64(diff) {
   400  				diff = int64(memStats.Limit - memStats.Usage)
   401  			}
   402  			swapMaxData := &common.MemoryData{SwapMaxInBytes: diff}
   403  			err = GetManager().ApplyMemory(path, swapMaxData)
   404  			if err != nil {
   405  				return filepath.SkipDir
   406  			}
   407  		}
   408  		return nil
   409  	})
   410  	if err != nil {
   411  		general.Infof("error walking the path: %s, err: %v", absCgroupPath, err)
   412  		return err
   413  	}
   414  	return nil
   415  }
   416  
   417  func DisableSwapMaxWithAbsolutePathRecursive(absCgroupPath string) error {
   418  	if !common.CheckCgroup2UnifiedMode() {
   419  		general.Infof("[DisableSwapMaxWithAbsolutePathRecursive] is not supported on cgroupv1")
   420  		return nil
   421  	}
   422  	general.Infof("[DisableSwapMaxWithAbsolutePathRecursive] on cgroup: %s", absCgroupPath)
   423  	// disable swap to sub cgroups recursively
   424  	err := filepath.Walk(absCgroupPath, func(path string, info fs.FileInfo, err error) error {
   425  		if err != nil {
   426  			general.Infof("prevent panic by handling failure accessing a path: %s, err: %v", path, err)
   427  			return err
   428  		}
   429  		if info.IsDir() {
   430  			swapMaxData := &common.MemoryData{SwapMaxInBytes: -1}
   431  			err = GetManager().ApplyMemory(path, swapMaxData)
   432  			if err != nil {
   433  				return filepath.SkipDir
   434  			}
   435  		}
   436  		return nil
   437  	})
   438  	if err != nil {
   439  		general.Infof("error walking the path: %s, err: %v ", absCgroupPath, err)
   440  		return err
   441  	}
   442  	return nil
   443  }
   444  
   445  func MemoryOffloadingWithAbsolutePath(ctx context.Context, absCgroupPath string, nbytes int64) error {
   446  	startTime := time.Now()
   447  
   448  	var cmd string
   449  	if common.CheckCgroup2UnifiedMode() {
   450  		if nbytes <= 0 {
   451  			general.Infof("[MemoryOffloadingWithAbsolutePath] skip memory reclaim on %s since nbytes is not valid", absCgroupPath)
   452  			return nil
   453  		}
   454  		// cgv2
   455  		cmd = fmt.Sprintf("echo %d > %s", nbytes, filepath.Join(absCgroupPath, "memory.reclaim"))
   456  	} else {
   457  		// cgv1
   458  		general.Infof("[MemoryOffloadingWithAbsolutePath] is not supported on cgroupv1")
   459  		return nil
   460  	}
   461  
   462  	_, err := exec.Command("bash", "-c", cmd).Output()
   463  
   464  	_ = asyncworker.EmitAsyncedMetrics(ctx, metrics.ConvertMapToTags(map[string]string{
   465  		"absCGPath": absCgroupPath,
   466  		"succeeded": fmt.Sprintf("%v", err == nil),
   467  	})...)
   468  	delta := time.Since(startTime).Seconds()
   469  	general.Infof("[MemoryOffloadingWithAbsolutePath] it takes %v to do \"%s\" on cgroup: %s", delta, cmd, absCgroupPath)
   470  
   471  	return err
   472  }