github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_async_handler.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package dynamicpolicy
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	v1 "k8s.io/api/core/v1"
    25  	"k8s.io/apimachinery/pkg/util/sets"
    26  
    27  	"github.com/kubewharf/katalyst-api/pkg/consts"
    28  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/advisorsvc"
    29  	cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts"
    30  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state"
    31  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
    32  	coreconfig "github.com/kubewharf/katalyst-core/pkg/config"
    33  	dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
    34  	"github.com/kubewharf/katalyst-core/pkg/metaserver"
    35  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    36  	cgroupcm "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common"
    37  	cgroupcmutils "github.com/kubewharf/katalyst-core/pkg/util/cgroup/manager"
    38  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    39  	"github.com/kubewharf/katalyst-core/pkg/util/machine"
    40  )
    41  
    42  // checkCPUSet emit errors if the memory allocation falls into unexpected results
    43  func (p *DynamicPolicy) checkCPUSet(_ *coreconfig.Configuration,
    44  	_ interface{},
    45  	_ *dynamicconfig.DynamicAgentConfiguration,
    46  	_ metrics.MetricEmitter,
    47  	_ *metaserver.MetaServer,
    48  ) {
    49  	general.Infof("exec checkCPUSet")
    50  	var (
    51  		err           error
    52  		invalidCPUSet = false
    53  		cpuSetOverlap = false
    54  	)
    55  
    56  	defer func() {
    57  		if err != nil {
    58  			_ = general.UpdateHealthzStateByError(cpuconsts.CheckCPUSet, err)
    59  		} else if invalidCPUSet {
    60  			_ = general.UpdateHealthzState(cpuconsts.CheckCPUSet, general.HealthzCheckStateNotReady, "invalid cpuset exists")
    61  		} else if cpuSetOverlap {
    62  			_ = general.UpdateHealthzState(cpuconsts.CheckCPUSet, general.HealthzCheckStateNotReady, "cpuset overlap")
    63  		} else {
    64  			_ = general.UpdateHealthzState(cpuconsts.CheckCPUSet, general.HealthzCheckStateReady, "")
    65  		}
    66  	}()
    67  
    68  	podEntries := p.state.GetPodEntries()
    69  	actualCPUSets := make(map[string]map[string]machine.CPUSet)
    70  	for podUID, containerEntries := range podEntries {
    71  		if containerEntries.IsPoolEntry() {
    72  			continue
    73  		}
    74  
    75  		for containerName, allocationInfo := range containerEntries {
    76  			if allocationInfo == nil || !allocationInfo.CheckMainContainer() {
    77  				continue
    78  			} else if state.CheckShared(allocationInfo) && p.getContainerRequestedCores(allocationInfo) == 0 {
    79  				general.Warningf("skip cpuset checking for pod: %s/%s container: %s with zero cpu request",
    80  					allocationInfo.PodNamespace, allocationInfo.PodName, containerName)
    81  				continue
    82  			}
    83  
    84  			tags := metrics.ConvertMapToTags(map[string]string{
    85  				"podNamespace":  allocationInfo.PodNamespace,
    86  				"podName":       allocationInfo.PodName,
    87  				"containerName": allocationInfo.ContainerName,
    88  			})
    89  			var (
    90  				containerId string
    91  				cpuSetStats *cgroupcm.CPUSetStats
    92  			)
    93  
    94  			containerId, err = p.metaServer.GetContainerID(podUID, containerName)
    95  			if err != nil {
    96  				general.Errorf("get container id of pod: %s container: %s failed with error: %v", podUID, containerName, err)
    97  				continue
    98  			}
    99  
   100  			cpuSetStats, err = cgroupcmutils.GetCPUSetForContainer(podUID, containerId)
   101  			if err != nil {
   102  				general.Errorf("GetCPUSet of pod: %s container: name(%s), id(%s) failed with error: %v",
   103  					podUID, containerName, containerId, err)
   104  				_ = p.emitter.StoreInt64(util.MetricNameRealStateInvalid, 1, metrics.MetricTypeNameRaw, tags...)
   105  				continue
   106  			}
   107  
   108  			if actualCPUSets[podUID] == nil {
   109  				actualCPUSets[podUID] = make(map[string]machine.CPUSet)
   110  			}
   111  			actualCPUSets[podUID][containerName] = machine.MustParse(cpuSetStats.CPUs)
   112  
   113  			general.Infof("pod: %s/%s, container: %s, state CPUSet: %s, actual CPUSet: %s",
   114  				allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName,
   115  				allocationInfo.AllocationResult.String(), actualCPUSets[podUID][containerName].String())
   116  
   117  			// only do comparison for dedicated_cores with numa_biding to avoid effect of adjustment for shared_cores
   118  			if !state.CheckDedicated(allocationInfo) {
   119  				continue
   120  			}
   121  
   122  			if !actualCPUSets[podUID][containerName].Equals(allocationInfo.OriginalAllocationResult) {
   123  				invalidCPUSet = true
   124  				general.Errorf("pod: %s/%s, container: %s, cpuset invalid",
   125  					allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName)
   126  				_ = p.emitter.StoreInt64(util.MetricNameCPUSetInvalid, 1, metrics.MetricTypeNameRaw, tags...)
   127  			}
   128  		}
   129  	}
   130  
   131  	unionDedicatedCPUSet := machine.NewCPUSet()
   132  	unionSharedCPUSet := machine.NewCPUSet()
   133  
   134  	for podUID, containerEntries := range actualCPUSets {
   135  		for containerName, cset := range containerEntries {
   136  			allocationInfo := podEntries[podUID][containerName]
   137  			if allocationInfo == nil {
   138  				continue
   139  			}
   140  
   141  			switch allocationInfo.QoSLevel {
   142  			case consts.PodAnnotationQoSLevelDedicatedCores:
   143  				if !cpuSetOverlap && cset.Intersection(unionDedicatedCPUSet).Size() != 0 {
   144  					cpuSetOverlap = true
   145  					general.Errorf("pod: %s/%s, container: %s cpuset: %s overlaps with others",
   146  						allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, cset.String())
   147  				}
   148  				unionDedicatedCPUSet = unionDedicatedCPUSet.Union(cset)
   149  			case consts.PodAnnotationQoSLevelSharedCores:
   150  				unionSharedCPUSet = unionSharedCPUSet.Union(cset)
   151  			}
   152  		}
   153  	}
   154  
   155  	regionOverlap := unionSharedCPUSet.Intersection(unionDedicatedCPUSet).Size() != 0
   156  	if regionOverlap {
   157  		general.Errorf("shared_cores union cpuset: %s overlaps with dedicated_cores union cpuset: %s",
   158  			unionSharedCPUSet.String(), unionDedicatedCPUSet.String())
   159  	}
   160  
   161  	if !cpuSetOverlap {
   162  		cpuSetOverlap = regionOverlap
   163  	}
   164  	if cpuSetOverlap {
   165  		general.Errorf("found cpuset overlap. actualCPUSets: %+v", actualCPUSets)
   166  		_ = p.emitter.StoreInt64(util.MetricNameCPUSetOverlap, 1, metrics.MetricTypeNameRaw)
   167  	}
   168  
   169  	general.Infof("finish checkCPUSet")
   170  }
   171  
   172  // clearResidualState is used to clean residual pods in local state
   173  func (p *DynamicPolicy) clearResidualState(_ *coreconfig.Configuration,
   174  	_ interface{},
   175  	_ *dynamicconfig.DynamicAgentConfiguration,
   176  	_ metrics.MetricEmitter,
   177  	_ *metaserver.MetaServer,
   178  ) {
   179  	general.Infof("exec clearResidualState")
   180  	var (
   181  		err     error
   182  		podList []*v1.Pod
   183  	)
   184  	residualSet := make(map[string]bool)
   185  
   186  	defer func() {
   187  		_ = general.UpdateHealthzStateByError(cpuconsts.ClearResidualState, err)
   188  	}()
   189  
   190  	if p.metaServer == nil {
   191  		general.Errorf("nil metaServer")
   192  		return
   193  	}
   194  
   195  	ctx := context.Background()
   196  	podList, err = p.metaServer.GetPodList(ctx, nil)
   197  	if err != nil {
   198  		general.Errorf("get pod list failed: %v", err)
   199  		return
   200  	}
   201  
   202  	podSet := sets.NewString()
   203  	for _, pod := range podList {
   204  		podSet.Insert(fmt.Sprintf("%v", pod.UID))
   205  	}
   206  
   207  	p.Lock()
   208  	defer p.Unlock()
   209  
   210  	podEntries := p.state.GetPodEntries()
   211  	for podUID, containerEntries := range podEntries {
   212  		if containerEntries.IsPoolEntry() {
   213  			continue
   214  		}
   215  
   216  		if !podSet.Has(podUID) {
   217  			residualSet[podUID] = true
   218  			p.residualHitMap[podUID] += 1
   219  			general.Infof("found pod: %s with state but doesn't show up in pod watcher, hit count: %d", podUID, p.residualHitMap[podUID])
   220  		}
   221  	}
   222  
   223  	podsToDelete := sets.NewString()
   224  	for podUID, hitCount := range p.residualHitMap {
   225  		if !residualSet[podUID] {
   226  			general.Infof("already found pod: %s in pod watcher or its state is cleared, delete it from residualHitMap", podUID)
   227  			delete(p.residualHitMap, podUID)
   228  			continue
   229  		}
   230  
   231  		if time.Duration(hitCount)*stateCheckPeriod >= maxResidualTime {
   232  			podsToDelete.Insert(podUID)
   233  		}
   234  	}
   235  
   236  	if podsToDelete.Len() > 0 {
   237  		for {
   238  			podUID, found := podsToDelete.PopAny()
   239  			if !found {
   240  				break
   241  			}
   242  
   243  			var rErr error
   244  			if p.enableCPUAdvisor {
   245  				_, rErr = p.advisorClient.RemovePod(ctx, &advisorsvc.RemovePodRequest{
   246  					PodUid: podUID,
   247  				})
   248  			}
   249  			if rErr != nil {
   250  				general.Errorf("remove residual pod: %s in sys advisor failed with error: %v, remain it in state", podUID, rErr)
   251  				continue
   252  			}
   253  
   254  			general.Infof("clear residual pod: %s in state", podUID)
   255  			delete(podEntries, podUID)
   256  		}
   257  
   258  		var updatedMachineState state.NUMANodeMap
   259  		updatedMachineState, err = generateMachineStateFromPodEntries(p.machineInfo.CPUTopology, podEntries)
   260  		if err != nil {
   261  			general.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err)
   262  			return
   263  		}
   264  
   265  		p.state.SetPodEntries(podEntries)
   266  		p.state.SetMachineState(updatedMachineState)
   267  
   268  		err = p.adjustAllocationEntries()
   269  		if err != nil {
   270  			general.ErrorS(err, "adjustAllocationEntries failed")
   271  		}
   272  	}
   273  }
   274  
   275  // syncCPUIdle is used to set cpu idle for reclaimed cores
   276  func (p *DynamicPolicy) syncCPUIdle(_ *coreconfig.Configuration,
   277  	_ interface{},
   278  	_ *dynamicconfig.DynamicAgentConfiguration,
   279  	_ metrics.MetricEmitter,
   280  	_ *metaserver.MetaServer,
   281  ) {
   282  	general.Infof("exec syncCPUIdle")
   283  	var err error
   284  	defer func() {
   285  		_ = general.UpdateHealthzStateByError(cpuconsts.SyncCPUIdle, err)
   286  	}()
   287  
   288  	if !cgroupcm.IsCPUIdleSupported() {
   289  		general.Warningf("cpu idle isn't unsupported, skip syncing")
   290  		return
   291  	}
   292  
   293  	err = cgroupcmutils.ApplyCPUWithRelativePath(p.reclaimRelativeRootCgroupPath, &cgroupcm.CPUData{CpuIdlePtr: &p.enableCPUIdle})
   294  	if err != nil {
   295  		general.Errorf("ApplyCPUWithRelativePath in %s with enableCPUIdle: %v in failed with error: %v",
   296  			p.reclaimRelativeRootCgroupPath, p.enableCPUIdle, err)
   297  	}
   298  }