github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/sysadvisor/metacache/metacache.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package metacache
    18  
    19  import (
    20  	"fmt"
    21  	"reflect"
    22  	"sync"
    23  	"time"
    24  
    25  	"k8s.io/apimachinery/pkg/util/sets"
    26  	"k8s.io/klog/v2"
    27  	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
    28  
    29  	"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/types"
    30  	"github.com/kubewharf/katalyst-core/pkg/config"
    31  	metrictypes "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/types"
    32  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    33  	metricspool "github.com/kubewharf/katalyst-core/pkg/metrics/metrics-pool"
    34  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    35  	"github.com/kubewharf/katalyst-core/pkg/util/machine"
    36  )
    37  
// [notice]
// to stay compatible with the checkpoint checksum calculation,
// the guarantees below must hold when assigning checkpoint properties:
// 1. initialize resource.Quantity with resource.MustParse("0"), not resource.Quantity{}
// 2. initialize CPUSet with NewCPUSet(...), not CPUSet{}
// 3. do not use omitempty on map properties, and always initialize them with a newly made map
    44  
    45  const (
    46  	stateFileName             string = "sys_advisor_state"
    47  	storeStateWarningDuration        = 2 * time.Second
    48  )
    49  
    50  // metric names for metacache
    51  const (
    52  	metricMetaCacheStoreStateDuration = "metacache_store_state_duration"
    53  )
    54  
    55  // MetaReader provides a standard interface to refer to metadata type
    56  type MetaReader interface {
    57  	// GetContainerEntries returns a ContainerEntry copy keyed by pod uid
    58  	GetContainerEntries(podUID string) (types.ContainerEntries, bool)
    59  	// GetContainerInfo returns a ContainerInfo copy keyed by pod uid and container name
    60  	GetContainerInfo(podUID string, containerName string) (*types.ContainerInfo, bool)
    61  	// RangeContainer applies a function to every podUID, containerName, containerInfo set
    62  	RangeContainer(f func(podUID string, containerName string, containerInfo *types.ContainerInfo) bool)
    63  
    64  	// GetPoolInfo returns a PoolInfo copy by pool name
    65  	GetPoolInfo(poolName string) (*types.PoolInfo, bool)
    66  	// GetPoolSize returns the size of pool as integer
    67  	GetPoolSize(poolName string) (int, bool)
    68  
    69  	// GetRegionInfo returns a RegionInfo copy by region name
    70  	GetRegionInfo(regionName string) (*types.RegionInfo, bool)
    71  	// RangeRegionInfo applies a function to every regionName, regionInfo set.
    72  	// If f returns false, range stops the iteration.
    73  	RangeRegionInfo(f func(regionName string, regionInfo *types.RegionInfo) bool)
    74  
    75  	// GetFilteredInferenceResult gets specified model inference result with filter function
    76  	GetFilteredInferenceResult(filterFunc func(result interface{}) (interface{}, error), modelName string) (interface{}, error)
    77  	// GetInferenceResult gets specified model inference result
    78  	GetInferenceResult(modelName string) (interface{}, error)
    79  
    80  	metrictypes.MetricsReader
    81  }
    82  
    83  // MetaWriter provides a standard interface to modify raw metadata (generated by other agents) in local cache
    84  type MetaWriter interface {
    85  	// AddContainer adds a container keyed by pod uid and container name. For repeatedly added
    86  	// container, only mutable metadata will be updated, i.e. request quantity changed by vpa
    87  	AddContainer(podUID string, containerName string, containerInfo *types.ContainerInfo) error
    88  	// SetContainerInfo updates ContainerInfo keyed by pod uid and container name
    89  	SetContainerInfo(podUID string, containerName string, containerInfo *types.ContainerInfo) error
    90  	// RangeAndUpdateContainer applies a function to every podUID, containerName, containerInfo set.
    91  	// Not recommended using if RangeContainer satisfies the requirement.
    92  	// If f returns false, range stops the iteration.
    93  	RangeAndUpdateContainer(f func(podUID string, containerName string, containerInfo *types.ContainerInfo) bool) error
    94  
    95  	// DeleteContainer deletes a ContainerInfo keyed by pod uid and container name
    96  	DeleteContainer(podUID string, containerName string) error
    97  	// RangeAndDeleteContainer applies a function to every podUID, containerName, containerInfo set.
    98  	// If f returns true, the containerInfo will be deleted.
    99  	RangeAndDeleteContainer(f func(containerInfo *types.ContainerInfo) bool, safeTime int64) error
   100  	// RemovePod deletes a PodInfo keyed by pod uid. Repeatedly remove will be ignored.
   101  	RemovePod(podUID string) error
   102  	// ClearContainers remove all containers
   103  	ClearContainers() error
   104  
   105  	// SetPoolInfo stores a PoolInfo by pool name
   106  	SetPoolInfo(poolName string, poolInfo *types.PoolInfo) error
   107  	// DeletePool deletes a PoolInfo keyed by pool name
   108  	DeletePool(poolName string) error
   109  	// GCPoolEntries deletes GCPoolEntries not existing on node
   110  	GCPoolEntries(livingPoolNameSet sets.String) error
   111  
   112  	// SetRegionEntries overwrites the whole region entries
   113  	SetRegionEntries(entries types.RegionEntries) error
   114  	// SetRegionInfo stores a RegionInfo by region name
   115  	SetRegionInfo(regionName string, regionInfo *types.RegionInfo) error
   116  
   117  	// SetInferenceResult sets specified model inference result
   118  	SetInferenceResult(modelName string, result interface{}) error
   119  }
   120  
// AdvisorNotifier is an empty struct type; it declares no fields.
type AdvisorNotifier struct{}
   122  
   123  type MetaCache interface {
   124  	MetaReader
   125  	MetaWriter
   126  }
   127  
   128  // MetaCacheImp stores metadata and info of pod, node, pool, subnuma etc. as a cache,
   129  // and synchronizes data to sysadvisor state file. It is thread-safe to read and write.
   130  // Deep copy logic is performed during accessing metacache entries instead of directly
   131  // return pointer of each struct to avoid mis-overwrite.
   132  type MetaCacheImp struct {
   133  	metrictypes.MetricsReader
   134  
   135  	podEntries types.PodEntries
   136  	podMutex   sync.RWMutex
   137  
   138  	poolEntries types.PoolEntries
   139  	poolMutex   sync.RWMutex
   140  
   141  	regionEntries types.RegionEntries
   142  	regionMutex   sync.RWMutex
   143  
   144  	checkpointManager checkpointmanager.CheckpointManager
   145  	checkpointName    string
   146  
   147  	emitter metrics.MetricEmitter
   148  
   149  	modelToResult map[string]interface{}
   150  	modelMutex    sync.RWMutex
   151  
   152  	containerCreateTimestamp map[string]int64
   153  }
   154  
   155  var _ MetaCache = &MetaCacheImp{}
   156  
   157  // NewMetaCacheImp returns the single instance of MetaCacheImp
   158  func NewMetaCacheImp(conf *config.Configuration, emitterPool metricspool.MetricsEmitterPool, metricsReader metrictypes.MetricsReader) (*MetaCacheImp, error) {
   159  	stateFileDir := conf.GenericSysAdvisorConfiguration.StateFileDirectory
   160  	checkpointManager, err := checkpointmanager.NewCheckpointManager(stateFileDir)
   161  	if err != nil {
   162  		return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err)
   163  	}
   164  	emitter := emitterPool.GetDefaultMetricsEmitter().WithTags("advisor-metacache")
   165  
   166  	mc := &MetaCacheImp{
   167  		MetricsReader:            metricsReader,
   168  		podEntries:               make(types.PodEntries),
   169  		poolEntries:              make(types.PoolEntries),
   170  		regionEntries:            make(types.RegionEntries),
   171  		checkpointManager:        checkpointManager,
   172  		checkpointName:           stateFileName,
   173  		emitter:                  emitter,
   174  		modelToResult:            make(map[string]interface{}),
   175  		containerCreateTimestamp: make(map[string]int64),
   176  	}
   177  
   178  	// Restore from checkpoint before any function call to metacache api
   179  	if err := mc.restoreState(); err != nil {
   180  		return mc, err
   181  	}
   182  
   183  	return mc, nil
   184  }
   185  
   186  /*
   187  	standard implementation for metaReader
   188  */
   189  
   190  func (mc *MetaCacheImp) GetContainerEntries(podUID string) (types.ContainerEntries, bool) {
   191  	mc.podMutex.RLock()
   192  	defer mc.podMutex.RUnlock()
   193  
   194  	v, ok := mc.podEntries[podUID]
   195  	return v.Clone(), ok
   196  }
   197  
   198  func (mc *MetaCacheImp) GetContainerInfo(podUID string, containerName string) (*types.ContainerInfo, bool) {
   199  	mc.podMutex.RLock()
   200  	defer mc.podMutex.RUnlock()
   201  
   202  	podInfo, ok := mc.podEntries[podUID]
   203  	if !ok {
   204  		return nil, false
   205  	}
   206  	containerInfo, ok := podInfo[containerName]
   207  
   208  	return containerInfo.Clone(), ok
   209  }
   210  
   211  // RangeContainer should deepcopy so that pod and container entries will not be overwritten.
   212  func (mc *MetaCacheImp) RangeContainer(f func(podUID string, containerName string, containerInfo *types.ContainerInfo) bool) {
   213  	mc.podMutex.RLock()
   214  	defer mc.podMutex.RUnlock()
   215  
   216  	for podUID, podInfo := range mc.podEntries.Clone() {
   217  		for containerName, containerInfo := range podInfo {
   218  			if !f(podUID, containerName, containerInfo) {
   219  				break
   220  			}
   221  		}
   222  	}
   223  }
   224  
   225  func (mc *MetaCacheImp) GetPoolInfo(poolName string) (*types.PoolInfo, bool) {
   226  	mc.poolMutex.RLock()
   227  	defer mc.poolMutex.RUnlock()
   228  
   229  	poolInfo, ok := mc.poolEntries[poolName]
   230  	return poolInfo.Clone(), ok
   231  }
   232  
   233  func (mc *MetaCacheImp) GetPoolSize(poolName string) (int, bool) {
   234  	mc.poolMutex.RLock()
   235  	defer mc.poolMutex.RUnlock()
   236  
   237  	pi, ok := mc.poolEntries[poolName]
   238  	if !ok {
   239  		return 0, false
   240  	}
   241  	return machine.CountCPUAssignmentCPUs(pi.TopologyAwareAssignments), true
   242  }
   243  
   244  func (mc *MetaCacheImp) GetRegionInfo(regionName string) (*types.RegionInfo, bool) {
   245  	mc.regionMutex.RLock()
   246  	defer mc.regionMutex.RUnlock()
   247  
   248  	regionInfo, ok := mc.regionEntries[regionName]
   249  	return regionInfo.Clone(), ok
   250  }
   251  
   252  // GetFilteredInferenceResult gets specified model inference result with filter function
   253  // whether it returns a deep copied result depends on the implementation of filterFunc
   254  func (mc *MetaCacheImp) GetFilteredInferenceResult(filterFunc func(result interface{}) (interface{}, error),
   255  	modelName string,
   256  ) (interface{}, error) {
   257  	mc.modelMutex.RLock()
   258  	defer mc.modelMutex.RUnlock()
   259  
   260  	if mc.modelToResult[modelName] == nil {
   261  		return nil, fmt.Errorf("result for model: %s doesn't exist", modelName)
   262  	}
   263  
   264  	if filterFunc == nil {
   265  		return mc.modelToResult[modelName], nil
   266  	} else {
   267  		return filterFunc(mc.modelToResult[modelName])
   268  	}
   269  }
   270  
   271  // GetInferenceResult gets specified model inference result
   272  // notice it doesn't return a deep copied result
   273  func (mc *MetaCacheImp) GetInferenceResult(modelName string) (interface{}, error) {
   274  	return mc.GetFilteredInferenceResult(nil, modelName)
   275  }
   276  
   277  func (mc *MetaCacheImp) RangeRegionInfo(f func(regionName string, regionInfo *types.RegionInfo) bool) {
   278  	mc.regionMutex.RLock()
   279  	defer mc.regionMutex.RUnlock()
   280  
   281  	for regionName, regionInfo := range mc.regionEntries.Clone() {
   282  		if !f(regionName, regionInfo) {
   283  			break
   284  		}
   285  	}
   286  }
   287  
   288  /*
   289  	standard implementation for MetaWriter
   290  */
   291  
   292  func (mc *MetaCacheImp) AddContainer(podUID string, containerName string, containerInfo *types.ContainerInfo) error {
   293  	mc.podMutex.Lock()
   294  	defer mc.podMutex.Unlock()
   295  
   296  	if podInfo, ok := mc.podEntries[podUID]; ok {
   297  		if ci, ok := podInfo[containerName]; ok {
   298  			ci.UpdateMeta(containerInfo)
   299  			return nil
   300  		}
   301  	}
   302  
   303  	mc.setContainerCreateTimestamp(podUID, containerName, time.Now().UnixNano())
   304  	if mc.setContainerInfo(podUID, containerName, containerInfo) {
   305  		return mc.storeState()
   306  	}
   307  	return nil
   308  }
   309  
   310  func (mc *MetaCacheImp) SetContainerInfo(podUID string, containerName string, containerInfo *types.ContainerInfo) error {
   311  	mc.podMutex.Lock()
   312  	defer mc.podMutex.Unlock()
   313  
   314  	if mc.setContainerInfo(podUID, containerName, containerInfo) {
   315  		return mc.storeState()
   316  	}
   317  	return nil
   318  }
   319  
   320  func (mc *MetaCacheImp) setContainerInfo(podUID string, containerName string, containerInfo *types.ContainerInfo) bool {
   321  	podInfo, ok := mc.podEntries[podUID]
   322  	if !ok {
   323  		mc.podEntries[podUID] = make(types.ContainerEntries)
   324  		podInfo = mc.podEntries[podUID]
   325  	}
   326  
   327  	if reflect.DeepEqual(podInfo[containerName], containerInfo) {
   328  		return false
   329  	} else {
   330  		podInfo[containerName] = containerInfo
   331  		return true
   332  	}
   333  }
   334  
   335  func (mc *MetaCacheImp) RangeAndUpdateContainer(f func(podUID string, containerName string, containerInfo *types.ContainerInfo) bool) error {
   336  	mc.podMutex.Lock()
   337  	defer mc.podMutex.Unlock()
   338  
   339  	oldPodEntries := mc.podEntries.Clone()
   340  
   341  	for podUID, podInfo := range mc.podEntries {
   342  		for containerName, containerInfo := range podInfo {
   343  			if !f(podUID, containerName, containerInfo) {
   344  				break
   345  			}
   346  		}
   347  	}
   348  
   349  	if !reflect.DeepEqual(oldPodEntries, mc.podEntries) {
   350  		return mc.storeState()
   351  	}
   352  	return nil
   353  }
   354  
   355  func (mc *MetaCacheImp) DeleteContainer(podUID string, containerName string) error {
   356  	mc.podMutex.Lock()
   357  	defer mc.podMutex.Unlock()
   358  
   359  	if mc.deleteContainer(podUID, containerName) {
   360  		return mc.storeState()
   361  	}
   362  	return nil
   363  }
   364  
   365  func (mc *MetaCacheImp) ClearContainers() error {
   366  	mc.podMutex.Lock()
   367  	defer mc.podMutex.Unlock()
   368  
   369  	if len(mc.containerCreateTimestamp) != 0 {
   370  		mc.containerCreateTimestamp = map[string]int64{}
   371  	}
   372  	if len(mc.podEntries) != 0 {
   373  		mc.podEntries = map[string]types.ContainerEntries{}
   374  		return mc.storeState()
   375  	}
   376  
   377  	return nil
   378  }
   379  
   380  func (mc *MetaCacheImp) RangeAndDeleteContainer(f func(containerInfo *types.ContainerInfo) bool, safeTime int64) error {
   381  	mc.podMutex.Lock()
   382  	defer mc.podMutex.Unlock()
   383  
   384  	needStoreState := false
   385  	for _, podInfo := range mc.podEntries {
   386  		for _, containerInfo := range podInfo {
   387  			if safeTime > 0 {
   388  				createAt := mc.getContainerCreateTimestamp(containerInfo.PodUID, containerInfo.ContainerName)
   389  				if createAt > safeTime {
   390  					continue
   391  				}
   392  			}
   393  			if f(containerInfo) {
   394  				klog.Warningf("RangeAndDeleteContainer delete container %s/%s with safe time (%d) and create time (%d)",
   395  					containerInfo.PodUID, containerInfo.ContainerName, safeTime, mc.getContainerCreateTimestamp(containerInfo.PodUID, containerInfo.ContainerName))
   396  				if mc.deleteContainer(containerInfo.PodUID, containerInfo.ContainerName) {
   397  					needStoreState = true
   398  				}
   399  			}
   400  		}
   401  	}
   402  
   403  	if needStoreState {
   404  		return mc.storeState()
   405  	}
   406  	return nil
   407  }
   408  
   409  func (mc *MetaCacheImp) deleteContainer(podUID string, containerName string) bool {
   410  	mc.deleteContainerCreateTimestamp(podUID, containerName)
   411  
   412  	podInfo, ok := mc.podEntries[podUID]
   413  	if !ok {
   414  		return false
   415  	}
   416  	_, ok = podInfo[containerName]
   417  	if !ok {
   418  		return false
   419  	}
   420  
   421  	delete(podInfo, containerName)
   422  	if len(podInfo) <= 0 {
   423  		delete(mc.podEntries, podUID)
   424  	}
   425  	return true
   426  }
   427  
   428  func (mc *MetaCacheImp) RemovePod(podUID string) error {
   429  	mc.podMutex.Lock()
   430  	defer mc.podMutex.Unlock()
   431  
   432  	containerEntries, ok := mc.podEntries[podUID]
   433  	if !ok {
   434  		return nil
   435  	}
   436  	for _, container := range containerEntries {
   437  		mc.deleteContainerCreateTimestamp(podUID, container.ContainerName)
   438  	}
   439  	delete(mc.podEntries, podUID)
   440  
   441  	return mc.storeState()
   442  }
   443  
   444  func (mc *MetaCacheImp) SetPoolInfo(poolName string, poolInfo *types.PoolInfo) error {
   445  	mc.poolMutex.Lock()
   446  	defer mc.poolMutex.Unlock()
   447  
   448  	if reflect.DeepEqual(mc.poolEntries[poolName], poolInfo) {
   449  		return nil
   450  	}
   451  
   452  	mc.poolEntries[poolName] = poolInfo
   453  
   454  	return mc.storeState()
   455  }
   456  
   457  func (mc *MetaCacheImp) DeletePool(poolName string) error {
   458  	mc.poolMutex.Lock()
   459  	defer mc.poolMutex.Unlock()
   460  
   461  	if _, ok := mc.poolEntries[poolName]; !ok {
   462  		return nil
   463  	}
   464  
   465  	delete(mc.poolEntries, poolName)
   466  
   467  	return mc.storeState()
   468  }
   469  
   470  func (mc *MetaCacheImp) GCPoolEntries(livingPoolNameSet sets.String) error {
   471  	mc.poolMutex.Lock()
   472  	defer mc.poolMutex.Unlock()
   473  
   474  	needStoreState := false
   475  	for poolName := range mc.poolEntries {
   476  		if _, ok := livingPoolNameSet[poolName]; !ok {
   477  			delete(mc.poolEntries, poolName)
   478  			needStoreState = true
   479  		}
   480  	}
   481  
   482  	if needStoreState {
   483  		return mc.storeState()
   484  	}
   485  	return nil
   486  }
   487  
   488  func (mc *MetaCacheImp) SetRegionEntries(entries types.RegionEntries) error {
   489  	mc.regionMutex.Lock()
   490  	defer mc.regionMutex.Unlock()
   491  
   492  	oldRegionEntries := mc.regionEntries.Clone()
   493  	mc.regionEntries = entries.Clone()
   494  
   495  	if !reflect.DeepEqual(oldRegionEntries, mc.regionEntries) {
   496  		return mc.storeState()
   497  	}
   498  	return nil
   499  }
   500  
   501  func (mc *MetaCacheImp) SetRegionInfo(regionName string, regionInfo *types.RegionInfo) error {
   502  	mc.regionMutex.Lock()
   503  	defer mc.regionMutex.Unlock()
   504  
   505  	if reflect.DeepEqual(mc.regionEntries[regionName], regionInfo) {
   506  		return nil
   507  	} else {
   508  		mc.regionEntries[regionName] = regionInfo
   509  		return mc.storeState()
   510  	}
   511  }
   512  
   513  // SetInferenceResult sets specified model inference result
   514  func (mc *MetaCacheImp) SetInferenceResult(modelName string, result interface{}) error {
   515  	general.InfoS("called", "modelName", modelName)
   516  
   517  	if result == nil {
   518  		return fmt.Errorf("nil result")
   519  	}
   520  
   521  	mc.modelMutex.Lock()
   522  	defer mc.modelMutex.Unlock()
   523  
   524  	mc.modelToResult[modelName] = result
   525  	return nil
   526  }
   527  
   528  /*
   529  	other helper functions
   530  */
   531  
   532  func (mc *MetaCacheImp) storeState() error {
   533  	checkpoint := NewMetaCacheCheckpoint()
   534  	checkpoint.PodEntries = mc.podEntries
   535  	checkpoint.PoolEntries = mc.poolEntries
   536  	checkpoint.RegionEntries = mc.regionEntries
   537  
   538  	startTime := time.Now()
   539  	defer func(t time.Time) {
   540  		elapsed := time.Since(t)
   541  		if elapsed > storeStateWarningDuration {
   542  			klog.Errorf("[metacache] store state took too long time, duration %v", elapsed)
   543  		}
   544  		_ = mc.emitter.StoreFloat64(metricMetaCacheStoreStateDuration, float64(elapsed/time.Millisecond), metrics.MetricTypeNameRaw)
   545  	}(startTime)
   546  
   547  	if err := mc.checkpointManager.CreateCheckpoint(mc.checkpointName, checkpoint); err != nil {
   548  		klog.Errorf("[metacache] store state failed: %v", err)
   549  		return err
   550  	}
   551  	klog.Infof("[metacache] store state succeeded")
   552  
   553  	return nil
   554  }
   555  
   556  func (mc *MetaCacheImp) restoreState() error {
   557  	checkpoint := NewMetaCacheCheckpoint()
   558  
   559  	if err := mc.checkpointManager.GetCheckpoint(mc.checkpointName, checkpoint); err != nil {
   560  		klog.Infof("[metacache] checkpoint %v err %v, create it", mc.checkpointName, err)
   561  		return mc.storeState()
   562  	}
   563  
   564  	mc.podEntries = checkpoint.PodEntries
   565  	mc.poolEntries = checkpoint.PoolEntries
   566  	mc.regionEntries = checkpoint.RegionEntries
   567  
   568  	klog.Infof("[metacache] restore state succeeded")
   569  
   570  	return nil
   571  }
   572  
   573  func (mc *MetaCacheImp) setContainerCreateTimestamp(podUID, containerName string, timestamp int64) {
   574  	mc.containerCreateTimestamp[fmt.Sprintf("%s/%s", podUID, containerName)] = timestamp
   575  }
   576  
   577  func (mc *MetaCacheImp) getContainerCreateTimestamp(podUID, containerName string) int64 {
   578  	return mc.containerCreateTimestamp[fmt.Sprintf("%s/%s", podUID, containerName)]
   579  }
   580  
   581  func (mc *MetaCacheImp) deleteContainerCreateTimestamp(podUID, containerName string) {
   582  	delete(mc.containerCreateTimestamp, fmt.Sprintf("%s/%s", podUID, containerName))
   583  }