github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/sysadvisor/metacache/metacache.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package metacache 18 19 import ( 20 "fmt" 21 "reflect" 22 "sync" 23 "time" 24 25 "k8s.io/apimachinery/pkg/util/sets" 26 "k8s.io/klog/v2" 27 "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" 28 29 "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/types" 30 "github.com/kubewharf/katalyst-core/pkg/config" 31 metrictypes "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/types" 32 "github.com/kubewharf/katalyst-core/pkg/metrics" 33 metricspool "github.com/kubewharf/katalyst-core/pkg/metrics/metrics-pool" 34 "github.com/kubewharf/katalyst-core/pkg/util/general" 35 "github.com/kubewharf/katalyst-core/pkg/util/machine" 36 ) 37 38 // [notice] 39 // to compatible with checkpoint checksum calculation, 40 // we should make guarantees below in checkpoint properties assignment 41 // 1. resource.Quantity use resource.MustParse("0") to initialize, not to use resource.Quantity{} 42 // 2. CPUSet use NewCPUSet(...) to initialize, not to use CPUSet{} 43 // 3. not use omitempty in map property and must make new map to do initialization 44 45 const ( 46 stateFileName string = "sys_advisor_state" 47 storeStateWarningDuration = 2 * time.Second 48 ) 49 50 // metric names for metacache 51 const ( 52 metricMetaCacheStoreStateDuration = "metacache_store_state_duration" 53 ) 54 55 // MetaReader provides a standard interface to refer to metadata type 56 type MetaReader interface { 57 // GetContainerEntries returns a ContainerEntry copy keyed by pod uid 58 GetContainerEntries(podUID string) (types.ContainerEntries, bool) 59 // GetContainerInfo returns a ContainerInfo copy keyed by pod uid and container name 60 GetContainerInfo(podUID string, containerName string) (*types.ContainerInfo, bool) 61 // RangeContainer applies a function to every podUID, containerName, containerInfo set 62 RangeContainer(f func(podUID string, containerName string, containerInfo *types.ContainerInfo) bool) 63 64 // GetPoolInfo returns a PoolInfo copy by pool name 65 GetPoolInfo(poolName string) (*types.PoolInfo, bool) 66 // GetPoolSize returns the size of pool as integer 67 GetPoolSize(poolName string) (int, bool) 68 69 // GetRegionInfo returns a RegionInfo copy by region name 70 GetRegionInfo(regionName string) (*types.RegionInfo, bool) 71 // RangeRegionInfo applies a function to every regionName, regionInfo set. 72 // If f returns false, range stops the iteration. 73 RangeRegionInfo(f func(regionName string, regionInfo *types.RegionInfo) bool) 74 75 // GetFilteredInferenceResult gets specified model inference result with filter function 76 GetFilteredInferenceResult(filterFunc func(result interface{}) (interface{}, error), modelName string) (interface{}, error) 77 // GetInferenceResult gets specified model inference result 78 GetInferenceResult(modelName string) (interface{}, error) 79 80 metrictypes.MetricsReader 81 } 82 83 // MetaWriter provides a standard interface to modify raw metadata (generated by other agents) in local cache 84 type MetaWriter interface { 85 // AddContainer adds a container keyed by pod uid and container name. For repeatedly added 86 // container, only mutable metadata will be updated, i.e. request quantity changed by vpa 87 AddContainer(podUID string, containerName string, containerInfo *types.ContainerInfo) error 88 // SetContainerInfo updates ContainerInfo keyed by pod uid and container name 89 SetContainerInfo(podUID string, containerName string, containerInfo *types.ContainerInfo) error 90 // RangeAndUpdateContainer applies a function to every podUID, containerName, containerInfo set. 91 // Not recommended using if RangeContainer satisfies the requirement. 92 // If f returns false, range stops the iteration. 93 RangeAndUpdateContainer(f func(podUID string, containerName string, containerInfo *types.ContainerInfo) bool) error 94 95 // DeleteContainer deletes a ContainerInfo keyed by pod uid and container name 96 DeleteContainer(podUID string, containerName string) error 97 // RangeAndDeleteContainer applies a function to every podUID, containerName, containerInfo set. 98 // If f returns true, the containerInfo will be deleted. 99 RangeAndDeleteContainer(f func(containerInfo *types.ContainerInfo) bool, safeTime int64) error 100 // RemovePod deletes a PodInfo keyed by pod uid. Repeatedly remove will be ignored. 101 RemovePod(podUID string) error 102 // ClearContainers remove all containers 103 ClearContainers() error 104 105 // SetPoolInfo stores a PoolInfo by pool name 106 SetPoolInfo(poolName string, poolInfo *types.PoolInfo) error 107 // DeletePool deletes a PoolInfo keyed by pool name 108 DeletePool(poolName string) error 109 // GCPoolEntries deletes GCPoolEntries not existing on node 110 GCPoolEntries(livingPoolNameSet sets.String) error 111 112 // SetRegionEntries overwrites the whole region entries 113 SetRegionEntries(entries types.RegionEntries) error 114 // SetRegionInfo stores a RegionInfo by region name 115 SetRegionInfo(regionName string, regionInfo *types.RegionInfo) error 116 117 // SetInferenceResult sets specified model inference result 118 SetInferenceResult(modelName string, result interface{}) error 119 } 120 121 type AdvisorNotifier struct{} 122 123 type MetaCache interface { 124 MetaReader 125 MetaWriter 126 } 127 128 // MetaCacheImp stores metadata and info of pod, node, pool, subnuma etc. as a cache, 129 // and synchronizes data to sysadvisor state file. It is thread-safe to read and write. 130 // Deep copy logic is performed during accessing metacache entries instead of directly 131 // return pointer of each struct to avoid mis-overwrite. 132 type MetaCacheImp struct { 133 metrictypes.MetricsReader 134 135 podEntries types.PodEntries 136 podMutex sync.RWMutex 137 138 poolEntries types.PoolEntries 139 poolMutex sync.RWMutex 140 141 regionEntries types.RegionEntries 142 regionMutex sync.RWMutex 143 144 checkpointManager checkpointmanager.CheckpointManager 145 checkpointName string 146 147 emitter metrics.MetricEmitter 148 149 modelToResult map[string]interface{} 150 modelMutex sync.RWMutex 151 152 containerCreateTimestamp map[string]int64 153 } 154 155 var _ MetaCache = &MetaCacheImp{} 156 157 // NewMetaCacheImp returns the single instance of MetaCacheImp 158 func NewMetaCacheImp(conf *config.Configuration, emitterPool metricspool.MetricsEmitterPool, metricsReader metrictypes.MetricsReader) (*MetaCacheImp, error) { 159 stateFileDir := conf.GenericSysAdvisorConfiguration.StateFileDirectory 160 checkpointManager, err := checkpointmanager.NewCheckpointManager(stateFileDir) 161 if err != nil { 162 return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err) 163 } 164 emitter := emitterPool.GetDefaultMetricsEmitter().WithTags("advisor-metacache") 165 166 mc := &MetaCacheImp{ 167 MetricsReader: metricsReader, 168 podEntries: make(types.PodEntries), 169 poolEntries: make(types.PoolEntries), 170 regionEntries: make(types.RegionEntries), 171 checkpointManager: checkpointManager, 172 checkpointName: stateFileName, 173 emitter: emitter, 174 modelToResult: make(map[string]interface{}), 175 containerCreateTimestamp: make(map[string]int64), 176 } 177 178 // Restore from checkpoint before any function call to metacache api 179 if err := mc.restoreState(); err != nil { 180 return mc, err 181 } 182 183 return mc, nil 184 } 185 186 /* 187 standard implementation for metaReader 188 */ 189 190 func (mc *MetaCacheImp) GetContainerEntries(podUID string) (types.ContainerEntries, bool) { 191 mc.podMutex.RLock() 192 defer mc.podMutex.RUnlock() 193 194 v, ok := mc.podEntries[podUID] 195 return v.Clone(), ok 196 } 197 198 func (mc *MetaCacheImp) GetContainerInfo(podUID string, containerName string) (*types.ContainerInfo, bool) { 199 mc.podMutex.RLock() 200 defer mc.podMutex.RUnlock() 201 202 podInfo, ok := mc.podEntries[podUID] 203 if !ok { 204 return nil, false 205 } 206 containerInfo, ok := podInfo[containerName] 207 208 return containerInfo.Clone(), ok 209 } 210 211 // RangeContainer should deepcopy so that pod and container entries will not be overwritten. 212 func (mc *MetaCacheImp) RangeContainer(f func(podUID string, containerName string, containerInfo *types.ContainerInfo) bool) { 213 mc.podMutex.RLock() 214 defer mc.podMutex.RUnlock() 215 216 for podUID, podInfo := range mc.podEntries.Clone() { 217 for containerName, containerInfo := range podInfo { 218 if !f(podUID, containerName, containerInfo) { 219 break 220 } 221 } 222 } 223 } 224 225 func (mc *MetaCacheImp) GetPoolInfo(poolName string) (*types.PoolInfo, bool) { 226 mc.poolMutex.RLock() 227 defer mc.poolMutex.RUnlock() 228 229 poolInfo, ok := mc.poolEntries[poolName] 230 return poolInfo.Clone(), ok 231 } 232 233 func (mc *MetaCacheImp) GetPoolSize(poolName string) (int, bool) { 234 mc.poolMutex.RLock() 235 defer mc.poolMutex.RUnlock() 236 237 pi, ok := mc.poolEntries[poolName] 238 if !ok { 239 return 0, false 240 } 241 return machine.CountCPUAssignmentCPUs(pi.TopologyAwareAssignments), true 242 } 243 244 func (mc *MetaCacheImp) GetRegionInfo(regionName string) (*types.RegionInfo, bool) { 245 mc.regionMutex.RLock() 246 defer mc.regionMutex.RUnlock() 247 248 regionInfo, ok := mc.regionEntries[regionName] 249 return regionInfo.Clone(), ok 250 } 251 252 // GetFilteredInferenceResult gets specified model inference result with filter function 253 // whether it returns a deep copied result depends on the implementation of filterFunc 254 func (mc *MetaCacheImp) GetFilteredInferenceResult(filterFunc func(result interface{}) (interface{}, error), 255 modelName string, 256 ) (interface{}, error) { 257 mc.modelMutex.RLock() 258 defer mc.modelMutex.RUnlock() 259 260 if mc.modelToResult[modelName] == nil { 261 return nil, fmt.Errorf("result for model: %s doesn't exist", modelName) 262 } 263 264 if filterFunc == nil { 265 return mc.modelToResult[modelName], nil 266 } else { 267 return filterFunc(mc.modelToResult[modelName]) 268 } 269 } 270 271 // GetInferenceResult gets specified model inference result 272 // notice it doesn't return a deep copied result 273 func (mc *MetaCacheImp) GetInferenceResult(modelName string) (interface{}, error) { 274 return mc.GetFilteredInferenceResult(nil, modelName) 275 } 276 277 func (mc *MetaCacheImp) RangeRegionInfo(f func(regionName string, regionInfo *types.RegionInfo) bool) { 278 mc.regionMutex.RLock() 279 defer mc.regionMutex.RUnlock() 280 281 for regionName, regionInfo := range mc.regionEntries.Clone() { 282 if !f(regionName, regionInfo) { 283 break 284 } 285 } 286 } 287 288 /* 289 standard implementation for MetaWriter 290 */ 291 292 func (mc *MetaCacheImp) AddContainer(podUID string, containerName string, containerInfo *types.ContainerInfo) error { 293 mc.podMutex.Lock() 294 defer mc.podMutex.Unlock() 295 296 if podInfo, ok := mc.podEntries[podUID]; ok { 297 if ci, ok := podInfo[containerName]; ok { 298 ci.UpdateMeta(containerInfo) 299 return nil 300 } 301 } 302 303 mc.setContainerCreateTimestamp(podUID, containerName, time.Now().UnixNano()) 304 if mc.setContainerInfo(podUID, containerName, containerInfo) { 305 return mc.storeState() 306 } 307 return nil 308 } 309 310 func (mc *MetaCacheImp) SetContainerInfo(podUID string, containerName string, containerInfo *types.ContainerInfo) error { 311 mc.podMutex.Lock() 312 defer mc.podMutex.Unlock() 313 314 if mc.setContainerInfo(podUID, containerName, containerInfo) { 315 return mc.storeState() 316 } 317 return nil 318 } 319 320 func (mc *MetaCacheImp) setContainerInfo(podUID string, containerName string, containerInfo *types.ContainerInfo) bool { 321 podInfo, ok := mc.podEntries[podUID] 322 if !ok { 323 mc.podEntries[podUID] = make(types.ContainerEntries) 324 podInfo = mc.podEntries[podUID] 325 } 326 327 if reflect.DeepEqual(podInfo[containerName], containerInfo) { 328 return false 329 } else { 330 podInfo[containerName] = containerInfo 331 return true 332 } 333 } 334 335 func (mc *MetaCacheImp) RangeAndUpdateContainer(f func(podUID string, containerName string, containerInfo *types.ContainerInfo) bool) error { 336 mc.podMutex.Lock() 337 defer mc.podMutex.Unlock() 338 339 oldPodEntries := mc.podEntries.Clone() 340 341 for podUID, podInfo := range mc.podEntries { 342 for containerName, containerInfo := range podInfo { 343 if !f(podUID, containerName, containerInfo) { 344 break 345 } 346 } 347 } 348 349 if !reflect.DeepEqual(oldPodEntries, mc.podEntries) { 350 return mc.storeState() 351 } 352 return nil 353 } 354 355 func (mc *MetaCacheImp) DeleteContainer(podUID string, containerName string) error { 356 mc.podMutex.Lock() 357 defer mc.podMutex.Unlock() 358 359 if mc.deleteContainer(podUID, containerName) { 360 return mc.storeState() 361 } 362 return nil 363 } 364 365 func (mc *MetaCacheImp) ClearContainers() error { 366 mc.podMutex.Lock() 367 defer mc.podMutex.Unlock() 368 369 if len(mc.containerCreateTimestamp) != 0 { 370 mc.containerCreateTimestamp = map[string]int64{} 371 } 372 if len(mc.podEntries) != 0 { 373 mc.podEntries = map[string]types.ContainerEntries{} 374 return mc.storeState() 375 } 376 377 return nil 378 } 379 380 func (mc *MetaCacheImp) RangeAndDeleteContainer(f func(containerInfo *types.ContainerInfo) bool, safeTime int64) error { 381 mc.podMutex.Lock() 382 defer mc.podMutex.Unlock() 383 384 needStoreState := false 385 for _, podInfo := range mc.podEntries { 386 for _, containerInfo := range podInfo { 387 if safeTime > 0 { 388 createAt := mc.getContainerCreateTimestamp(containerInfo.PodUID, containerInfo.ContainerName) 389 if createAt > safeTime { 390 continue 391 } 392 } 393 if f(containerInfo) { 394 klog.Warningf("RangeAndDeleteContainer delete container %s/%s with safe time (%d) and create time (%d)", 395 containerInfo.PodUID, containerInfo.ContainerName, safeTime, mc.getContainerCreateTimestamp(containerInfo.PodUID, containerInfo.ContainerName)) 396 if mc.deleteContainer(containerInfo.PodUID, containerInfo.ContainerName) { 397 needStoreState = true 398 } 399 } 400 } 401 } 402 403 if needStoreState { 404 return mc.storeState() 405 } 406 return nil 407 } 408 409 func (mc *MetaCacheImp) deleteContainer(podUID string, containerName string) bool { 410 mc.deleteContainerCreateTimestamp(podUID, containerName) 411 412 podInfo, ok := mc.podEntries[podUID] 413 if !ok { 414 return false 415 } 416 _, ok = podInfo[containerName] 417 if !ok { 418 return false 419 } 420 421 delete(podInfo, containerName) 422 if len(podInfo) <= 0 { 423 delete(mc.podEntries, podUID) 424 } 425 return true 426 } 427 428 func (mc *MetaCacheImp) RemovePod(podUID string) error { 429 mc.podMutex.Lock() 430 defer mc.podMutex.Unlock() 431 432 containerEntries, ok := mc.podEntries[podUID] 433 if !ok { 434 return nil 435 } 436 for _, container := range containerEntries { 437 mc.deleteContainerCreateTimestamp(podUID, container.ContainerName) 438 } 439 delete(mc.podEntries, podUID) 440 441 return mc.storeState() 442 } 443 444 func (mc *MetaCacheImp) SetPoolInfo(poolName string, poolInfo *types.PoolInfo) error { 445 mc.poolMutex.Lock() 446 defer mc.poolMutex.Unlock() 447 448 if reflect.DeepEqual(mc.poolEntries[poolName], poolInfo) { 449 return nil 450 } 451 452 mc.poolEntries[poolName] = poolInfo 453 454 return mc.storeState() 455 } 456 457 func (mc *MetaCacheImp) DeletePool(poolName string) error { 458 mc.poolMutex.Lock() 459 defer mc.poolMutex.Unlock() 460 461 if _, ok := mc.poolEntries[poolName]; !ok { 462 return nil 463 } 464 465 delete(mc.poolEntries, poolName) 466 467 return mc.storeState() 468 } 469 470 func (mc *MetaCacheImp) GCPoolEntries(livingPoolNameSet sets.String) error { 471 mc.poolMutex.Lock() 472 defer mc.poolMutex.Unlock() 473 474 needStoreState := false 475 for poolName := range mc.poolEntries { 476 if _, ok := livingPoolNameSet[poolName]; !ok { 477 delete(mc.poolEntries, poolName) 478 needStoreState = true 479 } 480 } 481 482 if needStoreState { 483 return mc.storeState() 484 } 485 return nil 486 } 487 488 func (mc *MetaCacheImp) SetRegionEntries(entries types.RegionEntries) error { 489 mc.regionMutex.Lock() 490 defer mc.regionMutex.Unlock() 491 492 oldRegionEntries := mc.regionEntries.Clone() 493 mc.regionEntries = entries.Clone() 494 495 if !reflect.DeepEqual(oldRegionEntries, mc.regionEntries) { 496 return mc.storeState() 497 } 498 return nil 499 } 500 501 func (mc *MetaCacheImp) SetRegionInfo(regionName string, regionInfo *types.RegionInfo) error { 502 mc.regionMutex.Lock() 503 defer mc.regionMutex.Unlock() 504 505 if reflect.DeepEqual(mc.regionEntries[regionName], regionInfo) { 506 return nil 507 } else { 508 mc.regionEntries[regionName] = regionInfo 509 return mc.storeState() 510 } 511 } 512 513 // SetInferenceResult sets specified model inference result 514 func (mc *MetaCacheImp) SetInferenceResult(modelName string, result interface{}) error { 515 general.InfoS("called", "modelName", modelName) 516 517 if result == nil { 518 return fmt.Errorf("nil result") 519 } 520 521 mc.modelMutex.Lock() 522 defer mc.modelMutex.Unlock() 523 524 mc.modelToResult[modelName] = result 525 return nil 526 } 527 528 /* 529 other helper functions 530 */ 531 532 func (mc *MetaCacheImp) storeState() error { 533 checkpoint := NewMetaCacheCheckpoint() 534 checkpoint.PodEntries = mc.podEntries 535 checkpoint.PoolEntries = mc.poolEntries 536 checkpoint.RegionEntries = mc.regionEntries 537 538 startTime := time.Now() 539 defer func(t time.Time) { 540 elapsed := time.Since(t) 541 if elapsed > storeStateWarningDuration { 542 klog.Errorf("[metacache] store state took too long time, duration %v", elapsed) 543 } 544 _ = mc.emitter.StoreFloat64(metricMetaCacheStoreStateDuration, float64(elapsed/time.Millisecond), metrics.MetricTypeNameRaw) 545 }(startTime) 546 547 if err := mc.checkpointManager.CreateCheckpoint(mc.checkpointName, checkpoint); err != nil { 548 klog.Errorf("[metacache] store state failed: %v", err) 549 return err 550 } 551 klog.Infof("[metacache] store state succeeded") 552 553 return nil 554 } 555 556 func (mc *MetaCacheImp) restoreState() error { 557 checkpoint := NewMetaCacheCheckpoint() 558 559 if err := mc.checkpointManager.GetCheckpoint(mc.checkpointName, checkpoint); err != nil { 560 klog.Infof("[metacache] checkpoint %v err %v, create it", mc.checkpointName, err) 561 return mc.storeState() 562 } 563 564 mc.podEntries = checkpoint.PodEntries 565 mc.poolEntries = checkpoint.PoolEntries 566 mc.regionEntries = checkpoint.RegionEntries 567 568 klog.Infof("[metacache] restore state succeeded") 569 570 return nil 571 } 572 573 func (mc *MetaCacheImp) setContainerCreateTimestamp(podUID, containerName string, timestamp int64) { 574 mc.containerCreateTimestamp[fmt.Sprintf("%s/%s", podUID, containerName)] = timestamp 575 } 576 577 func (mc *MetaCacheImp) getContainerCreateTimestamp(podUID, containerName string) int64 { 578 return mc.containerCreateTimestamp[fmt.Sprintf("%s/%s", podUID, containerName)] 579 } 580 581 func (mc *MetaCacheImp) deleteContainerCreateTimestamp(podUID, containerName string) { 582 delete(mc.containerCreateTimestamp, fmt.Sprintf("%s/%s", podUID, containerName)) 583 }