github.com/kubewharf/katalyst-core@v0.5.3/pkg/metaserver/agent/metric/metric_impl.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package metric 18 19 import ( 20 "context" 21 "fmt" 22 "math/rand" 23 "sync" 24 "time" 25 26 v1 "k8s.io/api/core/v1" 27 "k8s.io/apimachinery/pkg/util/wait" 28 29 "github.com/kubewharf/katalyst-core/pkg/config/agent/global" 30 "github.com/kubewharf/katalyst-core/pkg/config/agent/metaserver" 31 "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/types" 32 "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" 33 "github.com/kubewharf/katalyst-core/pkg/metrics" 34 "github.com/kubewharf/katalyst-core/pkg/util/machine" 35 utilmetric "github.com/kubewharf/katalyst-core/pkg/util/metric" 36 "github.com/kubewharf/katalyst-core/pkg/util/syntax" 37 ) 38 39 type MetricsNotifierManagerImpl struct { 40 *syntax.RWMutex 41 metricStore *utilmetric.MetricStore 42 registeredNotifier map[types.MetricsScope]map[string]*types.NotifiedData 43 } 44 45 func NewMetricsNotifierManager(metricStore *utilmetric.MetricStore, emitter metrics.MetricEmitter) types.MetricsNotifierManager { 46 return &MetricsNotifierManagerImpl{ 47 metricStore: metricStore, 48 RWMutex: syntax.NewRWMutex(emitter), 49 registeredNotifier: map[types.MetricsScope]map[string]*types.NotifiedData{ 50 types.MetricsScopeNode: make(map[string]*types.NotifiedData), 51 types.MetricsScopeNuma: make(map[string]*types.NotifiedData), 52 types.MetricsScopeCPU: make(map[string]*types.NotifiedData), 53 types.MetricsScopeDevice: make(map[string]*types.NotifiedData), 54 types.MetricsScopeContainer: make(map[string]*types.NotifiedData), 55 types.MetricsScopeContainerNUMA: make(map[string]*types.NotifiedData), 56 }, 57 } 58 } 59 60 func (m *MetricsNotifierManagerImpl) RegisterNotifier(scope types.MetricsScope, req types.NotifiedRequest, 61 response chan types.NotifiedResponse, 62 ) string { 63 if _, ok := m.registeredNotifier[scope]; !ok { 64 return "" 65 } 66 67 m.Lock() 68 defer m.Unlock() 69 70 randBytes := make([]byte, 30) 71 rand.Read(randBytes) 72 key := string(randBytes) 73 74 m.registeredNotifier[scope][key] = &types.NotifiedData{ 75 Scope: scope, 76 Req: req, 77 Response: response, 78 } 79 return key 80 } 81 82 func (m *MetricsNotifierManagerImpl) DeRegisterNotifier(scope types.MetricsScope, key string) { 83 m.Lock() 84 defer m.Unlock() 85 86 delete(m.registeredNotifier[scope], key) 87 } 88 89 func (m *MetricsNotifierManagerImpl) Notify() { 90 m.notifySystem() 91 m.notifyPods() 92 } 93 94 // notifySystem notifies system-related data 95 func (m *MetricsNotifierManagerImpl) notifySystem() { 96 now := time.Now() 97 m.RLock() 98 defer m.RUnlock() 99 100 for _, reg := range m.registeredNotifier[types.MetricsScopeNode] { 101 v, err := m.metricStore.GetNodeMetric(reg.Req.MetricName) 102 if err != nil { 103 continue 104 } else if v.Time == nil { 105 v.Time = &now 106 } 107 108 if reg.LastNotify.Equal(*v.Time) { 109 continue 110 } else { 111 reg.LastNotify = *v.Time 112 } 113 114 reg.Response <- types.NotifiedResponse{ 115 Req: reg.Req, 116 MetricData: v, 117 } 118 } 119 120 for _, reg := range m.registeredNotifier[types.MetricsScopeDevice] { 121 v, err := m.metricStore.GetDeviceMetric(reg.Req.DeviceID, reg.Req.MetricName) 122 if err != nil { 123 continue 124 } else if v.Time == nil { 125 v.Time = &now 126 } 127 128 if reg.LastNotify.Equal(*v.Time) { 129 continue 130 } else { 131 reg.LastNotify = *v.Time 132 } 133 134 reg.Response <- types.NotifiedResponse{ 135 Req: reg.Req, 136 MetricData: v, 137 } 138 } 139 140 for n, reg := range m.registeredNotifier[types.MetricsScopeNuma] { 141 v, err := m.metricStore.GetNumaMetric(reg.Req.NumaID, reg.Req.MetricName) 142 if err != nil { 143 continue 144 } else if v.Time == nil { 145 v.Time = &now 146 } 147 148 if m.registeredNotifier[types.MetricsScopeNuma][n].LastNotify.Equal(*v.Time) { 149 continue 150 } else { 151 reg.LastNotify = *v.Time 152 } 153 154 reg.Response <- types.NotifiedResponse{ 155 Req: reg.Req, 156 MetricData: v, 157 } 158 } 159 160 for n, reg := range m.registeredNotifier[types.MetricsScopeCPU] { 161 v, err := m.metricStore.GetCPUMetric(reg.Req.CoreID, reg.Req.MetricName) 162 if err != nil { 163 continue 164 } else if v.Time == nil { 165 v.Time = &now 166 } 167 168 if reg.LastNotify.Equal(*v.Time) { 169 continue 170 } else { 171 m.registeredNotifier[types.MetricsScopeCPU][n].LastNotify = *v.Time 172 } 173 174 reg.Response <- types.NotifiedResponse{ 175 Req: reg.Req, 176 MetricData: v, 177 } 178 } 179 } 180 181 // notifySystem notifies pod-related data 182 func (m *MetricsNotifierManagerImpl) notifyPods() { 183 now := time.Now() 184 m.RLock() 185 defer m.RUnlock() 186 187 for _, reg := range m.registeredNotifier[types.MetricsScopeContainer] { 188 v, err := m.metricStore.GetContainerMetric(reg.Req.PodUID, reg.Req.ContainerName, reg.Req.MetricName) 189 if err != nil { 190 continue 191 } else if v.Time == nil { 192 v.Time = &now 193 } 194 195 if reg.LastNotify.Equal(*v.Time) { 196 continue 197 } else { 198 reg.LastNotify = *v.Time 199 } 200 201 reg.Response <- types.NotifiedResponse{ 202 Req: reg.Req, 203 MetricData: v, 204 } 205 } 206 207 for _, reg := range m.registeredNotifier[types.MetricsScopeContainerNUMA] { 208 if reg.Req.NumaNode == "" { 209 continue 210 } 211 212 v, err := m.metricStore.GetContainerNumaMetric(reg.Req.PodUID, reg.Req.ContainerName, fmt.Sprintf("%v", reg.Req.NumaNode), reg.Req.MetricName) 213 if err != nil { 214 continue 215 } else if v.Time == nil { 216 v.Time = &now 217 } 218 219 if reg.LastNotify.Equal(*v.Time) { 220 continue 221 } else { 222 reg.LastNotify = *v.Time 223 } 224 225 reg.Response <- types.NotifiedResponse{ 226 Req: reg.Req, 227 MetricData: v, 228 } 229 } 230 } 231 232 type ExternalMetricManagerImpl struct { 233 *syntax.RWMutex 234 metricStore *utilmetric.MetricStore 235 registeredMetric []func(store *utilmetric.MetricStore) 236 } 237 238 func NewExternalMetricManager(metricStore *utilmetric.MetricStore, emitter metrics.MetricEmitter) types.ExternalMetricManager { 239 return &ExternalMetricManagerImpl{ 240 metricStore: metricStore, 241 RWMutex: syntax.NewRWMutex(emitter), 242 } 243 } 244 245 func (m *ExternalMetricManagerImpl) RegisterExternalMetric(f func(store *utilmetric.MetricStore)) { 246 m.Lock() 247 defer m.Unlock() 248 m.registeredMetric = append(m.registeredMetric, f) 249 } 250 251 func (m *ExternalMetricManagerImpl) Sample() { 252 m.RLock() 253 defer m.RUnlock() 254 for _, f := range m.registeredMetric { 255 f(m.metricStore) 256 } 257 } 258 259 type MetricsFetcherImpl struct { 260 startOnce sync.Once 261 hasSynced bool 262 263 metricStore *utilmetric.MetricStore 264 metricsNotifierManager types.MetricsNotifierManager 265 externalMetricManager types.ExternalMetricManager 266 checkMetricDataExpire CheckMetricDataExpireFunc 267 268 defaultInterval time.Duration 269 provisioners map[string]types.MetricsProvisioner 270 intervals map[string]time.Duration 271 } 272 273 func NewMetricsFetcher(baseConf *global.BaseConfiguration, metricConf *metaserver.MetricConfiguration, emitter metrics.MetricEmitter, podFetcher pod.PodFetcher) types.MetricsFetcher { 274 metricStore := utilmetric.NewMetricStore() 275 metricsNotifierManager := NewMetricsNotifierManager(metricStore, emitter) 276 externalMetricManager := NewExternalMetricManager(metricStore, emitter) 277 278 intervals := make(map[string]time.Duration) 279 provisioners := make(map[string]types.MetricsProvisioner) 280 registeredProvisioners := getProvisioners() 281 for _, name := range metricConf.MetricProvisions { 282 if f, ok := registeredProvisioners[name]; ok { 283 intervals[name] = metricConf.DefaultInterval 284 if interval, exist := metricConf.ProvisionerIntervals[name]; exist { 285 intervals[name] = interval 286 } 287 provisioners[name] = f(baseConf, metricConf, emitter, podFetcher, metricStore) 288 } 289 } 290 291 return &MetricsFetcherImpl{ 292 metricStore: metricStore, 293 metricsNotifierManager: metricsNotifierManager, 294 externalMetricManager: externalMetricManager, 295 checkMetricDataExpire: checkMetricDataExpireFunc(metricConf.MetricInsurancePeriod), 296 297 defaultInterval: metricConf.DefaultInterval, 298 provisioners: provisioners, 299 intervals: intervals, 300 } 301 } 302 303 func (f *MetricsFetcherImpl) GetNodeMetric(metricName string) (utilmetric.MetricData, error) { 304 return f.checkMetricDataExpire(f.metricStore.GetNodeMetric(metricName)) 305 } 306 307 func (f *MetricsFetcherImpl) GetNumaMetric(numaID int, metricName string) (utilmetric.MetricData, error) { 308 return f.checkMetricDataExpire(f.metricStore.GetNumaMetric(numaID, metricName)) 309 } 310 311 func (f *MetricsFetcherImpl) GetDeviceMetric(deviceName string, metricName string) (utilmetric.MetricData, error) { 312 return f.checkMetricDataExpire(f.metricStore.GetDeviceMetric(deviceName, metricName)) 313 } 314 315 func (f *MetricsFetcherImpl) GetCPUMetric(coreID int, metricName string) (utilmetric.MetricData, error) { 316 return f.checkMetricDataExpire(f.metricStore.GetCPUMetric(coreID, metricName)) 317 } 318 319 func (f *MetricsFetcherImpl) GetContainerMetric(podUID, containerName, metricName string) (utilmetric.MetricData, error) { 320 return f.checkMetricDataExpire(f.metricStore.GetContainerMetric(podUID, containerName, metricName)) 321 } 322 323 func (f *MetricsFetcherImpl) GetContainerNumaMetric(podUID, containerName, numaNode, metricName string) (utilmetric.MetricData, error) { 324 return f.checkMetricDataExpire(f.metricStore.GetContainerNumaMetric(podUID, containerName, numaNode, metricName)) 325 } 326 327 func (f *MetricsFetcherImpl) GetPodVolumeMetric(podUID, volumeName, metricName string) (utilmetric.MetricData, error) { 328 return f.checkMetricDataExpire(f.metricStore.GetPodVolumeMetric(podUID, volumeName, metricName)) 329 } 330 331 func (f *MetricsFetcherImpl) GetCgroupMetric(cgroupPath, metricName string) (utilmetric.MetricData, error) { 332 return f.checkMetricDataExpire(f.metricStore.GetCgroupMetric(cgroupPath, metricName)) 333 } 334 335 func (f *MetricsFetcherImpl) GetCgroupNumaMetric(cgroupPath string, numaNode int, metricName string) (utilmetric.MetricData, error) { 336 return f.checkMetricDataExpire(f.metricStore.GetCgroupNumaMetric(cgroupPath, numaNode, metricName)) 337 } 338 339 func (f *MetricsFetcherImpl) AggregatePodNumaMetric(podList []*v1.Pod, numaNode, metricName string, 340 agg utilmetric.Aggregator, filter utilmetric.ContainerMetricFilter, 341 ) utilmetric.MetricData { 342 return f.metricStore.AggregatePodNumaMetric(podList, numaNode, metricName, agg, filter) 343 } 344 345 func (f *MetricsFetcherImpl) AggregatePodMetric(podList []*v1.Pod, metricName string, 346 agg utilmetric.Aggregator, filter utilmetric.ContainerMetricFilter, 347 ) utilmetric.MetricData { 348 return f.metricStore.AggregatePodMetric(podList, metricName, agg, filter) 349 } 350 351 func (f *MetricsFetcherImpl) AggregateCoreMetric(cpuset machine.CPUSet, metricName string, agg utilmetric.Aggregator) utilmetric.MetricData { 352 return f.metricStore.AggregateCoreMetric(cpuset, metricName, agg) 353 } 354 355 func (f *MetricsFetcherImpl) RegisterNotifier(scope types.MetricsScope, req types.NotifiedRequest, response chan types.NotifiedResponse) string { 356 return f.metricsNotifierManager.RegisterNotifier(scope, req, response) 357 } 358 359 func (f *MetricsFetcherImpl) DeRegisterNotifier(scope types.MetricsScope, key string) { 360 f.metricsNotifierManager.DeRegisterNotifier(scope, key) 361 } 362 363 func (f *MetricsFetcherImpl) RegisterExternalMetric(externalMetricFunc func(store *utilmetric.MetricStore)) { 364 f.externalMetricManager.RegisterExternalMetric(externalMetricFunc) 365 } 366 367 func (f *MetricsFetcherImpl) Run(ctx context.Context) { 368 // make sure all provisioners have started at least once, 369 // and then allow each provisioner to collect metrics with 370 // its specified period. 371 // whenever any provisioner finishes its collecting process, 372 // notification will be triggered, and the consumer should 373 // handler duplication logic if necessary. 374 f.startOnce.Do(func() { 375 f.init(ctx) 376 f.run(ctx) 377 }) 378 } 379 380 func (f *MetricsFetcherImpl) init(ctx context.Context) { 381 wg := sync.WaitGroup{} 382 for name := range f.provisioners { 383 p := f.provisioners[name] 384 wg.Add(1) 385 go func() { 386 defer wg.Done() 387 p.Run(ctx) 388 }() 389 } 390 wg.Wait() 391 392 if f.externalMetricManager != nil { 393 f.externalMetricManager.Sample() 394 } 395 396 if f.metricsNotifierManager != nil { 397 f.metricsNotifierManager.Notify() 398 } 399 400 if !f.hasSynced { 401 f.hasSynced = true 402 } 403 } 404 405 func (f *MetricsFetcherImpl) run(ctx context.Context) { 406 // provisioner's implementation and its interval always exist, 407 // and it's ensured in init function 408 for name := range f.provisioners { 409 p := f.provisioners[name] 410 t := f.intervals[name] 411 go wait.Until(func() { 412 p.Run(ctx) 413 if f.metricsNotifierManager != nil { 414 f.metricsNotifierManager.Notify() 415 } 416 }, t, ctx.Done()) 417 } 418 419 if f.externalMetricManager != nil { 420 go wait.Until(func() { 421 f.externalMetricManager.Sample() 422 if f.metricsNotifierManager != nil { 423 f.metricsNotifierManager.Notify() 424 } 425 }, f.defaultInterval, ctx.Done()) 426 } 427 } 428 429 func (f *MetricsFetcherImpl) HasSynced() bool { 430 return f.hasSynced 431 }