github.com/polarismesh/polaris@v1.17.8/cache/service/instance.go (about) 1 /** 2 * Tencent is pleased to support the open source community by making Polaris available. 3 * 4 * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 5 * 6 * Licensed under the BSD 3-Clause License (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * https://opensource.org/licenses/BSD-3-Clause 11 * 12 * Unless required by applicable law or agreed to in writing, software distributed 13 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 14 * CONDITIONS OF ANY KIND, either express or implied. See the License for the 15 * specific language governing permissions and limitations under the License. 16 */ 17 18 package service 19 20 import ( 21 "time" 22 23 apimodel "github.com/polarismesh/specification/source/go/api/v1/model" 24 apiservice "github.com/polarismesh/specification/source/go/api/v1/service_manage" 25 "go.uber.org/zap" 26 "golang.org/x/sync/singleflight" 27 28 types "github.com/polarismesh/polaris/cache/api" 29 "github.com/polarismesh/polaris/common/model" 30 "github.com/polarismesh/polaris/common/utils" 31 "github.com/polarismesh/polaris/store" 32 ) 33 34 const ( 35 // 定时全量对账 36 checkAllIntervalSec = 60 37 ) 38 39 // instanceCache 实例缓存的类 40 type instanceCache struct { 41 *types.BaseCache 42 43 svcCache *serviceCache 44 storage store.Store 45 lastMtimeLogged int64 46 // instanceid -> instance 47 ids *utils.SyncMap[string, *model.Instance] 48 // service id -> [instanceid ->instance] 49 services *utils.SyncMap[string, *utils.SyncMap[string, *model.Instance]] 50 // service id -> [instanceCount] 51 instanceCounts *utils.SyncMap[string, *model.InstanceCount] 52 servicePortsBucket *servicePortsBucket 53 disableBusiness bool 54 needMeta bool 55 systemServiceID []string 56 singleFlight *singleflight.Group 57 instanceCount int64 58 lastCheckAllTime int64 59 } 60 61 // NewInstanceCache 新建一个instanceCache 62 func NewInstanceCache(storage store.Store, cacheMgr types.CacheManager) types.InstanceCache { 63 return &instanceCache{ 64 BaseCache: types.NewBaseCache(storage, cacheMgr), 65 storage: storage, 66 singleFlight: new(singleflight.Group), 67 } 68 } 69 70 // Initialize 初始化函数 71 func (ic *instanceCache) Initialize(opt map[string]interface{}) error { 72 ic.svcCache = ic.BaseCache.CacheMgr.GetCacher(types.CacheService).(*serviceCache) 73 ic.ids = utils.NewSyncMap[string, *model.Instance]() 74 ic.services = utils.NewSyncMap[string, *utils.SyncMap[string, *model.Instance]]() 75 ic.instanceCounts = utils.NewSyncMap[string, *model.InstanceCount]() 76 ic.servicePortsBucket = newServicePortsBucket() 77 if opt == nil { 78 return nil 79 } 80 ic.disableBusiness, _ = opt["disableBusiness"].(bool) 81 ic.needMeta, _ = opt["needMeta"].(bool) 82 // 只加载系统服务 83 if ic.disableBusiness { 84 services, err := ic.getSystemServices() 85 if err != nil { 86 return err 87 } 88 ic.systemServiceID = make([]string, 0, len(services)) 89 for _, service := range services { 90 if service.IsAlias() { 91 continue 92 } 93 ic.systemServiceID = append(ic.systemServiceID, service.ID) 94 } 95 } 96 return nil 97 } 98 99 // Update 更新缓存函数 100 func (ic *instanceCache) Update() error { 101 err, _ := ic.singleUpdate() 102 return err 103 } 104 105 func (ic *instanceCache) singleUpdate() (error, bool) { 106 // 多个线程竞争,只有一个线程进行更新 107 _, err, shared := ic.singleFlight.Do(ic.Name(), func() (interface{}, error) { 108 return nil, ic.DoCacheUpdate(ic.Name(), ic.realUpdate) 109 }) 110 return err, shared 111 } 112 113 func (ic *instanceCache) LastMtime() time.Time { 114 return ic.BaseCache.LastMtime(ic.Name()) 115 } 116 117 func (ic *instanceCache) checkAll(tx store.Tx) { 118 curTimeSec := time.Now().Unix() 119 if curTimeSec-ic.lastCheckAllTime < checkAllIntervalSec { 120 return 121 } 122 defer func() { 123 ic.lastCheckAllTime = curTimeSec 124 }() 125 count, err := ic.storage.GetInstancesCountTx(tx) 126 if err != nil { 127 log.Errorf("[Cache][Instance] get instance count from storage err: %s", err.Error()) 128 return 129 } 130 if ic.instanceCount == int64(count) { 131 return 132 } 133 log.Infof( 134 "[Cache][Instance] instance count not match, expect %d, actual %d, fallback to load all", 135 count, ic.instanceCount) 136 ic.ResetLastMtime(ic.Name()) 137 ic.ResetLastFetchTime() 138 } 139 140 const maxLoadTimeDuration = 1 * time.Second 141 142 func (ic *instanceCache) realUpdate() (map[string]time.Time, int64, error) { 143 // 拉取diff前的所有数据 144 start := time.Now() 145 146 tx, err := ic.storage.StartReadTx() 147 if err != nil { 148 if tx != nil { 149 _ = tx.Rollback() 150 } 151 log.Error("[Cache][Instance] begin transaction storage read tx", zap.Error(err)) 152 return nil, -1, err 153 } 154 155 var instanceChangeEvents []*cacheInstanceEvent 156 defer func() { 157 _ = tx.Rollback() 158 for i := range instanceChangeEvents { 159 ic.Manager.OnEvent(instanceChangeEvents[i].item, instanceChangeEvents[i].eventType) 160 } 161 ic.reportMetricsInfo() 162 }() 163 164 if err := tx.CreateReadView(); err != nil { 165 log.Error("[Cache][Instance] create storage snapshot read view", zap.Error(err)) 166 return nil, -1, err 167 } 168 169 events, lastMtimes, total, err := ic.handleUpdate(start, tx) 170 _ = tx.Commit() 171 instanceChangeEvents = events 172 return lastMtimes, total, err 173 } 174 175 func (ic *instanceCache) handleUpdate(start time.Time, tx store.Tx) ([]*cacheInstanceEvent, map[string]time.Time, int64, error) { 176 defer func() { 177 ic.lastMtimeLogged = types.LogLastMtime(ic.lastMtimeLogged, ic.LastMtime().Unix(), "Instance") 178 ic.checkAll(tx) 179 }() 180 181 instances, err := ic.storage.GetMoreInstances(tx, ic.LastFetchTime(), ic.IsFirstUpdate(), 182 ic.needMeta, ic.systemServiceID) 183 184 if err != nil { 185 log.Error("[Cache][Instance] update get storage more", zap.Error(err)) 186 return nil, nil, -1, err 187 } 188 189 events, lastMtimes, update, del := ic.setInstances(instances) 190 log.Info("[Cache][Instance] get more instances", 191 zap.Int("pull-from-store", len(instances)), zap.Int("update", update), zap.Int("delete", del), 192 zap.Time("last", ic.LastMtime()), zap.Duration("used", time.Since(start))) 193 return events, lastMtimes, int64(len(instances)), err 194 } 195 196 // Clear 清理内部缓存数据 197 func (ic *instanceCache) Clear() error { 198 ic.BaseCache.Clear() 199 ic.ids = utils.NewSyncMap[string, *model.Instance]() 200 ic.services = utils.NewSyncMap[string, *utils.SyncMap[string, *model.Instance]]() 201 ic.instanceCounts = utils.NewSyncMap[string, *model.InstanceCount]() 202 ic.servicePortsBucket.reset() 203 ic.instanceCount = 0 204 return nil 205 } 206 207 // Name 获取资源名称 208 func (ic *instanceCache) Name() string { 209 return types.InstanceName 210 } 211 212 // getSystemServices 获取系统服务ID 213 func (ic *instanceCache) getSystemServices() ([]*model.Service, error) { 214 services, err := ic.storage.GetSystemServices() 215 if err != nil { 216 log.Errorf("[Cache][Instance] get system services err: %s", err.Error()) 217 return nil, err 218 } 219 return services, nil 220 } 221 222 // setInstances 保存instance到内存中 223 // 返回:更新个数,删除个数 224 func (ic *instanceCache) setInstances(ins map[string]*model.Instance) ([]*cacheInstanceEvent, map[string]time.Time, int, int) { 225 if len(ins) == 0 { 226 return nil, nil, 0, 0 227 } 228 events := make([]*cacheInstanceEvent, 0, len(ins)) 229 addInstances := map[string]string{} 230 updateInstances := map[string]string{} 231 deleteInstances := map[string]string{} 232 233 lastMtime := ic.LastMtime().Unix() 234 update := 0 235 del := 0 236 affect := make(map[string]bool) 237 progress := 0 238 instanceCount := ic.instanceCount 239 240 for _, item := range ins { 241 progress++ 242 if progress%50000 == 0 { 243 log.Infof("[Cache][Instance] set instances progress: %d / %d", progress, len(ins)) 244 } 245 modifyTime := item.ModifyTime.Unix() 246 if lastMtime < modifyTime { 247 lastMtime = modifyTime 248 } 249 affect[item.ServiceID] = true 250 _, itemExist := ic.ids.Load(item.ID()) 251 // 待删除的instance 252 if !item.Valid { 253 deleteInstances[item.ID()] = item.Revision() 254 del++ 255 ic.ids.Delete(item.ID()) 256 if itemExist { 257 events = append(events, &cacheInstanceEvent{ 258 item: item, 259 eventType: types.EventDeleted, 260 }) 261 instanceCount-- 262 } 263 value, ok := ic.services.Load(item.ServiceID) 264 if !ok { 265 continue 266 } 267 268 value.Delete(item.ID()) 269 continue 270 } 271 // 有修改或者新增的数据 272 update++ 273 // 缓存的instance map增加一个version和protocol字段 274 if item.Proto.Metadata == nil { 275 item.Proto.Metadata = make(map[string]string) 276 } 277 278 item = fillInternalLabels(item) 279 280 ic.ids.Store(item.ID(), item) 281 if !itemExist { 282 addInstances[item.ID()] = item.Revision() 283 instanceCount++ 284 events = append(events, &cacheInstanceEvent{ 285 item: item, 286 eventType: types.EventCreated, 287 }) 288 } else { 289 updateInstances[item.ID()] = item.Revision() 290 events = append(events, &cacheInstanceEvent{ 291 item: item, 292 eventType: types.EventUpdated, 293 }) 294 } 295 value, ok := ic.services.Load(item.ServiceID) 296 if !ok { 297 value = utils.NewSyncMap[string, *model.Instance]() 298 ic.services.Store(item.ServiceID, value) 299 } 300 301 ic.servicePortsBucket.appendPort(item.ServiceID, item.Protocol(), item.Port()) 302 value.Store(item.ID(), item) 303 } 304 305 if ic.instanceCount != instanceCount { 306 log.Infof("[Cache][Instance] instance count update from %d to %d", 307 ic.instanceCount, instanceCount) 308 ic.instanceCount = instanceCount 309 } 310 311 log.Info("[Cache][Instance] instances change info", zap.Any("add", addInstances), 312 zap.Any("update", updateInstances), zap.Any("delete", deleteInstances)) 313 314 ic.postProcessUpdatedServices(affect) 315 ic.svcCache.notifyServiceCountReload(affect) 316 return events, map[string]time.Time{ 317 ic.Name(): time.Unix(lastMtime, 0), 318 }, update, del 319 } 320 321 func fillInternalLabels(item *model.Instance) *model.Instance { 322 if len(item.Version()) > 0 { 323 item.Proto.Metadata["version"] = item.Version() 324 } 325 if len(item.Protocol()) > 0 { 326 item.Proto.Metadata["protocol"] = item.Protocol() 327 } 328 329 if item.Location() != nil { 330 item.Proto.Metadata["region"] = item.Location().GetRegion().GetValue() 331 item.Proto.Metadata["zone"] = item.Location().GetZone().GetValue() 332 item.Proto.Metadata["campus"] = item.Location().GetCampus().GetValue() 333 } 334 return item 335 } 336 337 func (ic *instanceCache) postProcessUpdatedServices(affect map[string]bool) { 338 progress := 0 339 for serviceID := range affect { 340 ic.svcCache.GetRevisionWorker().Notify(serviceID, true) 341 progress++ 342 if progress%10000 == 0 { 343 log.Infof("[Cache][Instance] revision notify progress(%d / %d)", progress, len(affect)) 344 } 345 // 构建服务数量统计 346 value, ok := ic.services.Load(serviceID) 347 if !ok { 348 ic.instanceCounts.Delete(serviceID) 349 continue 350 } 351 count := &model.InstanceCount{} 352 value.Range(func(key string, instance *model.Instance) bool { 353 count.TotalInstanceCount++ 354 if isInstanceHealthy(instance) { 355 count.HealthyInstanceCount++ 356 } 357 if instance.Proto.GetIsolate().GetValue() { 358 count.IsolateInstanceCount++ 359 } 360 return true 361 }) 362 if count.TotalInstanceCount == 0 { 363 ic.instanceCounts.Delete(serviceID) 364 continue 365 } 366 ic.instanceCounts.Store(serviceID, count) 367 } 368 } 369 370 func isInstanceHealthy(instance *model.Instance) bool { 371 return instance.Proto.GetHealthy().GetValue() && !instance.Proto.GetIsolate().GetValue() 372 } 373 374 // GetInstance 根据实例ID获取实例数据 375 func (ic *instanceCache) GetInstance(instanceID string) *model.Instance { 376 if instanceID == "" { 377 return nil 378 } 379 380 value, ok := ic.ids.Load(instanceID) 381 if !ok { 382 return nil 383 } 384 385 return value 386 } 387 388 // GetInstancesByServiceID 根据ServiceID获取实例数据 389 func (ic *instanceCache) GetInstancesByServiceID(serviceID string) []*model.Instance { 390 if serviceID == "" { 391 return nil 392 } 393 394 value, ok := ic.services.Load(serviceID) 395 if !ok { 396 return nil 397 } 398 399 var out []*model.Instance 400 value.Range(func(k string, v *model.Instance) bool { 401 out = append(out, v) 402 return true 403 }) 404 405 return out 406 } 407 408 // GetInstancesCountByServiceID 根据服务ID获取实例数 409 func (ic *instanceCache) GetInstancesCountByServiceID(serviceID string) model.InstanceCount { 410 if serviceID == "" { 411 return model.InstanceCount{} 412 } 413 414 value, ok := ic.instanceCounts.Load(serviceID) 415 if !ok { 416 return model.InstanceCount{} 417 } 418 return *value 419 } 420 421 // IteratorInstances 迭代所有的instance的函数 422 func (ic *instanceCache) IteratorInstances(iterProc types.InstanceIterProc) error { 423 return iteratorInstancesProc(ic.ids, iterProc) 424 } 425 426 // IteratorInstancesWithService 根据服务ID进行迭代回调 427 func (ic *instanceCache) IteratorInstancesWithService(serviceID string, iterProc types.InstanceIterProc) error { 428 if serviceID == "" { 429 return nil 430 } 431 value, ok := ic.services.Load(serviceID) 432 if !ok { 433 return nil 434 } 435 436 return iteratorInstancesProc(value, iterProc) 437 } 438 439 // GetInstancesCount 获取实例的个数 440 func (ic *instanceCache) GetInstancesCount() int { 441 count := 0 442 ic.ids.Range(func(key string, value *model.Instance) bool { 443 count++ 444 return true 445 }) 446 447 return count 448 } 449 450 // GetInstanceLabels 获取某个服务下实例的所有标签信息集合 451 func (ic *instanceCache) GetInstanceLabels(serviceID string) *apiservice.InstanceLabels { 452 if serviceID == "" { 453 return &apiservice.InstanceLabels{} 454 } 455 456 value, ok := ic.services.Load(serviceID) 457 if !ok { 458 return &apiservice.InstanceLabels{} 459 } 460 461 ret := &apiservice.InstanceLabels{ 462 Labels: make(map[string]*apimodel.StringList), 463 } 464 465 tmp := make(map[string]map[string]struct{}) 466 _ = iteratorInstancesProc(value, func(key string, value *model.Instance) (bool, error) { 467 metadata := value.Metadata() 468 for k, v := range metadata { 469 if _, ok := tmp[k]; !ok { 470 tmp[k] = make(map[string]struct{}) 471 } 472 tmp[k][v] = struct{}{} 473 } 474 return true, nil 475 }) 476 477 for k, v := range tmp { 478 if _, ok := ret.Labels[k]; !ok { 479 ret.Labels[k] = &apimodel.StringList{Values: make([]string, 0, 4)} 480 } 481 482 for vv := range v { 483 ret.Labels[k].Values = append(ret.Labels[k].Values, vv) 484 } 485 } 486 487 return ret 488 } 489 490 func (ic *instanceCache) GetServicePorts(serviceID string) []*model.ServicePort { 491 return ic.servicePortsBucket.listPort(serviceID) 492 } 493 494 // iteratorInstancesProc 迭代指定的instance数据,id->instance 495 func iteratorInstancesProc(data *utils.SyncMap[string, *model.Instance], iterProc types.InstanceIterProc) error { 496 var ( 497 cont bool 498 err error 499 ) 500 501 proc := func(k string, v *model.Instance) bool { 502 cont, err = iterProc(k, v) 503 if err != nil { 504 return false 505 } 506 return cont 507 } 508 509 data.Range(proc) 510 return err 511 } 512 513 type cacheInstanceEvent struct { 514 item *model.Instance 515 eventType types.EventType 516 }