github.com/polarismesh/polaris@v1.17.8/cache/service/instance.go (about)

     1  /**
     2   * Tencent is pleased to support the open source community by making Polaris available.
     3   *
     4   * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
     5   *
     6   * Licensed under the BSD 3-Clause License (the "License");
     7   * you may not use this file except in compliance with the License.
     8   * You may obtain a copy of the License at
     9   *
    10   * https://opensource.org/licenses/BSD-3-Clause
    11   *
    12   * Unless required by applicable law or agreed to in writing, software distributed
    13   * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
    14   * CONDITIONS OF ANY KIND, either express or implied. See the License for the
    15   * specific language governing permissions and limitations under the License.
    16   */
    17  
    18  package service
    19  
    20  import (
    21  	"time"
    22  
    23  	apimodel "github.com/polarismesh/specification/source/go/api/v1/model"
    24  	apiservice "github.com/polarismesh/specification/source/go/api/v1/service_manage"
    25  	"go.uber.org/zap"
    26  	"golang.org/x/sync/singleflight"
    27  
    28  	types "github.com/polarismesh/polaris/cache/api"
    29  	"github.com/polarismesh/polaris/common/model"
    30  	"github.com/polarismesh/polaris/common/utils"
    31  	"github.com/polarismesh/polaris/store"
    32  )
    33  
    34  const (
    35  	// 定时全量对账
    36  	checkAllIntervalSec = 60
    37  )
    38  
// instanceCache is the in-memory cache of service instances. It indexes
// instances by instance ID and by service ID, and keeps per-service instance
// counts alongside a running total (instanceCount) that checkAll compares
// against the store.
type instanceCache struct {
	*types.BaseCache

	svcCache        *serviceCache
	storage         store.Store
	lastMtimeLogged int64
	// instance id -> instance
	ids *utils.SyncMap[string, *model.Instance]
	// service id -> [instance id -> instance]
	services *utils.SyncMap[string, *utils.SyncMap[string, *model.Instance]]
	// service id -> instance count
	instanceCounts     *utils.SyncMap[string, *model.InstanceCount]
	servicePortsBucket *servicePortsBucket
	disableBusiness    bool
	needMeta           bool
	systemServiceID    []string
	singleFlight       *singleflight.Group
	instanceCount      int64
	lastCheckAllTime   int64
}
    60  
    61  // NewInstanceCache 新建一个instanceCache
    62  func NewInstanceCache(storage store.Store, cacheMgr types.CacheManager) types.InstanceCache {
    63  	return &instanceCache{
    64  		BaseCache:    types.NewBaseCache(storage, cacheMgr),
    65  		storage:      storage,
    66  		singleFlight: new(singleflight.Group),
    67  	}
    68  }
    69  
    70  // Initialize 初始化函数
    71  func (ic *instanceCache) Initialize(opt map[string]interface{}) error {
    72  	ic.svcCache = ic.BaseCache.CacheMgr.GetCacher(types.CacheService).(*serviceCache)
    73  	ic.ids = utils.NewSyncMap[string, *model.Instance]()
    74  	ic.services = utils.NewSyncMap[string, *utils.SyncMap[string, *model.Instance]]()
    75  	ic.instanceCounts = utils.NewSyncMap[string, *model.InstanceCount]()
    76  	ic.servicePortsBucket = newServicePortsBucket()
    77  	if opt == nil {
    78  		return nil
    79  	}
    80  	ic.disableBusiness, _ = opt["disableBusiness"].(bool)
    81  	ic.needMeta, _ = opt["needMeta"].(bool)
    82  	// 只加载系统服务
    83  	if ic.disableBusiness {
    84  		services, err := ic.getSystemServices()
    85  		if err != nil {
    86  			return err
    87  		}
    88  		ic.systemServiceID = make([]string, 0, len(services))
    89  		for _, service := range services {
    90  			if service.IsAlias() {
    91  				continue
    92  			}
    93  			ic.systemServiceID = append(ic.systemServiceID, service.ID)
    94  		}
    95  	}
    96  	return nil
    97  }
    98  
    99  // Update 更新缓存函数
   100  func (ic *instanceCache) Update() error {
   101  	err, _ := ic.singleUpdate()
   102  	return err
   103  }
   104  
   105  func (ic *instanceCache) singleUpdate() (error, bool) {
   106  	// 多个线程竞争,只有一个线程进行更新
   107  	_, err, shared := ic.singleFlight.Do(ic.Name(), func() (interface{}, error) {
   108  		return nil, ic.DoCacheUpdate(ic.Name(), ic.realUpdate)
   109  	})
   110  	return err, shared
   111  }
   112  
   113  func (ic *instanceCache) LastMtime() time.Time {
   114  	return ic.BaseCache.LastMtime(ic.Name())
   115  }
   116  
   117  func (ic *instanceCache) checkAll(tx store.Tx) {
   118  	curTimeSec := time.Now().Unix()
   119  	if curTimeSec-ic.lastCheckAllTime < checkAllIntervalSec {
   120  		return
   121  	}
   122  	defer func() {
   123  		ic.lastCheckAllTime = curTimeSec
   124  	}()
   125  	count, err := ic.storage.GetInstancesCountTx(tx)
   126  	if err != nil {
   127  		log.Errorf("[Cache][Instance] get instance count from storage err: %s", err.Error())
   128  		return
   129  	}
   130  	if ic.instanceCount == int64(count) {
   131  		return
   132  	}
   133  	log.Infof(
   134  		"[Cache][Instance] instance count not match, expect %d, actual %d, fallback to load all",
   135  		count, ic.instanceCount)
   136  	ic.ResetLastMtime(ic.Name())
   137  	ic.ResetLastFetchTime()
   138  }
   139  
   140  const maxLoadTimeDuration = 1 * time.Second
   141  
   142  func (ic *instanceCache) realUpdate() (map[string]time.Time, int64, error) {
   143  	// 拉取diff前的所有数据
   144  	start := time.Now()
   145  
   146  	tx, err := ic.storage.StartReadTx()
   147  	if err != nil {
   148  		if tx != nil {
   149  			_ = tx.Rollback()
   150  		}
   151  		log.Error("[Cache][Instance] begin transaction storage read tx", zap.Error(err))
   152  		return nil, -1, err
   153  	}
   154  
   155  	var instanceChangeEvents []*cacheInstanceEvent
   156  	defer func() {
   157  		_ = tx.Rollback()
   158  		for i := range instanceChangeEvents {
   159  			ic.Manager.OnEvent(instanceChangeEvents[i].item, instanceChangeEvents[i].eventType)
   160  		}
   161  		ic.reportMetricsInfo()
   162  	}()
   163  
   164  	if err := tx.CreateReadView(); err != nil {
   165  		log.Error("[Cache][Instance] create storage snapshot read view", zap.Error(err))
   166  		return nil, -1, err
   167  	}
   168  
   169  	events, lastMtimes, total, err := ic.handleUpdate(start, tx)
   170  	_ = tx.Commit()
   171  	instanceChangeEvents = events
   172  	return lastMtimes, total, err
   173  }
   174  
   175  func (ic *instanceCache) handleUpdate(start time.Time, tx store.Tx) ([]*cacheInstanceEvent, map[string]time.Time, int64, error) {
   176  	defer func() {
   177  		ic.lastMtimeLogged = types.LogLastMtime(ic.lastMtimeLogged, ic.LastMtime().Unix(), "Instance")
   178  		ic.checkAll(tx)
   179  	}()
   180  
   181  	instances, err := ic.storage.GetMoreInstances(tx, ic.LastFetchTime(), ic.IsFirstUpdate(),
   182  		ic.needMeta, ic.systemServiceID)
   183  
   184  	if err != nil {
   185  		log.Error("[Cache][Instance] update get storage more", zap.Error(err))
   186  		return nil, nil, -1, err
   187  	}
   188  
   189  	events, lastMtimes, update, del := ic.setInstances(instances)
   190  	log.Info("[Cache][Instance] get more instances",
   191  		zap.Int("pull-from-store", len(instances)), zap.Int("update", update), zap.Int("delete", del),
   192  		zap.Time("last", ic.LastMtime()), zap.Duration("used", time.Since(start)))
   193  	return events, lastMtimes, int64(len(instances)), err
   194  }
   195  
   196  // Clear 清理内部缓存数据
   197  func (ic *instanceCache) Clear() error {
   198  	ic.BaseCache.Clear()
   199  	ic.ids = utils.NewSyncMap[string, *model.Instance]()
   200  	ic.services = utils.NewSyncMap[string, *utils.SyncMap[string, *model.Instance]]()
   201  	ic.instanceCounts = utils.NewSyncMap[string, *model.InstanceCount]()
   202  	ic.servicePortsBucket.reset()
   203  	ic.instanceCount = 0
   204  	return nil
   205  }
   206  
   207  // Name 获取资源名称
   208  func (ic *instanceCache) Name() string {
   209  	return types.InstanceName
   210  }
   211  
   212  // getSystemServices 获取系统服务ID
   213  func (ic *instanceCache) getSystemServices() ([]*model.Service, error) {
   214  	services, err := ic.storage.GetSystemServices()
   215  	if err != nil {
   216  		log.Errorf("[Cache][Instance] get system services err: %s", err.Error())
   217  		return nil, err
   218  	}
   219  	return services, nil
   220  }
   221  
   222  // setInstances 保存instance到内存中
   223  // 返回:更新个数,删除个数
   224  func (ic *instanceCache) setInstances(ins map[string]*model.Instance) ([]*cacheInstanceEvent, map[string]time.Time, int, int) {
   225  	if len(ins) == 0 {
   226  		return nil, nil, 0, 0
   227  	}
   228  	events := make([]*cacheInstanceEvent, 0, len(ins))
   229  	addInstances := map[string]string{}
   230  	updateInstances := map[string]string{}
   231  	deleteInstances := map[string]string{}
   232  
   233  	lastMtime := ic.LastMtime().Unix()
   234  	update := 0
   235  	del := 0
   236  	affect := make(map[string]bool)
   237  	progress := 0
   238  	instanceCount := ic.instanceCount
   239  
   240  	for _, item := range ins {
   241  		progress++
   242  		if progress%50000 == 0 {
   243  			log.Infof("[Cache][Instance] set instances progress: %d / %d", progress, len(ins))
   244  		}
   245  		modifyTime := item.ModifyTime.Unix()
   246  		if lastMtime < modifyTime {
   247  			lastMtime = modifyTime
   248  		}
   249  		affect[item.ServiceID] = true
   250  		_, itemExist := ic.ids.Load(item.ID())
   251  		// 待删除的instance
   252  		if !item.Valid {
   253  			deleteInstances[item.ID()] = item.Revision()
   254  			del++
   255  			ic.ids.Delete(item.ID())
   256  			if itemExist {
   257  				events = append(events, &cacheInstanceEvent{
   258  					item:      item,
   259  					eventType: types.EventDeleted,
   260  				})
   261  				instanceCount--
   262  			}
   263  			value, ok := ic.services.Load(item.ServiceID)
   264  			if !ok {
   265  				continue
   266  			}
   267  
   268  			value.Delete(item.ID())
   269  			continue
   270  		}
   271  		// 有修改或者新增的数据
   272  		update++
   273  		// 缓存的instance map增加一个version和protocol字段
   274  		if item.Proto.Metadata == nil {
   275  			item.Proto.Metadata = make(map[string]string)
   276  		}
   277  
   278  		item = fillInternalLabels(item)
   279  
   280  		ic.ids.Store(item.ID(), item)
   281  		if !itemExist {
   282  			addInstances[item.ID()] = item.Revision()
   283  			instanceCount++
   284  			events = append(events, &cacheInstanceEvent{
   285  				item:      item,
   286  				eventType: types.EventCreated,
   287  			})
   288  		} else {
   289  			updateInstances[item.ID()] = item.Revision()
   290  			events = append(events, &cacheInstanceEvent{
   291  				item:      item,
   292  				eventType: types.EventUpdated,
   293  			})
   294  		}
   295  		value, ok := ic.services.Load(item.ServiceID)
   296  		if !ok {
   297  			value = utils.NewSyncMap[string, *model.Instance]()
   298  			ic.services.Store(item.ServiceID, value)
   299  		}
   300  
   301  		ic.servicePortsBucket.appendPort(item.ServiceID, item.Protocol(), item.Port())
   302  		value.Store(item.ID(), item)
   303  	}
   304  
   305  	if ic.instanceCount != instanceCount {
   306  		log.Infof("[Cache][Instance] instance count update from %d to %d",
   307  			ic.instanceCount, instanceCount)
   308  		ic.instanceCount = instanceCount
   309  	}
   310  
   311  	log.Info("[Cache][Instance] instances change info", zap.Any("add", addInstances),
   312  		zap.Any("update", updateInstances), zap.Any("delete", deleteInstances))
   313  
   314  	ic.postProcessUpdatedServices(affect)
   315  	ic.svcCache.notifyServiceCountReload(affect)
   316  	return events, map[string]time.Time{
   317  		ic.Name(): time.Unix(lastMtime, 0),
   318  	}, update, del
   319  }
   320  
   321  func fillInternalLabels(item *model.Instance) *model.Instance {
   322  	if len(item.Version()) > 0 {
   323  		item.Proto.Metadata["version"] = item.Version()
   324  	}
   325  	if len(item.Protocol()) > 0 {
   326  		item.Proto.Metadata["protocol"] = item.Protocol()
   327  	}
   328  
   329  	if item.Location() != nil {
   330  		item.Proto.Metadata["region"] = item.Location().GetRegion().GetValue()
   331  		item.Proto.Metadata["zone"] = item.Location().GetZone().GetValue()
   332  		item.Proto.Metadata["campus"] = item.Location().GetCampus().GetValue()
   333  	}
   334  	return item
   335  }
   336  
   337  func (ic *instanceCache) postProcessUpdatedServices(affect map[string]bool) {
   338  	progress := 0
   339  	for serviceID := range affect {
   340  		ic.svcCache.GetRevisionWorker().Notify(serviceID, true)
   341  		progress++
   342  		if progress%10000 == 0 {
   343  			log.Infof("[Cache][Instance] revision notify progress(%d / %d)", progress, len(affect))
   344  		}
   345  		// 构建服务数量统计
   346  		value, ok := ic.services.Load(serviceID)
   347  		if !ok {
   348  			ic.instanceCounts.Delete(serviceID)
   349  			continue
   350  		}
   351  		count := &model.InstanceCount{}
   352  		value.Range(func(key string, instance *model.Instance) bool {
   353  			count.TotalInstanceCount++
   354  			if isInstanceHealthy(instance) {
   355  				count.HealthyInstanceCount++
   356  			}
   357  			if instance.Proto.GetIsolate().GetValue() {
   358  				count.IsolateInstanceCount++
   359  			}
   360  			return true
   361  		})
   362  		if count.TotalInstanceCount == 0 {
   363  			ic.instanceCounts.Delete(serviceID)
   364  			continue
   365  		}
   366  		ic.instanceCounts.Store(serviceID, count)
   367  	}
   368  }
   369  
   370  func isInstanceHealthy(instance *model.Instance) bool {
   371  	return instance.Proto.GetHealthy().GetValue() && !instance.Proto.GetIsolate().GetValue()
   372  }
   373  
   374  // GetInstance 根据实例ID获取实例数据
   375  func (ic *instanceCache) GetInstance(instanceID string) *model.Instance {
   376  	if instanceID == "" {
   377  		return nil
   378  	}
   379  
   380  	value, ok := ic.ids.Load(instanceID)
   381  	if !ok {
   382  		return nil
   383  	}
   384  
   385  	return value
   386  }
   387  
   388  // GetInstancesByServiceID 根据ServiceID获取实例数据
   389  func (ic *instanceCache) GetInstancesByServiceID(serviceID string) []*model.Instance {
   390  	if serviceID == "" {
   391  		return nil
   392  	}
   393  
   394  	value, ok := ic.services.Load(serviceID)
   395  	if !ok {
   396  		return nil
   397  	}
   398  
   399  	var out []*model.Instance
   400  	value.Range(func(k string, v *model.Instance) bool {
   401  		out = append(out, v)
   402  		return true
   403  	})
   404  
   405  	return out
   406  }
   407  
   408  // GetInstancesCountByServiceID 根据服务ID获取实例数
   409  func (ic *instanceCache) GetInstancesCountByServiceID(serviceID string) model.InstanceCount {
   410  	if serviceID == "" {
   411  		return model.InstanceCount{}
   412  	}
   413  
   414  	value, ok := ic.instanceCounts.Load(serviceID)
   415  	if !ok {
   416  		return model.InstanceCount{}
   417  	}
   418  	return *value
   419  }
   420  
   421  // IteratorInstances 迭代所有的instance的函数
   422  func (ic *instanceCache) IteratorInstances(iterProc types.InstanceIterProc) error {
   423  	return iteratorInstancesProc(ic.ids, iterProc)
   424  }
   425  
   426  // IteratorInstancesWithService 根据服务ID进行迭代回调
   427  func (ic *instanceCache) IteratorInstancesWithService(serviceID string, iterProc types.InstanceIterProc) error {
   428  	if serviceID == "" {
   429  		return nil
   430  	}
   431  	value, ok := ic.services.Load(serviceID)
   432  	if !ok {
   433  		return nil
   434  	}
   435  
   436  	return iteratorInstancesProc(value, iterProc)
   437  }
   438  
   439  // GetInstancesCount 获取实例的个数
   440  func (ic *instanceCache) GetInstancesCount() int {
   441  	count := 0
   442  	ic.ids.Range(func(key string, value *model.Instance) bool {
   443  		count++
   444  		return true
   445  	})
   446  
   447  	return count
   448  }
   449  
   450  // GetInstanceLabels 获取某个服务下实例的所有标签信息集合
   451  func (ic *instanceCache) GetInstanceLabels(serviceID string) *apiservice.InstanceLabels {
   452  	if serviceID == "" {
   453  		return &apiservice.InstanceLabels{}
   454  	}
   455  
   456  	value, ok := ic.services.Load(serviceID)
   457  	if !ok {
   458  		return &apiservice.InstanceLabels{}
   459  	}
   460  
   461  	ret := &apiservice.InstanceLabels{
   462  		Labels: make(map[string]*apimodel.StringList),
   463  	}
   464  
   465  	tmp := make(map[string]map[string]struct{})
   466  	_ = iteratorInstancesProc(value, func(key string, value *model.Instance) (bool, error) {
   467  		metadata := value.Metadata()
   468  		for k, v := range metadata {
   469  			if _, ok := tmp[k]; !ok {
   470  				tmp[k] = make(map[string]struct{})
   471  			}
   472  			tmp[k][v] = struct{}{}
   473  		}
   474  		return true, nil
   475  	})
   476  
   477  	for k, v := range tmp {
   478  		if _, ok := ret.Labels[k]; !ok {
   479  			ret.Labels[k] = &apimodel.StringList{Values: make([]string, 0, 4)}
   480  		}
   481  
   482  		for vv := range v {
   483  			ret.Labels[k].Values = append(ret.Labels[k].Values, vv)
   484  		}
   485  	}
   486  
   487  	return ret
   488  }
   489  
   490  func (ic *instanceCache) GetServicePorts(serviceID string) []*model.ServicePort {
   491  	return ic.servicePortsBucket.listPort(serviceID)
   492  }
   493  
   494  // iteratorInstancesProc 迭代指定的instance数据,id->instance
   495  func iteratorInstancesProc(data *utils.SyncMap[string, *model.Instance], iterProc types.InstanceIterProc) error {
   496  	var (
   497  		cont bool
   498  		err  error
   499  	)
   500  
   501  	proc := func(k string, v *model.Instance) bool {
   502  		cont, err = iterProc(k, v)
   503  		if err != nil {
   504  			return false
   505  		}
   506  		return cont
   507  	}
   508  
   509  	data.Range(proc)
   510  	return err
   511  }
   512  
// cacheInstanceEvent pairs an instance with the kind of change (created,
// updated or deleted) that setInstances observed for it; realUpdate forwards
// these pairs to the cache manager's OnEvent.
type cacheInstanceEvent struct {
	item      *model.Instance
	eventType types.EventType
}