github.com/polarismesh/polaris@v1.17.8/cache/service/faultdetect.go (about)

     1  /**
     2   * Tencent is pleased to support the open source community by making Polaris available.
     3   *
     4   * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
     5   *
     6   * Licensed under the BSD 3-Clause License (the "License");
     7   * you may not use this file except in compliance with the License.
     8   * You may obtain a copy of the License at
     9   *
    10   * https://opensource.org/licenses/BSD-3-Clause
    11   *
    12   * Unless required by applicable law or agreed to in writing, software distributed
    13   * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
    14   * CONDITIONS OF ANY KIND, either express or implied. See the License for the
    15   * specific language governing permissions and limitations under the License.
    16   */
    17  
    18  package service
    19  
    20  import (
    21  	"crypto/sha1"
    22  	"fmt"
    23  	"sort"
    24  	"sync"
    25  	"time"
    26  
    27  	"go.uber.org/zap"
    28  	"golang.org/x/sync/singleflight"
    29  
    30  	types "github.com/polarismesh/polaris/cache/api"
    31  	"github.com/polarismesh/polaris/common/model"
    32  	"github.com/polarismesh/polaris/common/utils"
    33  	"github.com/polarismesh/polaris/store"
    34  )
    35  
    36  type faultDetectCache struct {
    37  	*types.BaseCache
    38  
    39  	storage store.Store
    40  	// rules record id -> *model.FaultDetectRule
    41  	rules *utils.SyncMap[string, *model.FaultDetectRule]
    42  	// increment cache
    43  	// fetched service cache
    44  	// key1: namespace, key2: service
    45  	svcSpecificRules map[string]map[string]*model.ServiceWithFaultDetectRules
    46  	// key1: namespace
    47  	nsWildcardRules map[string]*model.ServiceWithFaultDetectRules
    48  	// all rules are wildcard specific
    49  	allWildcardRules *model.ServiceWithFaultDetectRules
    50  	lock             sync.RWMutex
    51  	singleFlight     singleflight.Group
    52  }
    53  
    54  // NewFaultDetectCache faultDetectCache constructor
    55  func NewFaultDetectCache(s store.Store, cacheMgr types.CacheManager) types.FaultDetectCache {
    56  	return &faultDetectCache{
    57  		BaseCache:        types.NewBaseCache(s, cacheMgr),
    58  		storage:          s,
    59  		rules:            utils.NewSyncMap[string, *model.FaultDetectRule](),
    60  		svcSpecificRules: make(map[string]map[string]*model.ServiceWithFaultDetectRules),
    61  		nsWildcardRules:  make(map[string]*model.ServiceWithFaultDetectRules),
    62  		allWildcardRules: model.NewServiceWithFaultDetectRules(model.ServiceKey{
    63  			Namespace: allMatched,
    64  			Name:      allMatched,
    65  		}),
    66  	}
    67  }
    68  
    69  // Initialize 实现Cache接口的函数
    70  func (f *faultDetectCache) Initialize(_ map[string]interface{}) error {
    71  	return nil
    72  }
    73  
    74  func (f *faultDetectCache) Update() error {
    75  	_, err, _ := f.singleFlight.Do(f.Name(), func() (interface{}, error) {
    76  		return nil, f.DoCacheUpdate(f.Name(), f.realUpdate)
    77  	})
    78  	return err
    79  }
    80  
    81  // update 实现Cache接口的函数
    82  func (f *faultDetectCache) realUpdate() (map[string]time.Time, int64, error) {
    83  	fdRules, err := f.storage.GetFaultDetectRulesForCache(f.LastFetchTime(), f.IsFirstUpdate())
    84  	if err != nil {
    85  		log.Errorf("[Cache] fault detect config cache update err:%s", err.Error())
    86  		return nil, -1, err
    87  	}
    88  	lastMtimes := f.setFaultDetectRules(fdRules)
    89  
    90  	return lastMtimes, int64(len(fdRules)), nil
    91  }
    92  
    93  // clear 实现Cache接口的函数
    94  func (f *faultDetectCache) Clear() error {
    95  	f.BaseCache.Clear()
    96  	f.lock.Lock()
    97  	f.allWildcardRules.Clear()
    98  	f.rules = utils.NewSyncMap[string, *model.FaultDetectRule]()
    99  	f.nsWildcardRules = make(map[string]*model.ServiceWithFaultDetectRules)
   100  	f.svcSpecificRules = make(map[string]map[string]*model.ServiceWithFaultDetectRules)
   101  	f.lock.Unlock()
   102  	return nil
   103  }
   104  
   105  // Name 实现资源名称
   106  func (f *faultDetectCache) Name() string {
   107  	return types.FaultDetectRuleName
   108  }
   109  
   110  // GetFaultDetectConfig 根据serviceID获取探测规则
   111  func (f *faultDetectCache) GetFaultDetectConfig(name string, namespace string) *model.ServiceWithFaultDetectRules {
   112  	log.Infof("GetFaultDetectConfig: name %s, namespace %s", name, namespace)
   113  	// check service specific
   114  	rules := f.checkServiceSpecificCache(name, namespace)
   115  	if nil != rules {
   116  		return rules
   117  	}
   118  	rules = f.checkNamespaceSpecificCache(namespace)
   119  	if nil != rules {
   120  		return rules
   121  	}
   122  	return f.allWildcardRules
   123  }
   124  
   125  func (f *faultDetectCache) checkServiceSpecificCache(
   126  	name string, namespace string) *model.ServiceWithFaultDetectRules {
   127  	f.lock.RLock()
   128  	defer f.lock.RUnlock()
   129  	log.Infof(
   130  		"checkServiceSpecificCache name %s, namespace %s, values %v", name, namespace, f.svcSpecificRules)
   131  	svcRules, ok := f.svcSpecificRules[namespace]
   132  	if ok {
   133  		return svcRules[name]
   134  	}
   135  	return nil
   136  }
   137  
   138  func (f *faultDetectCache) checkNamespaceSpecificCache(namespace string) *model.ServiceWithFaultDetectRules {
   139  	f.lock.RLock()
   140  	defer f.lock.RUnlock()
   141  	return f.nsWildcardRules[namespace]
   142  }
   143  
   144  func (f *faultDetectCache) reloadRevision(svcRules *model.ServiceWithFaultDetectRules) {
   145  	rulesCount := svcRules.CountFaultDetectRules()
   146  	if rulesCount == 0 {
   147  		svcRules.Revision = ""
   148  		return
   149  	}
   150  	revisions := make([]string, 0, rulesCount)
   151  	svcRules.IterateFaultDetectRules(func(rule *model.FaultDetectRule) {
   152  		revisions = append(revisions, rule.Revision)
   153  	})
   154  	sort.Strings(revisions)
   155  	h := sha1.New()
   156  	revision, err := types.ComputeRevisionBySlice(h, revisions)
   157  	if err != nil {
   158  		log.Errorf("[Server][Service][FaultDetector] compute revision service(%s) err: %s",
   159  			svcRules.Service, err.Error())
   160  		return
   161  	}
   162  	svcRules.Revision = revision
   163  }
   164  
   165  func (f *faultDetectCache) deleteAndReloadFaultDetectRules(svcRules *model.ServiceWithFaultDetectRules, id string) {
   166  	svcRules.DelFaultDetectRule(id)
   167  	f.reloadRevision(svcRules)
   168  }
   169  
   170  func (f *faultDetectCache) deleteFaultDetectRuleFromServiceCache(id string, svcKeys map[model.ServiceKey]bool) {
   171  	f.lock.Lock()
   172  	defer f.lock.Unlock()
   173  	if len(svcKeys) == 0 {
   174  		// all wildcard
   175  		f.deleteAndReloadFaultDetectRules(f.allWildcardRules, id)
   176  		for _, rules := range f.nsWildcardRules {
   177  			f.deleteAndReloadFaultDetectRules(rules, id)
   178  		}
   179  		for _, svcRules := range f.svcSpecificRules {
   180  			for _, rules := range svcRules {
   181  				f.deleteAndReloadFaultDetectRules(rules, id)
   182  			}
   183  		}
   184  		return
   185  	}
   186  	svcToReloads := make(map[model.ServiceKey]bool)
   187  	for svcKey := range svcKeys {
   188  		if svcKey.Name == allMatched {
   189  			rules, ok := f.nsWildcardRules[svcKey.Namespace]
   190  			if ok {
   191  				f.deleteAndReloadFaultDetectRules(rules, id)
   192  			}
   193  			svcRules, ok := f.svcSpecificRules[svcKey.Namespace]
   194  			if ok {
   195  				for svc := range svcRules {
   196  					svcToReloads[model.ServiceKey{Namespace: svcKey.Namespace, Name: svc}] = true
   197  				}
   198  			}
   199  		} else {
   200  			svcToReloads[svcKey] = true
   201  		}
   202  	}
   203  	if len(svcToReloads) > 0 {
   204  		for svcToReload := range svcToReloads {
   205  			svcRules, ok := f.svcSpecificRules[svcToReload.Namespace]
   206  			if ok {
   207  				rules, ok := svcRules[svcToReload.Name]
   208  				if ok {
   209  					f.deleteAndReloadFaultDetectRules(rules, id)
   210  				}
   211  			}
   212  		}
   213  	}
   214  }
   215  
   216  func (f *faultDetectCache) storeAndReloadFaultDetectRules(
   217  	svcRules *model.ServiceWithFaultDetectRules, cbRule *model.FaultDetectRule) {
   218  	svcRules.AddFaultDetectRule(cbRule)
   219  	f.reloadRevision(svcRules)
   220  }
   221  
   222  func createAndStoreServiceWithFaultDetectRules(svcKey model.ServiceKey, key string,
   223  	values map[string]*model.ServiceWithFaultDetectRules) *model.ServiceWithFaultDetectRules {
   224  	rules := model.NewServiceWithFaultDetectRules(svcKey)
   225  	values[key] = rules
   226  	return rules
   227  }
   228  
   229  func (f *faultDetectCache) storeFaultDetectRuleToServiceCache(
   230  	entry *model.FaultDetectRule, svcKeys map[model.ServiceKey]bool) {
   231  	f.lock.Lock()
   232  	defer f.lock.Unlock()
   233  	if len(svcKeys) == 0 {
   234  		// all wildcard
   235  		f.storeAndReloadFaultDetectRules(f.allWildcardRules, entry)
   236  		for _, rules := range f.nsWildcardRules {
   237  			f.storeAndReloadFaultDetectRules(rules, entry)
   238  		}
   239  		for _, svcRules := range f.svcSpecificRules {
   240  			for _, rules := range svcRules {
   241  				f.storeAndReloadFaultDetectRules(rules, entry)
   242  			}
   243  		}
   244  		return
   245  	}
   246  	svcToReloads := make(map[model.ServiceKey]bool)
   247  	for svcKey := range svcKeys {
   248  		if svcKey.Name == allMatched {
   249  			var wildcardRules *model.ServiceWithFaultDetectRules
   250  			var ok bool
   251  			wildcardRules, ok = f.nsWildcardRules[svcKey.Namespace]
   252  			if !ok {
   253  				wildcardRules = createAndStoreServiceWithFaultDetectRules(svcKey, svcKey.Namespace, f.nsWildcardRules)
   254  			}
   255  			f.storeAndReloadFaultDetectRules(wildcardRules, entry)
   256  			svcRules, ok := f.svcSpecificRules[svcKey.Namespace]
   257  			if ok {
   258  				for svc := range svcRules {
   259  					svcToReloads[model.ServiceKey{Namespace: svcKey.Namespace, Name: svc}] = true
   260  				}
   261  			}
   262  		} else {
   263  			svcToReloads[svcKey] = true
   264  		}
   265  	}
   266  	if len(svcToReloads) > 0 {
   267  		for svcToReload := range svcToReloads {
   268  			var rules *model.ServiceWithFaultDetectRules
   269  			var svcRules map[string]*model.ServiceWithFaultDetectRules
   270  			var ok bool
   271  			svcRules, ok = f.svcSpecificRules[svcToReload.Namespace]
   272  			if !ok {
   273  				svcRules = make(map[string]*model.ServiceWithFaultDetectRules)
   274  				f.svcSpecificRules[svcToReload.Namespace] = svcRules
   275  			}
   276  			rules, ok = svcRules[svcToReload.Name]
   277  			if !ok {
   278  				rules = createAndStoreServiceWithFaultDetectRules(svcToReload, svcToReload.Name, svcRules)
   279  			}
   280  			f.storeAndReloadFaultDetectRules(rules, entry)
   281  		}
   282  	}
   283  }
   284  
   285  func getServicesInvolveByFaultDetectRule(fdRule *model.FaultDetectRule) map[model.ServiceKey]bool {
   286  	svcKeys := make(map[model.ServiceKey]bool)
   287  	addService := func(name string, namespace string) {
   288  		if name == allMatched && namespace == allMatched {
   289  			return
   290  		}
   291  		svcKeys[model.ServiceKey{
   292  			Namespace: namespace,
   293  			Name:      name,
   294  		}] = true
   295  	}
   296  	addService(fdRule.DstService, fdRule.DstNamespace)
   297  	return svcKeys
   298  }
   299  
   300  // setCircuitBreaker 更新store的数据到cache中
   301  func (f *faultDetectCache) setFaultDetectRules(fdRules []*model.FaultDetectRule) map[string]time.Time {
   302  	if len(fdRules) == 0 {
   303  		return nil
   304  	}
   305  
   306  	lastMtime := f.LastMtime(f.Name()).Unix()
   307  
   308  	for _, fdRule := range fdRules {
   309  		oldRule, ok := f.rules.Load(fdRule.ID)
   310  		if ok {
   311  			// 对比规则前后绑定的服务是否出现了变化,清理掉之前所绑定的信息数据
   312  			if oldRule.IsServiceChange(fdRule) {
   313  				// 从老的规则中获取所有的 svcKeys 信息列表
   314  				svcKeys := getServicesInvolveByFaultDetectRule(oldRule)
   315  				log.Info("[Cache][FaultDetect] clean rule bind old service info",
   316  					zap.String("svc-keys", fmt.Sprintf("%#v", svcKeys)), zap.String("rule-id", fdRule.ID))
   317  				// 挨个清空
   318  				f.deleteFaultDetectRuleFromServiceCache(fdRule.ID, svcKeys)
   319  			}
   320  		}
   321  
   322  		if fdRule.ModifyTime.Unix() > lastMtime {
   323  			lastMtime = fdRule.ModifyTime.Unix()
   324  		}
   325  		svcKeys := getServicesInvolveByFaultDetectRule(fdRule)
   326  		if !fdRule.Valid {
   327  			f.rules.Delete(fdRule.ID)
   328  			f.deleteFaultDetectRuleFromServiceCache(fdRule.ID, svcKeys)
   329  			continue
   330  		}
   331  		f.rules.Store(fdRule.ID, fdRule)
   332  		f.storeFaultDetectRuleToServiceCache(fdRule, svcKeys)
   333  	}
   334  
   335  	return map[string]time.Time{
   336  		f.Name(): time.Unix(lastMtime, 0),
   337  	}
   338  }
   339  
   340  // GetFaultDetectRuleCount 获取探测规则总数
   341  func (f *faultDetectCache) GetFaultDetectRuleCount(fun func(k, v interface{}) bool) {
   342  	f.lock.RLock()
   343  	defer f.lock.RUnlock()
   344  
   345  	for k, v := range f.svcSpecificRules {
   346  		if !fun(k, v) {
   347  			break
   348  		}
   349  	}
   350  }