github.com/cloudwego/kitex@v0.9.0/pkg/loadbalance/consist.go

/*
 * Copyright 2021 CloudWeGo Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package loadbalance

import (
	"context"
	"sort"
	"strconv"
	"sync"
	"sync/atomic"
	"time"

	"github.com/bytedance/gopkg/util/xxhash3"
	"golang.org/x/sync/singleflight"

	"github.com/cloudwego/kitex/pkg/discovery"
	"github.com/cloudwego/kitex/pkg/gofunc"
	"github.com/cloudwego/kitex/pkg/utils"
)

/*
  Benchmark results with different instance numbers when weight = 10 and virtual factor = 100:
  BenchmarkNewConsistPicker_NoCache/10ins-16      6565    160670 ns/op    164750 B/op    5 allocs/op
  BenchmarkNewConsistPicker_NoCache/100ins-16      571   1914666 ns/op   1611803 B/op    6 allocs/op
  BenchmarkNewConsistPicker_NoCache/1000ins-16      45  23485916 ns/op  16067720 B/op   10 allocs/op
  BenchmarkNewConsistPicker_NoCache/10000ins-16      4 251160920 ns/op 160405632 B/op   41 allocs/op

  When there are 10000 instances with weight = 10 and virtual factor = 100, it takes about 251 ms to build the ring.
*/

/*
  type hints for sync.Map:
	consistBalancer -> sync.Map[entry.CacheKey]*consistInfo
	consistInfo     -> sync.Map[hashed key]*consistResult
	consistResult   -> Primary, Replicas
	consistPicker   -> consistResult
*/

// KeyFunc should return a non-empty string that stands for the request within the given context.
type KeyFunc func(ctx context.Context, request interface{}) string

// ConsistentHashOption configures the consistent hash Loadbalancer.
type ConsistentHashOption struct {
	GetKey KeyFunc

	// If it is set, replicas will be used when connecting to the primary node fails.
	// This brings extra memory and CPU cost.
	// If it is not set, an error will be returned immediately when the connection fails.
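	// Replicas are handed out by the picker on subsequent Next calls for the same request,
	// so they only take effect when the caller asks the same picker again (e.g. on a connection retry).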
	Replica uint32

	// The number of virtual nodes corresponding to each real node.
	// The larger the value, the higher the memory and computational cost, and the more balanced the load.
	// When the number of nodes is large, it can be set smaller; conversely, it can be set larger.
	// The median of VirtualFactor * Weight (when Weighted is true) is recommended to be around 1000.
	// The recommended total number of virtual nodes is within 20 million (building once takes about
	// 250 ms in the 10-million case, but it is theoretically fine to build in the background within 3 s).
	VirtualFactor uint32

	// Whether to follow Weight for load balancing.
	// If false, Weight is ignored for each instance, and VirtualFactor virtual nodes are generated for indiscriminate load balancing.
	// If true, Weight() * VirtualFactor virtual nodes are generated for each instance.
	// Note that for an instance with weight 0, no virtual nodes will be generated regardless of the VirtualFactor value.
	// It is recommended to set it to true, but be careful to reduce VirtualFactor appropriately.
	Weighted bool

	// Whether to perform expiration processing.
	// The implementation caches all the keys.
	// If entries never expire, memory may keep growing and eventually cause OOM.
	// Setting an expiration introduces extra performance overhead.
	// The current implementation scans for expired keys periodically (every two minutes), and expired keys
	// are also dropped when the instances change and the ring is rebuilt.
	// It is recommended to always set a value of no less than two minutes.
	ExpireDuration time.Duration
}

// NewConsistentHashOption creates a default ConsistentHashOption.
func NewConsistentHashOption(f KeyFunc) ConsistentHashOption {
	return ConsistentHashOption{
		GetKey:         f,
		Replica:        0,
		VirtualFactor:  100,
		Weighted:       true,
		ExpireDuration: 2 * time.Minute,
	}
}

var (
	consistPickerPool         sync.Pool
	consistBalancers          []*consistBalancer
	consistBalancersLock      sync.RWMutex
	consistBalancerDaemonOnce sync.Once
)

func init() {
	consistPickerPool.New = newConsistPicker
}

type virtualNode struct {
	hash     uint64
	RealNode *realNode
}

type realNode struct {
	Ins discovery.Instance
}

type consistResult struct {
	Primary  discovery.Instance
	Replicas []discovery.Instance
	Touch    atomic.Value
}

type consistInfo struct {
	cachedConsistResult sync.Map
	sfg                 singleflight.Group // To prevent multiple builds on the first request for the same key

	realNodes    []realNode
	virtualNodes []virtualNode
}

type vNodeType struct {
	s []virtualNode
}

func (v *vNodeType) Len() int {
	return len(v.s)
}

func (v *vNodeType) Less(i, j int) bool {
	return v.s[i].hash < v.s[j].hash
}

func (v *vNodeType) Swap(i, j int) {
	v.s[i], v.s[j] = v.s[j], v.s[i]
}

type consistPicker struct {
	cb     *consistBalancer
	info   *consistInfo
	index  int
	result *consistResult
}

func newConsistPicker() interface{} {
	return &consistPicker{}
}

func (cp *consistPicker) zero() {
	cp.info = nil
	cp.cb = nil
	cp.index = 0
	cp.result = nil
}

func (cp *consistPicker) Recycle() {
	cp.zero()
	consistPickerPool.Put(cp)
}

// Next is not concurrency safe.
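// The first call resolves the request key and returns the primary instance; each subsequent call on the
// same picker returns the next replica in ring order, and nil once the replicas are exhausted.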
func (cp *consistPicker) Next(ctx context.Context, request interface{}) discovery.Instance {
	if len(cp.info.realNodes) == 0 {
		return nil
	}
	if cp.result == nil {
		key := cp.cb.opt.GetKey(ctx, request)
		if key == "" {
			return nil
		}
		cp.result = cp.getConsistResult(xxhash3.HashString(key))
		cp.index = 0
		return cp.result.Primary
	}
	if cp.index < len(cp.result.Replicas) {
		cp.index++
		return cp.result.Replicas[cp.index-1]
	}
	return nil
}

func (cp *consistPicker) getConsistResult(key uint64) *consistResult {
	var cr *consistResult
	cri, ok := cp.info.cachedConsistResult.Load(key)
	if !ok {
		cri, _, _ = cp.info.sfg.Do(strconv.FormatUint(key, 10), func() (interface{}, error) {
			cr := buildConsistResult(cp.cb, cp.info, key)
			if cp.cb.opt.ExpireDuration > 0 {
				cr.Touch.Store(time.Now())
			}
			return cr, nil
		})
		cp.info.cachedConsistResult.Store(key, cri)
	}
	cr = cri.(*consistResult)
	if cp.cb.opt.ExpireDuration > 0 {
		cr.Touch.Store(time.Now())
	}
	return cr
}

func buildConsistResult(cb *consistBalancer, info *consistInfo, key uint64) *consistResult {
	cr := &consistResult{}
	index := sort.Search(len(info.virtualNodes), func(i int) bool {
		return info.virtualNodes[i].hash > key
	})
	// Back to the ring head (although a ring does not have a head)
	if index == len(info.virtualNodes) {
		index = 0
	}
	cr.Primary = info.virtualNodes[index].RealNode.Ins
	replicas := int(cb.opt.Replica)
	// the primary node is excluded, so at most len(realNodes)-1 replicas are available
	if len(info.realNodes)-1 < replicas {
		replicas = len(info.realNodes) - 1
	}
	if replicas > 0 {
		used := make(map[discovery.Instance]struct{}, replicas) // should be 1 + replicas - 1
		used[cr.Primary] = struct{}{}
		cr.Replicas = make([]discovery.Instance, replicas)
		for i := 0; i < replicas; i++ {
			// find the next instance which is not used
			// replicas are adjusted before so we can guarantee that we can find one
			for {
				index++
				if index == len(info.virtualNodes) {
					index = 0
				}
				ins := info.virtualNodes[index].RealNode.Ins
				if _, ok := used[ins]; !ok {
					used[ins] = struct{}{}
					cr.Replicas[i] = ins
					break
				}
			}
		}
	}
	return cr
}

type consistBalancer struct {
	cachedConsistInfo sync.Map
	// The main purpose of this lock is to improve performance and to prevent Change from running while
	// expiration is in progress, which may cause Change to do a lot of extra computation and memory allocation.
	updateLock sync.Mutex
	opt        ConsistentHashOption
	sfg        singleflight.Group
}

// NewConsistBalancer creates a new consist balancer with the given option.
func NewConsistBalancer(opt ConsistentHashOption) Loadbalancer {
	if opt.GetKey == nil {
		panic("loadbalancer: new consistBalancer failed, getKey func cannot be nil")
	}
	if opt.VirtualFactor == 0 {
		panic("loadbalancer: new consistBalancer failed, virtual factor must > 0")
	}
	cb := &consistBalancer{
		opt: opt,
	}
	if cb.opt.ExpireDuration > 0 {
		cb.AddToDaemon()
	}
	return cb
}

// AddToDaemon adds a balancer to the daemon expire routine.
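// All consist balancers share a single background goroutine, started lazily via consistBalancerDaemonOnce,
// which periodically sweeps the cached consistResults and deletes entries whose Touch timestamp has expired.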
func (cb *consistBalancer) AddToDaemon() {
	// start the shared delete/expire goroutine only once
	consistBalancerDaemonOnce.Do(func() {
		gofunc.GoFunc(context.Background(), func() {
			for range time.Tick(2 * time.Minute) {
				consistBalancersLock.RLock()
				now := time.Now()
				for _, lb := range consistBalancers {
					if lb.opt.ExpireDuration > 0 {
						lb.updateLock.Lock()
						lb.cachedConsistInfo.Range(func(key, value interface{}) bool {
							ci := value.(*consistInfo)
							ci.cachedConsistResult.Range(func(key, value interface{}) bool {
								t := value.(*consistResult).Touch.Load().(time.Time)
								if now.After(t.Add(cb.opt.ExpireDuration)) {
									ci.cachedConsistResult.Delete(key)
								}
								return true
							})
							return true
						})
						lb.updateLock.Unlock()
					}
				}
				consistBalancersLock.RUnlock()
			}
		})
	})

	consistBalancersLock.Lock()
	consistBalancers = append(consistBalancers, cb)
	consistBalancersLock.Unlock()
}

// GetPicker implements the Loadbalancer interface.
func (cb *consistBalancer) GetPicker(e discovery.Result) Picker {
	var ci *consistInfo
	if e.Cacheable {
		cii, ok := cb.cachedConsistInfo.Load(e.CacheKey)
		if !ok {
			cii, _, _ = cb.sfg.Do(e.CacheKey, func() (interface{}, error) {
				return cb.newConsistInfo(e), nil
			})
			cb.cachedConsistInfo.Store(e.CacheKey, cii)
		}
		ci = cii.(*consistInfo)
	} else {
		ci = cb.newConsistInfo(e)
	}
	picker := consistPickerPool.Get().(*consistPicker)
	picker.cb = cb
	picker.info = ci
	return picker
}

func (cb *consistBalancer) newConsistInfo(e discovery.Result) *consistInfo {
	ci := &consistInfo{}
	ci.realNodes, ci.virtualNodes = cb.buildNodes(e.Instances)
	return ci
}

func (cb *consistBalancer) buildNodes(ins []discovery.Instance) ([]realNode, []virtualNode) {
	ret := make([]realNode, len(ins))
	for i := range ins {
		ret[i].Ins = ins[i]
	}
	return ret, cb.buildVirtualNodes(ret)
}

func (cb *consistBalancer) buildVirtualNodes(rNodes []realNode) []virtualNode {
	totalLen := 0
	for i := range rNodes {
		totalLen += cb.getVirtualNodeLen(rNodes[i])
	}

	ret := make([]virtualNode, totalLen)
	if totalLen == 0 {
		return ret
	}
	maxLen, maxSerial := 0, 0
	for i := range rNodes {
		if len(rNodes[i].Ins.Address().String()) > maxLen {
			maxLen = len(rNodes[i].Ins.Address().String())
		}
		if vNodeLen := cb.getVirtualNodeLen(rNodes[i]); vNodeLen > maxSerial {
			maxSerial = vNodeLen
		}
	}
	l := maxLen + 1 + utils.GetUIntLen(uint64(maxSerial)) // "$address + # + itoa(i)"
	// pre-allocate []byte here, and reuse it to prevent memory allocation.
	b := make([]byte, l)

	// record the start index.
	cur := 0
	for i := range rNodes {
		bAddr := utils.StringToSliceByte(rNodes[i].Ins.Address().String())
		// Copy the address into the first bytes of b.
		copy(b, bAddr)

		// Zero the trailing bytes, skipping the position reserved for '#'.
		for j := len(bAddr) + 1; j < len(b); j++ {
			b[j] = 0
		}
		b[len(bAddr)] = '#'

		vLen := cb.getVirtualNodeLen(rNodes[i])
		for j := 0; j < vLen; j++ {
			k := j
			cnt := 0
			// Write the digits of the serial number into b one by one, starting from the last byte.
			for k > 0 {
				b[l-1-cnt] = byte(k % 10)
				k /= 10
				cnt++
			}
			// At this point, the index inside ret should be cur + j.
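			// (cur is the offset of this real node's first virtual node within ret.)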
			index := cur + j
			ret[index].hash = xxhash3.Hash(b)
			ret[index].RealNode = &rNodes[i]
		}
		cur += vLen
	}
	sort.Sort(&vNodeType{s: ret})
	return ret
}

// getVirtualNodeLen returns the virtual node number of one realNode.
// If cb.opt.Weighted is false, the multiplier is 1 and the virtual node number equals VirtualFactor.
func (cb *consistBalancer) getVirtualNodeLen(rNode realNode) int {
	if cb.opt.Weighted {
		return rNode.Ins.Weight() * int(cb.opt.VirtualFactor)
	}
	return int(cb.opt.VirtualFactor)
}

func (cb *consistBalancer) updateConsistInfo(e discovery.Result) {
	newInfo := cb.newConsistInfo(e)
	infoI, loaded := cb.cachedConsistInfo.LoadOrStore(e.CacheKey, newInfo)
	if !loaded {
		return
	}
	info := infoI.(*consistInfo)
	// Warm up.
	// The reason for not modifying info directly is that concurrency safety cannot be guaranteed.
	info.cachedConsistResult.Range(func(key, value interface{}) bool {
		cr := buildConsistResult(cb, newInfo, key.(uint64))
		if cb.opt.ExpireDuration > 0 {
			t := value.(*consistResult).Touch.Load().(time.Time)
			if time.Now().After(t.Add(cb.opt.ExpireDuration)) {
				return true
			}
			cr.Touch.Store(t)
		}
		newInfo.cachedConsistResult.Store(key, cr)
		return true
	})
	cb.cachedConsistInfo.Store(e.CacheKey, newInfo)
}

// Rebalance implements the Rebalancer interface.
func (cb *consistBalancer) Rebalance(change discovery.Change) {
	if !change.Result.Cacheable {
		return
	}
	// TODO: Use a TreeMap to optimize update performance.
	// For now, due to the lack of a good red-black tree implementation, we can only rebuild the whole ring on every update.
	cb.updateLock.Lock()
	cb.updateConsistInfo(change.Result)
	cb.updateLock.Unlock()
}

// Delete implements the Rebalancer interface.
func (cb *consistBalancer) Delete(change discovery.Change) {
	if !change.Result.Cacheable {
		return
	}
	// FIXME: If Delete and Rebalance occur together (Discovery OnDelete and OnChange are triggered at the same time),
	// the delete may fail and eventually lead to a resource leak.
	cb.updateLock.Lock()
	cb.cachedConsistInfo.Delete(change.Result.CacheKey)
	cb.updateLock.Unlock()
}

func (cb *consistBalancer) Name() string {
	return "consist"
}
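
// Usage sketch (illustrative only, not part of this file): the balancer built by NewConsistBalancer is
// typically installed on a kitex client. client.WithLoadBalancer is assumed to be the option exposed by
// github.com/cloudwego/kitex/client, and getUserID below is a hypothetical key-extraction helper.
//
//	opt := loadbalance.NewConsistentHashOption(func(ctx context.Context, request interface{}) string {
//		// Return a stable, non-empty key per request, e.g. a user or session ID.
//		return getUserID(ctx)
//	})
//	balancer := loadbalance.NewConsistBalancer(opt)
//	// cli, err := yourservice.NewClient("destination.service", client.WithLoadBalancer(balancer))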