dubbo.apache.org/dubbo-go/v3@v3.1.1/xds/utils/balancergroup/balancergroup.go

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 *
 * Copyright 2019 gRPC authors.
 *
 */

// Package balancergroup implements a utility struct to bind multiple balancers
// into one balancer.
package balancergroup

import (
	"fmt"
	"sync"
	"time"
)

import (
	dubbogoLogger "github.com/dubbogo/gost/log/logger"

	"google.golang.org/grpc/balancer"

	"google.golang.org/grpc/connectivity"

	"google.golang.org/grpc/resolver"
)

import (
	cache "dubbo.apache.org/dubbo-go/v3/xds/utils/xds_cache"
)

// subBalancerWrapper is used to keep the configurations that will be used to start
// the underlying balancer. It can be called to start/stop the underlying
// balancer.
//
// When the config changes, it will pass the update to the underlying balancer
// if it exists.
//
// TODO: move to a separate file?
type subBalancerWrapper struct {
	// subBalancerWrapper is passed to the sub-balancer as a ClientConn
	// wrapper, only to keep the state and picker. When the sub-balancer is
	// restarted while in cache, the picker needs to be resent.
	//
	// It also contains the sub-balancer ID, so the parent balancer group can
	// keep track of SubConn/pickers and the sub-balancers they belong to. Some
	// of the actions are forwarded to the parent ClientConn with no change.
	// Some are forwarded to the balancer group with the sub-balancer ID.
	balancer.ClientConn
	id    string
	group *BalancerGroup

	mu    sync.Mutex
	state balancer.State

	// The static part of sub-balancer. Keeps balancerBuilders and addresses.
	// To be used when restarting sub-balancer.
	builder balancer.Builder
	// Options to be passed to sub-balancer at the time of creation.
	buildOpts balancer.BuildOptions
	// ccState is a cache of the addresses/balancer config, so when the balancer
	// is restarted after close, it will get the previous update. It's a pointer
	// and is set to nil at init, so when the balancer is built for the first
	// time (not a restart), it won't receive an empty update. Note that this
	// isn't reset to nil when the underlying balancer is closed.
	ccState *balancer.ClientConnState
	// The dynamic part of sub-balancer. Only used when balancer group is
	// started. Gets cleared when sub-balancer is closed.
	balancer balancer.Balancer
}
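// The sketch below is illustrative only and is not part of the original file;
// the function name is hypothetical. It shows the restart behaviour described
// above: ccState survives stopBalancer, so when the balancer group is closed
// and later started again, the rebuilt child policy immediately receives the
// last known addresses and config instead of an empty update.
func exampleSubBalancerRestart(sbc *subBalancerWrapper) {
	// What the group does on Close: stop the underlying balancer. The static
	// part of the wrapper (builder, buildOpts, ccState) is intentionally kept.
	sbc.stopBalancer()

	// What the group does on the next Start: rebuild the child policy;
	// startBalancer replays the cached ClientConnState to the new balancer.
	sbc.startBalancer()
}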
// UpdateState overrides balancer.ClientConn, to keep state and picker.
func (sbc *subBalancerWrapper) UpdateState(state balancer.State) {
	sbc.mu.Lock()
	sbc.state = state
	sbc.group.updateBalancerState(sbc.id, state)
	sbc.mu.Unlock()
}

// NewSubConn overrides balancer.ClientConn, so balancer group can keep track of
// the relation between subconns and sub-balancers.
func (sbc *subBalancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
	return sbc.group.newSubConn(sbc, addrs, opts)
}

func (sbc *subBalancerWrapper) updateBalancerStateWithCachedPicker() {
	sbc.mu.Lock()
	if sbc.state.Picker != nil {
		sbc.group.updateBalancerState(sbc.id, sbc.state)
	}
	sbc.mu.Unlock()
}

func (sbc *subBalancerWrapper) startBalancer() {
	b := sbc.builder.Build(sbc, sbc.buildOpts)
	sbc.group.logger.Infof("Created child policy %p of type %v", b, sbc.builder.Name())
	sbc.balancer = b
	if sbc.ccState != nil {
		b.UpdateClientConnState(*sbc.ccState)
	}
}

// exitIdle invokes the sub-balancer's ExitIdle method. Returns a boolean
// indicating whether or not the operation was completed.
func (sbc *subBalancerWrapper) exitIdle() (complete bool) {
	b := sbc.balancer
	if b == nil {
		return true
	}
	if ei, ok := b.(balancer.ExitIdler); ok {
		ei.ExitIdle()
		return true
	}
	return false
}

func (sbc *subBalancerWrapper) updateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
	b := sbc.balancer
	if b == nil {
		// This sub-balancer was closed. This can happen when EDS removes a
		// locality. The balancer for this locality was already closed, and the
		// SubConns are being deleted. But SubConn state change can still
		// happen.
		return
	}
	b.UpdateSubConnState(sc, state)
}

func (sbc *subBalancerWrapper) updateClientConnState(s balancer.ClientConnState) error {
	sbc.ccState = &s
	b := sbc.balancer
	if b == nil {
		// This sub-balancer was closed. This should never happen because
		// sub-balancers are closed when the locality is removed from EDS, or
		// the balancer group is closed. There should be no further address
		// updates when either of these happens.
		//
		// This will be a common case with priority support, because a
		// sub-balancer (and the whole balancer group) could be closed because
		// it's the lower priority, but it can still get address updates.
		return nil
	}
	return b.UpdateClientConnState(s)
}

func (sbc *subBalancerWrapper) resolverError(err error) {
	b := sbc.balancer
	if b == nil {
		// This sub-balancer was closed. This should never happen because
		// sub-balancers are closed when the locality is removed from EDS, or
		// the balancer group is closed. There should be no further address
		// updates when either of these happens.
		//
		// This will be a common case with priority support, because a
		// sub-balancer (and the whole balancer group) could be closed because
		// it's the lower priority, but it can still get address updates.
		return
	}
	b.ResolverError(err)
}

func (sbc *subBalancerWrapper) stopBalancer() {
	sbc.balancer.Close()
	sbc.balancer = nil
}
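// Illustrative only (not part of the original file; the function name is
// hypothetical): a child policy treats the subBalancerWrapper as its
// ClientConn, so its picker updates arrive via UpdateState above, are stamped
// with the wrapper's id, and are handed to the group's state aggregator
// rather than being sent straight to the parent ClientConn.
func examplePickerUpdatePath(sbc *subBalancerWrapper, s balancer.State) {
	// What a child balancer does internally when its picker changes:
	sbc.UpdateState(s)
	// ...which is recorded on the wrapper and then forwarded as
	// sbc.group.updateBalancerState(sbc.id, s), where the aggregator decides
	// whether and when to push an aggregated picker to the parent.
}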
// BalancerGroup takes a list of balancers, and makes them into one balancer.
//
// Note that this struct doesn't implement balancer.Balancer, because it's not
// intended to be used directly as a balancer. It's expected to be used as a
// sub-balancer manager by a high level balancer.
//
// Updates from ClientConn are forwarded to sub-balancers
//   - service config update
//   - address update
//   - subConn state change
//     - find the corresponding balancer and forward
//
// Actions from sub-balancers are forwarded to the parent ClientConn
//   - new/remove SubConn
//   - picker updates and health state changes
//     - sub-pickers are sent to an aggregator provided by the parent, which
//       will group them into a group-picker. The aggregated connectivity state
//       is also handled by the aggregator.
//   - resolveNow
//
// Sub-balancers are only built when the balancer group is started. If the
// balancer group is closed, the sub-balancers are also closed. And it's
// guaranteed that no updates will be sent to the parent ClientConn from a
// closed balancer group.
type BalancerGroup struct {
	cc        balancer.ClientConn
	buildOpts balancer.BuildOptions
	logger    dubbogoLogger.Logger

	// stateAggregator is where the state/picker updates will be sent to. It's
	// provided by the parent balancer, to build a picker with all the
	// sub-pickers.
	stateAggregator BalancerStateAggregator

	// outgoingMu guards all operations in the direction:
	// ClientConn-->Sub-balancer. Including start, stop, resolver updates and
	// SubConn state changes.
	//
	// The corresponding boolean outgoingStarted is used to stop further updates
	// to sub-balancers after they are closed.
	outgoingMu         sync.Mutex
	outgoingStarted    bool
	idToBalancerConfig map[string]*subBalancerWrapper
	// Cache for sub-balancers when they are removed.
	balancerCache *cache.TimeoutCache

	// incomingMu is to make sure this balancer group doesn't send updates to cc
	// after it's closed.
	//
	// We don't share the mutex to avoid deadlocks (e.g. a call to a sub-balancer
	// may call back to the balancer group inline. It causes a deadlock if they
	// require the same mutex).
	//
	// We should never need to hold multiple locks at the same time in this
	// struct. The case where two locks are held can only happen when the
	// underlying balancer calls back into balancer group inline. So there's an
	// implicit lock acquisition order that outgoingMu is locked before
	// incomingMu.

	// incomingMu guards all operations in the direction:
	// Sub-balancer-->ClientConn. Including NewSubConn, RemoveSubConn. It also
	// guards the map from SubConn to balancer ID, so updateSubConnState needs
	// to hold it shortly to find the sub-balancer to forward the update.
	//
	// UpdateState is called by the balancer state aggregator, which decides
	// when and whether to make the call.
	//
	// The corresponding boolean incomingStarted is used to stop further updates
	// from sub-balancers after they are closed.
	incomingMu      sync.Mutex
	incomingStarted bool // This boolean only guards calls back to ClientConn.
	scToSubBalancer map[balancer.SubConn]*subBalancerWrapper
}

// DefaultSubBalancerCloseTimeout is defined as a variable instead of const for
// testing.
//
// TODO: make it a parameter for New().
var DefaultSubBalancerCloseTimeout = 15 * time.Minute
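// A minimal usage sketch (not part of the original file; the function and all
// parameter names are hypothetical). A high-level parent policy owns the
// ClientConn, a state aggregator and the child builders; the group only
// multiplexes between them.
func exampleParentSetup(
	cc balancer.ClientConn,
	bOpts balancer.BuildOptions,
	agg BalancerStateAggregator,
	logger dubbogoLogger.Logger,
	childBuilder balancer.Builder,
) *BalancerGroup {
	bg := New(cc, bOpts, agg, logger)

	// Register sub-balancers first; before Start they only record static data.
	bg.Add("locality-a", childBuilder)
	bg.Add("locality-b", childBuilder)

	// Start builds every registered sub-balancer and replays any cached
	// ClientConnState to it.
	bg.Start()
	return bg
}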
// New creates a new BalancerGroup. Note that the BalancerGroup
// needs to be started to work.
func New(cc balancer.ClientConn, bOpts balancer.BuildOptions, stateAggregator BalancerStateAggregator, logger dubbogoLogger.Logger) *BalancerGroup {
	return &BalancerGroup{
		cc:              cc,
		buildOpts:       bOpts,
		logger:          logger,
		stateAggregator: stateAggregator,

		idToBalancerConfig: make(map[string]*subBalancerWrapper),
		balancerCache:      cache.NewTimeoutCache(DefaultSubBalancerCloseTimeout),
		scToSubBalancer:    make(map[balancer.SubConn]*subBalancerWrapper),
	}
}

// Start starts the balancer group, including building all the sub-balancers,
// and sends the existing addresses to them.
//
// A BalancerGroup can be closed and started later. When a BalancerGroup is
// closed, it can still receive address updates, which will be applied when
// restarted.
func (bg *BalancerGroup) Start() {
	bg.incomingMu.Lock()
	bg.incomingStarted = true
	bg.incomingMu.Unlock()

	bg.outgoingMu.Lock()
	if bg.outgoingStarted {
		bg.outgoingMu.Unlock()
		return
	}

	for _, config := range bg.idToBalancerConfig {
		config.startBalancer()
	}
	bg.outgoingStarted = true
	bg.outgoingMu.Unlock()
}

// Add adds a balancer built by builder to the group, with the given id.
func (bg *BalancerGroup) Add(id string, builder balancer.Builder) {
	// Store data in static map, and then check to see if bg is started.
	bg.outgoingMu.Lock()
	var sbc *subBalancerWrapper
	// If outgoingStarted is true, search in the cache. Otherwise, the cache is
	// guaranteed to be empty, so searching is unnecessary.
	if bg.outgoingStarted {
		if old, ok := bg.balancerCache.Remove(id); ok {
			sbc, _ = old.(*subBalancerWrapper)
			if sbc != nil && sbc.builder != builder {
				// If the sub-balancer in cache was built with a different
				// balancer builder, don't use it; clean up this old balancer
				// and behave as if the sub-balancer was not found in the cache.
				//
				// NOTE that this will also drop the cached addresses for this
				// sub-balancer, which seems to be reasonable.
				sbc.stopBalancer()
				// cleanupSubConns must be done before the new balancer starts,
				// otherwise new SubConns created by the new balancer might be
				// removed by mistake.
				bg.cleanupSubConns(sbc)
				sbc = nil
			}
		}
	}
	if sbc == nil {
		sbc = &subBalancerWrapper{
			ClientConn: bg.cc,
			id:         id,
			group:      bg,
			builder:    builder,
			buildOpts:  bg.buildOpts,
		}
		if bg.outgoingStarted {
			// Only start the balancer if bg is started. Otherwise, we only keep the
			// static data.
			sbc.startBalancer()
		}
	} else {
		// When bringing back a sub-balancer from cache, re-send the cached
		// picker and state.
		sbc.updateBalancerStateWithCachedPicker()
	}
	bg.idToBalancerConfig[id] = sbc
	bg.outgoingMu.Unlock()
}
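// Illustrative only (hypothetical function name): the interaction between Add
// above and Remove below. Removing an id does not close its sub-balancer
// immediately; the wrapper is parked in balancerCache for
// DefaultSubBalancerCloseTimeout. Re-adding the same id with the same builder
// within that window revives the cached sub-balancer and resends its cached
// picker instead of building a new one.
func exampleRemoveThenReAdd(bg *BalancerGroup, id string, builder balancer.Builder) {
	// Park the sub-balancer in the cache; its SubConns stay alive for now.
	bg.Remove(id)

	// Re-adding before the cache timeout fires pulls the wrapper back out of
	// the cache. With a different builder the cached balancer would instead
	// be stopped and rebuilt from scratch.
	bg.Add(id, builder)
}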
// Remove removes the balancer with id from the group.
//
// It does not, however, close the balancer. The balancer is kept in a cache
// and will be closed after a timeout. Cleanup work (closing the sub-balancer
// and removing its subconns) is done after that timeout.
func (bg *BalancerGroup) Remove(id string) {
	bg.outgoingMu.Lock()
	if sbToRemove, ok := bg.idToBalancerConfig[id]; ok {
		if bg.outgoingStarted {
			bg.balancerCache.Add(id, sbToRemove, func() {
				// After timeout, when the sub-balancer is removed from cache,
				// we need to close the underlying sub-balancer, and remove all
				// its subconns.
				bg.outgoingMu.Lock()
				if bg.outgoingStarted {
					sbToRemove.stopBalancer()
				}
				bg.outgoingMu.Unlock()
				bg.cleanupSubConns(sbToRemove)
			})
		}
		delete(bg.idToBalancerConfig, id)
	} else {
		bg.logger.Infof("balancer group: trying to remove a non-existing locality from balancer group: %v", id)
	}
	bg.outgoingMu.Unlock()
}

// Remove(id) doesn't do cleanup for the sub-balancer. This function does the
// cleanup after the timeout.
func (bg *BalancerGroup) cleanupSubConns(config *subBalancerWrapper) {
	bg.incomingMu.Lock()
	// Remove SubConns. This is only done after the balancer is
	// actually closed.
	//
	// NOTE: if NewSubConn is called by this (closed) balancer later, the
	// SubConn will be leaked. This shouldn't happen if the balancer
	// implementation is correct. To make sure this never happens, we need to
	// add another layer (balancer manager) between balancer group and the
	// sub-balancers.
	for sc, b := range bg.scToSubBalancer {
		if b == config {
			bg.cc.RemoveSubConn(sc)
			delete(bg.scToSubBalancer, sc)
		}
	}
	bg.incomingMu.Unlock()
}

// connect attempts to connect to all subConns belonging to sb.
func (bg *BalancerGroup) connect(sb *subBalancerWrapper) {
	bg.incomingMu.Lock()
	for sc, b := range bg.scToSubBalancer {
		if b == sb {
			sc.Connect()
		}
	}
	bg.incomingMu.Unlock()
}

// The following are actions from the parent grpc.ClientConn, forwarded to sub-balancers.

// UpdateSubConnState handles the state for the subconn. It finds the
// corresponding balancer and forwards the update.
func (bg *BalancerGroup) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
	bg.incomingMu.Lock()
	config, ok := bg.scToSubBalancer[sc]
	if !ok {
		bg.incomingMu.Unlock()
		return
	}
	if state.ConnectivityState == connectivity.Shutdown {
		// Only delete sc from the map when state changed to Shutdown.
		delete(bg.scToSubBalancer, sc)
	}
	bg.incomingMu.Unlock()

	bg.outgoingMu.Lock()
	config.updateSubConnState(sc, state)
	bg.outgoingMu.Unlock()
}

// UpdateClientConnState handles ClientConnState (including balancer config and
// addresses) from the resolver. It finds the balancer and forwards the update.
func (bg *BalancerGroup) UpdateClientConnState(id string, s balancer.ClientConnState) error {
	bg.outgoingMu.Lock()
	defer bg.outgoingMu.Unlock()
	if config, ok := bg.idToBalancerConfig[id]; ok {
		return config.updateClientConnState(s)
	}
	return nil
}

// ResolverError forwards resolver errors to all sub-balancers.
func (bg *BalancerGroup) ResolverError(err error) {
	bg.outgoingMu.Lock()
	for _, config := range bg.idToBalancerConfig {
		config.resolverError(err)
	}
	bg.outgoingMu.Unlock()
}
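// Illustrative only (hypothetical function name): a parent policy typically
// splits its own resolver update by sub-balancer id (e.g. per locality) and
// forwards each slice with UpdateClientConnState; the group then routes each
// update to the matching sub-balancer.
func exampleForwardAddressesPerChild(bg *BalancerGroup, addrsByID map[string][]resolver.Address) {
	for id, addrs := range addrsByID {
		// Errors from a child's UpdateClientConnState are returned to the
		// caller here; a real parent would decide how to surface them.
		_ = bg.UpdateClientConnState(id, balancer.ClientConnState{
			ResolverState: resolver.State{Addresses: addrs},
		})
	}
}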
// The following are actions from sub-balancers, forwarded to the parent ClientConn.

// newSubConn: forwards to ClientConn, and also creates a map entry from sc to
// the sub-balancer, so state updates will find the right balancer.
//
// One note about removing SubConn: it is only forwarded to ClientConn, without
// deleting from the map. sc is deleted from the map only when its state
// changes to Shutdown. Since removal just forwards the action, there's no need
// for a removeSubConn() wrapper function.
func (bg *BalancerGroup) newSubConn(config *subBalancerWrapper, addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
	// NOTE: if the balancer with this id was already removed, this should also
	// return an error. But since we call balancer.stopBalancer when removing
	// the balancer, this shouldn't happen.
	bg.incomingMu.Lock()
	if !bg.incomingStarted {
		bg.incomingMu.Unlock()
		return nil, fmt.Errorf("NewSubConn is called after balancer group is closed")
	}
	sc, err := bg.cc.NewSubConn(addrs, opts)
	if err != nil {
		bg.incomingMu.Unlock()
		return nil, err
	}
	bg.scToSubBalancer[sc] = config
	bg.incomingMu.Unlock()
	return sc, nil
}

// updateBalancerState: forwards the new state to the balancer state aggregator.
// The aggregator will create an aggregated picker and an aggregated
// connectivity state, then forward them to ClientConn.
func (bg *BalancerGroup) updateBalancerState(id string, state balancer.State) {
	bg.logger.Infof("Balancer state update from locality %v, new state: %+v", id, state)

	// Send new state to the aggregator, without holding the incomingMu.
	// incomingMu protects all calls to the parent ClientConn; this update
	// doesn't necessarily trigger a call to ClientConn, and should already be
	// protected by the aggregator's mutex if necessary.
	if bg.stateAggregator != nil {
		bg.stateAggregator.UpdateState(id, state)
	}
}

// Close closes the balancer. It stops sub-balancers, and removes the subconns.
// The BalancerGroup can be restarted later.
func (bg *BalancerGroup) Close() {
	bg.incomingMu.Lock()
	if bg.incomingStarted {
		bg.incomingStarted = false
		// Also remove all SubConns.
		for sc := range bg.scToSubBalancer {
			bg.cc.RemoveSubConn(sc)
			delete(bg.scToSubBalancer, sc)
		}
	}
	bg.incomingMu.Unlock()

	// Clear(true) runs the clear function to close sub-balancers in the cache.
	// It must be called outside of the outgoing mutex.
	bg.balancerCache.Clear(true)

	bg.outgoingMu.Lock()
	if bg.outgoingStarted {
		bg.outgoingStarted = false
		for _, config := range bg.idToBalancerConfig {
			config.stopBalancer()
		}
	}
	bg.outgoingMu.Unlock()
}

// ExitIdle should be invoked when the parent LB policy's ExitIdle is invoked.
// It will trigger ExitIdle on all sub-balancers, or reconnect their subconns
// if ExitIdle is not supported.
func (bg *BalancerGroup) ExitIdle() {
	bg.outgoingMu.Lock()
	for _, config := range bg.idToBalancerConfig {
		if !config.exitIdle() {
			bg.connect(config)
		}
	}
	bg.outgoingMu.Unlock()
}

// ExitIdleOne instructs the sub-balancer `id` to exit IDLE state, if
// appropriate and possible.
func (bg *BalancerGroup) ExitIdleOne(id string) {
	bg.outgoingMu.Lock()
	if config := bg.idToBalancerConfig[id]; config != nil {
		if !config.exitIdle() {
			bg.connect(config)
		}
	}
	bg.outgoingMu.Unlock()
}
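// Illustrative only (hypothetical function name): the group can be closed and
// later restarted. Close removes all SubConns and stops the sub-balancers
// (flushing the cache), while the static configuration added via Add is kept,
// so a later Start rebuilds the children with their previously cached state.
// ExitIdle is what a parent calls when its own ExitIdle is invoked.
func exampleCloseRestartAndExitIdle(bg *BalancerGroup) {
	bg.Close()

	// ...later, e.g. when the parent policy becomes active again...
	bg.Start()

	// Ask every child to leave IDLE; children without ExitIdle support have
	// their SubConns reconnected instead.
	bg.ExitIdle()
}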