github.com/hxx258456/ccgo@v0.0.5-0.20230213014102-48b35f46f66f/grpc/internal/balancergroup/balancergroup.go

/*
 * Copyright 2019 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Package balancergroup implements a utility struct to bind multiple balancers
// into one balancer.
package balancergroup

import (
	"fmt"
	"sync"
	"time"

	"github.com/hxx258456/ccgo/grpc/balancer"
	"github.com/hxx258456/ccgo/grpc/connectivity"
	"github.com/hxx258456/ccgo/grpc/internal/cache"
	"github.com/hxx258456/ccgo/grpc/internal/grpclog"
	"github.com/hxx258456/ccgo/grpc/resolver"
)

// subBalancerWrapper is used to keep the configurations that will be used to start
// the underlying balancer. It can be used to start/stop the underlying
// balancer.
//
// When the config changes, it will pass the update to the underlying balancer
// if it exists.
//
// TODO: move to a separate file?
type subBalancerWrapper struct {
	// subBalancerWrapper is passed to the sub-balancer as a ClientConn
	// wrapper, only to keep the state and picker. When the sub-balancer is
	// restarted while in cache, the picker needs to be resent.
	//
	// It also contains the sub-balancer ID, so the parent balancer group can
	// keep track of SubConn/pickers and the sub-balancers they belong to. Some
	// of the actions are forwarded to the parent ClientConn with no change.
	// Some are forwarded to the balancer group with the sub-balancer ID.
	balancer.ClientConn
	id    string
	group *BalancerGroup

	mu    sync.Mutex
	state balancer.State

	// The static part of sub-balancer. Keeps balancerBuilders and addresses.
	// To be used when restarting sub-balancer.
	builder balancer.Builder
	// Options to be passed to sub-balancer at the time of creation.
	buildOpts balancer.BuildOptions
	// ccState is a cache of the addresses/balancer config, so when the balancer
	// is restarted after close, it will get the previous update. It's a pointer
	// and is set to nil at init, so when the balancer is built for the first
	// time (not a restart), it won't receive an empty update. Note that this
	// isn't reset to nil when the underlying balancer is closed.
	ccState *balancer.ClientConnState
	// The dynamic part of sub-balancer. Only used when balancer group is
	// started. Gets cleared when sub-balancer is closed.
	balancer balancer.Balancer
}

// UpdateState overrides balancer.ClientConn, to keep state and picker.
func (sbc *subBalancerWrapper) UpdateState(state balancer.State) {
	sbc.mu.Lock()
	sbc.state = state
	sbc.group.updateBalancerState(sbc.id, state)
	sbc.mu.Unlock()
}

// NewSubConn overrides balancer.ClientConn, so the balancer group can keep track of
// the relation between subconns and sub-balancers.
func (sbc *subBalancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
	return sbc.group.newSubConn(sbc, addrs, opts)
}

func (sbc *subBalancerWrapper) updateBalancerStateWithCachedPicker() {
	sbc.mu.Lock()
	if sbc.state.Picker != nil {
		sbc.group.updateBalancerState(sbc.id, sbc.state)
	}
	sbc.mu.Unlock()
}

func (sbc *subBalancerWrapper) startBalancer() {
	b := sbc.builder.Build(sbc, sbc.buildOpts)
	sbc.group.logger.Infof("Created child policy %p of type %v", b, sbc.builder.Name())
	sbc.balancer = b
	if sbc.ccState != nil {
		b.UpdateClientConnState(*sbc.ccState)
	}
}

// exitIdle invokes the sub-balancer's ExitIdle method. Returns a boolean
// indicating whether or not the operation was completed.
func (sbc *subBalancerWrapper) exitIdle() (complete bool) {
	b := sbc.balancer
	if b == nil {
		return true
	}
	if ei, ok := b.(balancer.ExitIdler); ok {
		ei.ExitIdle()
		return true
	}
	return false
}

func (sbc *subBalancerWrapper) updateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
	b := sbc.balancer
	if b == nil {
		// This sub-balancer was closed. This can happen when EDS removes a
		// locality. The balancer for this locality was already closed, and the
		// SubConns are being deleted. But SubConn state change can still
		// happen.
		return
	}
	b.UpdateSubConnState(sc, state)
}

func (sbc *subBalancerWrapper) updateClientConnState(s balancer.ClientConnState) error {
	sbc.ccState = &s
	b := sbc.balancer
	if b == nil {
		// This sub-balancer was closed. This should never happen because
		// sub-balancers are closed when the locality is removed from EDS, or
		// the balancer group is closed. There should be no further address
		// updates when either of these happens.
		//
		// This will be a common case with priority support, because a
		// sub-balancer (and the whole balancer group) could be closed because
		// it's the lower priority, but it can still get address updates.
		return nil
	}
	return b.UpdateClientConnState(s)
}

func (sbc *subBalancerWrapper) resolverError(err error) {
	b := sbc.balancer
	if b == nil {
		// This sub-balancer was closed. This should never happen because
		// sub-balancers are closed when the locality is removed from EDS, or
		// the balancer group is closed. There should be no further address
		// updates when either of these happens.
		//
		// This will be a common case with priority support, because a
		// sub-balancer (and the whole balancer group) could be closed because
		// it's the lower priority, but it can still get address updates.
		return
	}
	b.ResolverError(err)
}

func (sbc *subBalancerWrapper) stopBalancer() {
	sbc.balancer.Close()
	sbc.balancer = nil
}

// BalancerGroup takes a list of balancers, and makes them into one balancer.
//
// Note that this struct doesn't implement balancer.Balancer, because it's not
// intended to be used directly as a balancer. It's expected to be used as a
// sub-balancer manager by a high level balancer.
//
// Updates from ClientConn are forwarded to sub-balancers
//   - service config update
//   - address update
//   - subConn state change
//     - find the corresponding balancer and forward
//
// Actions from sub-balancers are forwarded to parent ClientConn
//   - new/remove SubConn
//   - picker update and health states change
//     - sub-pickers are sent to an aggregator provided by the parent, which
//       will group them into a group-picker. The aggregated connectivity state
//       is also handled by the aggregator.
//   - resolveNow
//
// Sub-balancers are only built when the balancer group is started. If the
// balancer group is closed, the sub-balancers are also closed. And it's
// guaranteed that no updates will be sent to the parent ClientConn from a closed
// balancer group.
type BalancerGroup struct {
	cc        balancer.ClientConn
	buildOpts balancer.BuildOptions
	logger    *grpclog.PrefixLogger

	// stateAggregator is where the state/picker updates will be sent to. It's
	// provided by the parent balancer, to build a picker with all the
	// sub-pickers.
	stateAggregator BalancerStateAggregator

	// outgoingMu guards all operations in the direction:
	// ClientConn-->Sub-balancer. Including start, stop, resolver updates and
	// SubConn state changes.
	//
	// The corresponding boolean outgoingStarted is used to stop further updates
	// to sub-balancers after they are closed.
	outgoingMu         sync.Mutex
	outgoingStarted    bool
	idToBalancerConfig map[string]*subBalancerWrapper
	// Cache for sub-balancers when they are removed.
	balancerCache *cache.TimeoutCache

	// incomingMu is to make sure this balancer group doesn't send updates to cc
	// after it's closed.
	//
	// We don't share the mutex to avoid deadlocks (e.g. a call to a sub-balancer
	// may call back to the balancer group inline. It causes a deadlock if they
	// require the same mutex).
	//
	// We should never need to hold multiple locks at the same time in this
	// struct. The case where two locks are held can only happen when the
	// underlying balancer calls back into the balancer group inline. So there's an
	// implicit lock acquisition order that outgoingMu is locked before
	// incomingMu.

	// incomingMu guards all operations in the direction:
	// Sub-balancer-->ClientConn. Including NewSubConn, RemoveSubConn. It also
	// guards the map from SubConn to balancer ID, so updateSubConnState needs
	// to hold it shortly to find the sub-balancer to forward the update.
	//
	// UpdateState is called by the balancer state aggregator, and it will
	// decide when and whether to call.
	//
	// The corresponding boolean incomingStarted is used to stop further updates
	// from sub-balancers after they are closed.
	incomingMu      sync.Mutex
	incomingStarted bool // This boolean only guards calls back to ClientConn.
	scToSubBalancer map[balancer.SubConn]*subBalancerWrapper
}

// DefaultSubBalancerCloseTimeout is defined as a variable instead of const for
// testing.
//
// TODO: make it a parameter for New().
var DefaultSubBalancerCloseTimeout = 15 * time.Minute

// New creates a new BalancerGroup. Note that the BalancerGroup
// needs to be started to work.
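//
// A minimal usage sketch from a parent policy's point of view; cc, agg,
// logger, childBuilder and "child-1" below are illustrative placeholders that
// the parent is assumed to already have, not values defined by this package:
//
//	bg := balancergroup.New(cc, balancer.BuildOptions{}, agg, logger)
//	bg.Add("child-1", childBuilder) // register the child policy under an ID
//	bg.Start()                      // builds the children and starts forwarding
//	// Forward resolver/config updates for that child as they arrive.
//	_ = bg.UpdateClientConnState("child-1", balancer.ClientConnState{})
//	bg.Remove("child-1") // child is cached, then closed after the timeout
//	bg.Close()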
func New(cc balancer.ClientConn, bOpts balancer.BuildOptions, stateAggregator BalancerStateAggregator, logger *grpclog.PrefixLogger) *BalancerGroup {
	return &BalancerGroup{
		cc:              cc,
		buildOpts:       bOpts,
		logger:          logger,
		stateAggregator: stateAggregator,

		idToBalancerConfig: make(map[string]*subBalancerWrapper),
		balancerCache:      cache.NewTimeoutCache(DefaultSubBalancerCloseTimeout),
		scToSubBalancer:    make(map[balancer.SubConn]*subBalancerWrapper),
	}
}

// Start starts the balancer group, including building all the sub-balancers,
// and sends the existing addresses to them.
//
// A BalancerGroup can be closed and started later. When a BalancerGroup is
// closed, it can still receive address updates, which will be applied when
// restarted.
func (bg *BalancerGroup) Start() {
	bg.incomingMu.Lock()
	bg.incomingStarted = true
	bg.incomingMu.Unlock()

	bg.outgoingMu.Lock()
	if bg.outgoingStarted {
		bg.outgoingMu.Unlock()
		return
	}

	for _, config := range bg.idToBalancerConfig {
		config.startBalancer()
	}
	bg.outgoingStarted = true
	bg.outgoingMu.Unlock()
}

// Add adds a balancer built by builder to the group, with the given id.
func (bg *BalancerGroup) Add(id string, builder balancer.Builder) {
	// Store data in the static map, and then check to see if bg is started.
	bg.outgoingMu.Lock()
	var sbc *subBalancerWrapper
	// If outgoingStarted is true, search in the cache. Otherwise, the cache is
	// guaranteed to be empty, so searching is unnecessary.
	if bg.outgoingStarted {
		if old, ok := bg.balancerCache.Remove(id); ok {
			sbc, _ = old.(*subBalancerWrapper)
			if sbc != nil && sbc.builder != builder {
				// If the sub-balancer in cache was built with a different
				// balancer builder, don't use it, clean up this old balancer,
				// and behave as if the sub-balancer was not found in cache.
				//
				// NOTE that this will also drop the cached addresses for this
				// sub-balancer, which seems to be reasonable.
				sbc.stopBalancer()
				// cleanupSubConns must be done before the new balancer starts,
				// otherwise new SubConns created by the new balancer might be
				// removed by mistake.
				bg.cleanupSubConns(sbc)
				sbc = nil
			}
		}
	}
	if sbc == nil {
		sbc = &subBalancerWrapper{
			ClientConn: bg.cc,
			id:         id,
			group:      bg,
			builder:    builder,
			buildOpts:  bg.buildOpts,
		}
		if bg.outgoingStarted {
			// Only start the balancer if bg is started. Otherwise, we only keep the
			// static data.
			sbc.startBalancer()
		}
	} else {
		// When bringing back a sub-balancer from cache, re-send the cached
		// picker and state.
		sbc.updateBalancerStateWithCachedPicker()
	}
	bg.idToBalancerConfig[id] = sbc
	bg.outgoingMu.Unlock()
}

// Remove removes the balancer with id from the group.
//
// But it doesn't close the balancer. The balancer is kept in a cache, and will be
// closed after timeout. Cleanup work (closing the sub-balancer and removing
// subconns) will be done after the timeout.
func (bg *BalancerGroup) Remove(id string) {
	bg.outgoingMu.Lock()
	if sbToRemove, ok := bg.idToBalancerConfig[id]; ok {
		if bg.outgoingStarted {
			bg.balancerCache.Add(id, sbToRemove, func() {
				// After timeout, when the sub-balancer is removed from cache,
				// we need to close the underlying sub-balancer, and remove all
				// its subconns.
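				//
				// outgoingStarted is re-checked under outgoingMu so the
				// sub-balancer is not stopped again if the group was closed
				// before this callback runs: Close shuts down cached
				// sub-balancers via balancerCache.Clear(true).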
				bg.outgoingMu.Lock()
				if bg.outgoingStarted {
					sbToRemove.stopBalancer()
				}
				bg.outgoingMu.Unlock()
				bg.cleanupSubConns(sbToRemove)
			})
		}
		delete(bg.idToBalancerConfig, id)
	} else {
		bg.logger.Infof("balancer group: trying to remove a non-existing locality from balancer group: %v", id)
	}
	bg.outgoingMu.Unlock()
}

// bg.Remove(id) doesn't do cleanup for the sub-balancer. This function does
// the cleanup after the timeout.
func (bg *BalancerGroup) cleanupSubConns(config *subBalancerWrapper) {
	bg.incomingMu.Lock()
	// Remove SubConns. This is only done after the balancer is
	// actually closed.
	//
	// NOTE: if NewSubConn is called by this (closed) balancer later, the
	// SubConn will be leaked. This shouldn't happen if the balancer
	// implementation is correct. To make sure this never happens, we need to
	// add another layer (balancer manager) between balancer group and the
	// sub-balancers.
	for sc, b := range bg.scToSubBalancer {
		if b == config {
			bg.cc.RemoveSubConn(sc)
			delete(bg.scToSubBalancer, sc)
		}
	}
	bg.incomingMu.Unlock()
}

// connect attempts to connect to all subConns belonging to sb.
func (bg *BalancerGroup) connect(sb *subBalancerWrapper) {
	bg.incomingMu.Lock()
	for sc, b := range bg.scToSubBalancer {
		if b == sb {
			sc.Connect()
		}
	}
	bg.incomingMu.Unlock()
}

// Following are actions from the parent grpc.ClientConn, forwarded to sub-balancers.

// UpdateSubConnState handles the state for the subconn. It finds the
// corresponding balancer and forwards the update.
func (bg *BalancerGroup) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
	bg.incomingMu.Lock()
	config, ok := bg.scToSubBalancer[sc]
	if !ok {
		bg.incomingMu.Unlock()
		return
	}
	if state.ConnectivityState == connectivity.Shutdown {
		// Only delete sc from the map when the state changed to Shutdown.
		delete(bg.scToSubBalancer, sc)
	}
	bg.incomingMu.Unlock()

	bg.outgoingMu.Lock()
	config.updateSubConnState(sc, state)
	bg.outgoingMu.Unlock()
}

// UpdateClientConnState handles ClientConnState (including balancer config and
// addresses) from the resolver. It finds the balancer and forwards the update.
func (bg *BalancerGroup) UpdateClientConnState(id string, s balancer.ClientConnState) error {
	bg.outgoingMu.Lock()
	defer bg.outgoingMu.Unlock()
	if config, ok := bg.idToBalancerConfig[id]; ok {
		return config.updateClientConnState(s)
	}
	return nil
}

// ResolverError forwards resolver errors to all sub-balancers.
func (bg *BalancerGroup) ResolverError(err error) {
	bg.outgoingMu.Lock()
	for _, config := range bg.idToBalancerConfig {
		config.resolverError(err)
	}
	bg.outgoingMu.Unlock()
}

// Following are actions from sub-balancers, forwarded to ClientConn.

// newSubConn: forward to ClientConn, and also create a map from sc to balancer,
// so state updates will find the right balancer.
//
// One note about removing SubConn: only forward to ClientConn, but don't delete
// from the map. Delete sc from the map only when the state changes to Shutdown.
// Since it's just forwarding the action, there's no need for a removeSubConn()
// wrapper function.
func (bg *BalancerGroup) newSubConn(config *subBalancerWrapper, addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
	// NOTE: if the balancer with id was already removed, this should also return
	// an error. But since we call balancer.stopBalancer when removing the balancer,
	// this shouldn't happen.
	bg.incomingMu.Lock()
	if !bg.incomingStarted {
		bg.incomingMu.Unlock()
		return nil, fmt.Errorf("NewSubConn is called after balancer group is closed")
	}
	sc, err := bg.cc.NewSubConn(addrs, opts)
	if err != nil {
		bg.incomingMu.Unlock()
		return nil, err
	}
	bg.scToSubBalancer[sc] = config
	bg.incomingMu.Unlock()
	return sc, nil
}

// updateBalancerState: forward the new state to the balancer state aggregator.
// The aggregator will create an aggregated picker and an aggregated connectivity
// state, then forward them to ClientConn.
func (bg *BalancerGroup) updateBalancerState(id string, state balancer.State) {
	bg.logger.Infof("Balancer state update from locality %v, new state: %+v", id, state)

	// Send the new state to the aggregator, without holding the incomingMu.
	// incomingMu is to protect all calls to the parent ClientConn, this update
	// doesn't necessarily trigger a call to ClientConn, and should already be
	// protected by the aggregator's mutex if necessary.
	if bg.stateAggregator != nil {
		bg.stateAggregator.UpdateState(id, state)
	}
}

// Close closes the balancer. It stops the sub-balancers, and removes the subconns.
// The BalancerGroup can be restarted later.
func (bg *BalancerGroup) Close() {
	bg.incomingMu.Lock()
	if bg.incomingStarted {
		bg.incomingStarted = false
		// Also remove all SubConns.
		for sc := range bg.scToSubBalancer {
			bg.cc.RemoveSubConn(sc)
			delete(bg.scToSubBalancer, sc)
		}
	}
	bg.incomingMu.Unlock()

	// Clear(true) runs the clear function to close sub-balancers in cache. It
	// must be called outside of the outgoing mutex.
	bg.balancerCache.Clear(true)

	bg.outgoingMu.Lock()
	if bg.outgoingStarted {
		bg.outgoingStarted = false
		for _, config := range bg.idToBalancerConfig {
			config.stopBalancer()
		}
	}
	bg.outgoingMu.Unlock()
}

// ExitIdle should be invoked when the parent LB policy's ExitIdle is invoked.
// It will trigger this on all sub-balancers, or reconnect their subconns if
// not supported.
func (bg *BalancerGroup) ExitIdle() {
	bg.outgoingMu.Lock()
	for _, config := range bg.idToBalancerConfig {
		if !config.exitIdle() {
			bg.connect(config)
		}
	}
	bg.outgoingMu.Unlock()
}

// ExitIdleOne instructs the sub-balancer `id` to exit IDLE state, if
// appropriate and possible.
func (bg *BalancerGroup) ExitIdleOne(id string) {
	bg.outgoingMu.Lock()
	if config := bg.idToBalancerConfig[id]; config != nil {
		if !config.exitIdle() {
			bg.connect(config)
		}
	}
	bg.outgoingMu.Unlock()
}
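
// The sketch below is illustrative only; parentBalancer, its bg field, and
// splitPerChild are hypothetical names, not part of this package. It shows how
// a high level balancer would typically delegate its balancer.Balancer
// callbacks to the group it owns:
//
//	func (p *parentBalancer) UpdateClientConnState(s balancer.ClientConnState) error {
//		// Split the update per child and forward each piece by ID.
//		for id, childState := range p.splitPerChild(s) {
//			if err := p.bg.UpdateClientConnState(id, childState); err != nil {
//				return err
//			}
//		}
//		return nil
//	}
//
//	func (p *parentBalancer) UpdateSubConnState(sc balancer.SubConn, s balancer.SubConnState) {
//		p.bg.UpdateSubConnState(sc, s)
//	}
//
//	func (p *parentBalancer) ResolverError(err error) { p.bg.ResolverError(err) }
//
//	func (p *parentBalancer) ExitIdle() { p.bg.ExitIdle() }
//
//	func (p *parentBalancer) Close() { p.bg.Close() }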