google.golang.org/grpc@v1.72.2/balancer/rls/balancer.go

/*
 *
 * Copyright 2020 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

// Package rls implements the RLS LB policy.
package rls

import (
	"encoding/json"
	"errors"
	"fmt"
	"sync"
	"sync/atomic"
	"time"
	"unsafe"

	"google.golang.org/grpc/balancer"
	"google.golang.org/grpc/connectivity"
	estats "google.golang.org/grpc/experimental/stats"
	"google.golang.org/grpc/grpclog"
	"google.golang.org/grpc/internal"
	"google.golang.org/grpc/internal/backoff"
	"google.golang.org/grpc/internal/balancergroup"
	"google.golang.org/grpc/internal/buffer"
	internalgrpclog "google.golang.org/grpc/internal/grpclog"
	"google.golang.org/grpc/internal/grpcsync"
	"google.golang.org/grpc/internal/pretty"
	"google.golang.org/grpc/resolver"
)

const (
	// Name is the name of the RLS LB policy.
	//
	// It currently has an experimental suffix, which will be removed once
	// end-to-end testing of the policy is completed.
	Name = internal.RLSLoadBalancingPolicyName
	// Default frequency for data cache purging.
	periodicCachePurgeFreq = time.Minute
)

var (
	logger            = grpclog.Component("rls")
	errBalancerClosed = errors.New("rls LB policy is closed")

	// The vars below are defined so that they can be overridden in unit tests.

	// Default exponential backoff strategy for data cache entries.
	defaultBackoffStrategy = backoff.Strategy(backoff.DefaultExponential)
	// Ticker used for periodic data cache purging.
	dataCachePurgeTicker = func() *time.Ticker { return time.NewTicker(periodicCachePurgeFreq) }
	// We want every cache entry to live in the cache for at least this
	// duration. If we encounter a cache entry whose minimum expiration time is
	// in the future, we abort the LRU pass, which may temporarily leave the
	// cache too large. This is necessary to ensure that, in cases where the
	// cache is too small, a cache entry resulting from an RLS response stays
	// around long enough for the pending incoming requests to be re-processed
	// through the new picker. If we didn't do this, we would risk throwing
	// away each RLS response as we receive it, and would thus fail to actually
	// route any of our incoming requests.
	minEvictDuration = 5 * time.Second

	// The following functions are no-ops in actual code, but can be overridden
	// in tests to give them visibility into exactly when certain events
	// happen.
	clientConnUpdateHook = func() {}
	dataCachePurgeHook   = func() {}
	resetBackoffHook     = func() {}

	cacheEntriesMetric = estats.RegisterInt64Gauge(estats.MetricDescriptor{
		Name:        "grpc.lb.rls.cache_entries",
		Description: "EXPERIMENTAL. Number of entries in the RLS cache.",
		Unit:        "entry",
		Labels:      []string{"grpc.target", "grpc.lb.rls.server_target", "grpc.lb.rls.instance_uuid"},
		Default:     false,
	})
	cacheSizeMetric = estats.RegisterInt64Gauge(estats.MetricDescriptor{
		Name:        "grpc.lb.rls.cache_size",
		Description: "EXPERIMENTAL. The current size of the RLS cache.",
		Unit:        "By",
		Labels:      []string{"grpc.target", "grpc.lb.rls.server_target", "grpc.lb.rls.instance_uuid"},
		Default:     false,
	})
	defaultTargetPicksMetric = estats.RegisterInt64Count(estats.MetricDescriptor{
		Name:        "grpc.lb.rls.default_target_picks",
		Description: "EXPERIMENTAL. Number of LB picks sent to the default target.",
		Unit:        "pick",
		Labels:      []string{"grpc.target", "grpc.lb.rls.server_target", "grpc.lb.rls.data_plane_target", "grpc.lb.pick_result"},
		Default:     false,
	})
	targetPicksMetric = estats.RegisterInt64Count(estats.MetricDescriptor{
		Name:        "grpc.lb.rls.target_picks",
		Description: "EXPERIMENTAL. Number of LB picks sent to each RLS target. Note that if the default target is also returned by the RLS server, RPCs sent to that target from the cache will be counted in this metric, and not in grpc.lb.rls.default_target_picks.",
		Unit:        "pick",
		Labels:      []string{"grpc.target", "grpc.lb.rls.server_target", "grpc.lb.rls.data_plane_target", "grpc.lb.pick_result"},
		Default:     false,
	})
	failedPicksMetric = estats.RegisterInt64Count(estats.MetricDescriptor{
		Name:        "grpc.lb.rls.failed_picks",
		Description: "EXPERIMENTAL. Number of LB picks failed due to either a failed RLS request or the RLS channel being throttled.",
		Unit:        "pick",
		Labels:      []string{"grpc.target", "grpc.lb.rls.server_target"},
		Default:     false,
	})
)

func init() {
	balancer.Register(&rlsBB{})
}

type rlsBB struct{}

func (rlsBB) Name() string {
	return Name
}

func (rlsBB) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
	lb := &rlsBalancer{
		closed:             grpcsync.NewEvent(),
		done:               grpcsync.NewEvent(),
		cc:                 cc,
		bopts:              opts,
		purgeTicker:        dataCachePurgeTicker(),
		dataCachePurgeHook: dataCachePurgeHook,
		lbCfg:              &lbConfig{},
		pendingMap:         make(map[cacheKey]*backoffState),
		childPolicies:      make(map[string]*childPolicyWrapper),
		updateCh:           buffer.NewUnbounded(),
	}
	lb.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf("[rls-experimental-lb %p] ", lb))
	lb.dataCache = newDataCache(maxCacheSize, lb.logger, cc.MetricsRecorder(), opts.Target.String())
	lb.bg = balancergroup.New(balancergroup.Options{
		CC:                      cc,
		BuildOpts:               opts,
		StateAggregator:         lb,
		Logger:                  lb.logger,
		SubBalancerCloseTimeout: time.Duration(0), // Disable caching of removed child policies.
	})
	go lb.run()
	return lb
}

// rlsBalancer implements the RLS LB policy.
type rlsBalancer struct {
	closed             *grpcsync.Event // Fires when Close() is invoked. Guarded by stateMu.
	done               *grpcsync.Event // Fires when Close() is done.
	cc                 balancer.ClientConn
	bopts              balancer.BuildOptions
	purgeTicker        *time.Ticker
	dataCachePurgeHook func()
	logger             *internalgrpclog.PrefixLogger

	// If both cacheMu and stateMu need to be acquired, the former must be
	// acquired first to prevent a deadlock. This order restriction exists
	// because, in places where we need to acquire both locks, we always start
	// off by reading the cache.
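	//
	// A minimal sketch of that order (illustrative only; assuming a code path
	// needs to hold both locks at once):
	//
	//	b.cacheMu.Lock()
	//	// ... read from or mutate the data cache ...
	//	b.stateMu.Lock()
	//	// ... mutate LB policy state ...
	//	b.stateMu.Unlock()
	//	b.cacheMu.Unlock()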

	// cacheMu guards access to the data cache and pending requests map. We
	// cannot use an RWMutex here since even an operation like
	// dataCache.getEntry() modifies the underlying LRU, which is implemented
	// as a doubly linked list.
	cacheMu    sync.Mutex
	dataCache  *dataCache                 // Cache of RLS data.
	pendingMap map[cacheKey]*backoffState // Map of pending RLS requests.

	// stateMu guards access to all LB policy state.
	stateMu            sync.Mutex
	lbCfg              *lbConfig        // Most recently received service config.
	childPolicyBuilder balancer.Builder // Cached child policy builder.
	resolverState      resolver.State   // Cached resolver state.
	ctrlCh             *controlChannel  // Control channel to the RLS server.
	bg                 *balancergroup.BalancerGroup
	childPolicies      map[string]*childPolicyWrapper
	defaultPolicy      *childPolicyWrapper
	// A reference to the most recent picker sent to gRPC as part of a state
	// update is cached in this field so that we can release the reference to
	// the default child policy wrapper when a new picker is created. See
	// sendNewPickerLocked() for details.
	lastPicker *rlsPicker
	// Set during UpdateClientConnState when pushing updates to child policies.
	// Prevents state updates from child policies from causing new pickers to
	// be sent up the channel. Cleared after all child policies have processed
	// the updates sent to them, after which a new picker is sent up the
	// channel.
	inhibitPickerUpdates bool

	// Channel on which all updates are pushed. Processed in run().
	updateCh *buffer.Unbounded
}

type resumePickerUpdates struct {
	done chan struct{}
}

// childPolicyIDAndState wraps a child policy id and its state update.
type childPolicyIDAndState struct {
	id    string
	state balancer.State
}

type controlChannelReady struct{}

// run is a long-running goroutine which handles all the updates that the
// balancer wishes to handle. The appropriate updateHandler pushes the update
// onto a channel that this goroutine selects on, so that the update is
// handled asynchronously.
func (b *rlsBalancer) run() {
	// We exit the for loop below only after Close() has been invoked. Firing
	// the done event here ensures that Close() returns only after all
	// goroutines are done.
	defer func() { b.done.Fire() }()

	// Wait for the purgeDataCache() goroutine to exit before returning from
	// here.
	doneCh := make(chan struct{})
	defer func() {
		<-doneCh
	}()
	go b.purgeDataCache(doneCh)

	for {
		select {
		case u, ok := <-b.updateCh.Get():
			if !ok {
				return
			}
			b.updateCh.Load()
			switch update := u.(type) {
			case childPolicyIDAndState:
				b.handleChildPolicyStateUpdate(update.id, update.state)
			case controlChannelReady:
				b.logger.Infof("Resetting backoff state after control channel getting back to READY")
				b.cacheMu.Lock()
				updatePicker := b.dataCache.resetBackoffState(&backoffState{bs: defaultBackoffStrategy})
				b.cacheMu.Unlock()
				if updatePicker {
					b.sendNewPicker()
				}
				resetBackoffHook()
			case resumePickerUpdates:
				b.stateMu.Lock()
				b.logger.Infof("Resuming picker updates after config propagation to child policies")
				b.inhibitPickerUpdates = false
				b.sendNewPickerLocked()
				close(update.done)
				b.stateMu.Unlock()
			default:
				b.logger.Errorf("Unsupported update type %T", update)
			}
		case <-b.closed.Done():
			return
		}
	}
}

// purgeDataCache is a long-running goroutine which periodically deletes
// expired entries. An expired entry is one for which both the expiryTime and
// backoffExpiryTime are in the past.
func (b *rlsBalancer) purgeDataCache(doneCh chan struct{}) {
	defer close(doneCh)

	for {
		select {
		case <-b.closed.Done():
			return
		case <-b.purgeTicker.C:
			b.cacheMu.Lock()
			updatePicker := b.dataCache.evictExpiredEntries()
			b.cacheMu.Unlock()
			if updatePicker {
				b.sendNewPicker()
			}
			b.dataCachePurgeHook()
		}
	}
}

func (b *rlsBalancer) UpdateClientConnState(ccs balancer.ClientConnState) error {
	defer clientConnUpdateHook()

	b.stateMu.Lock()
	if b.closed.HasFired() {
		b.stateMu.Unlock()
		b.logger.Warningf("Received service config after balancer close: %s", pretty.ToJSON(ccs.BalancerConfig))
		return errBalancerClosed
	}

	newCfg := ccs.BalancerConfig.(*lbConfig)
	if b.lbCfg.Equal(newCfg) {
		b.stateMu.Unlock()
		b.logger.Infof("New service config matches existing config")
		return nil
	}

	b.logger.Infof("Delaying picker updates until config is propagated to and processed by child policies")
	b.inhibitPickerUpdates = true

	// When the RLS server name changes, the old control channel needs to be
	// swapped out for a new one. All state associated with the throttling
	// algorithm is stored on a per-control-channel basis; when we swap out
	// channels, we also swap out the throttling state.
	b.handleControlChannelUpdate(newCfg)

	// Any changes to the child policy name or configuration need to be
	// handled by either creating new child policies or pushing updates to
	// existing ones.
	b.resolverState = ccs.ResolverState
	b.handleChildPolicyConfigUpdate(newCfg, &ccs)

	// Resize the cache if the size in the config has changed.
	resizeCache := newCfg.cacheSizeBytes != b.lbCfg.cacheSizeBytes

	// Update the copy of the config in the LB policy before releasing the lock.
	b.lbCfg = newCfg
	b.stateMu.Unlock()

	// We cannot perform the cache operations above (while holding stateMu)
	// because cacheMu must be acquired before stateMu if we are to hold both
	// locks at the same time.
	b.cacheMu.Lock()
	b.dataCache.updateRLSServerTarget(newCfg.lookupService)
	if resizeCache {
		// If the new config reduces the size of the data cache, we might have
		// to evict entries to get the cache size down to the newly specified
		// size. If we do evict an entry with a valid backoff timer, a new
		// picker needs to be sent to the channel to re-process any RPCs
		// queued as a result of this backoff timer.
		b.dataCache.resize(newCfg.cacheSizeBytes)
	}
	b.cacheMu.Unlock()
	// Enqueue an event which will notify us when the above update has been
	// propagated to all child policies, and the child policies have all
	// processed their updates, and we have sent a picker update.
	done := make(chan struct{})
	b.updateCh.Put(resumePickerUpdates{done: done})
	<-done
	return nil
}

// handleControlChannelUpdate handles updates to service config fields which
// influence the control channel to the RLS server.
//
// Caller must hold lb.stateMu.
func (b *rlsBalancer) handleControlChannelUpdate(newCfg *lbConfig) {
	if newCfg.lookupService == b.lbCfg.lookupService && newCfg.lookupServiceTimeout == b.lbCfg.lookupServiceTimeout {
		return
	}

	// Create a new control channel and close the existing one.
	b.logger.Infof("Creating control channel to RLS server at: %v", newCfg.lookupService)
	backToReadyFn := func() {
		b.updateCh.Put(controlChannelReady{})
	}
	ctrlCh, err := newControlChannel(newCfg.lookupService, newCfg.controlChannelServiceConfig, newCfg.lookupServiceTimeout, b.bopts, backToReadyFn)
	if err != nil {
		// This is very uncommon and usually represents a non-transient error.
		// There is not much we can do here other than wait for another update
		// which might fix things.
		b.logger.Errorf("Failed to create control channel to %q: %v", newCfg.lookupService, err)
		return
	}
	if b.ctrlCh != nil {
		b.ctrlCh.close()
	}
	b.ctrlCh = ctrlCh
}

// handleChildPolicyConfigUpdate handles updates to service config fields which
// influence child policy configuration.
//
// Caller must hold lb.stateMu.
func (b *rlsBalancer) handleChildPolicyConfigUpdate(newCfg *lbConfig, ccs *balancer.ClientConnState) {
	// Update the child policy builder first since other steps depend on it.
	if b.childPolicyBuilder == nil || b.childPolicyBuilder.Name() != newCfg.childPolicyName {
		b.logger.Infof("Child policy changed to %q", newCfg.childPolicyName)
		b.childPolicyBuilder = balancer.Get(newCfg.childPolicyName)
		for _, cpw := range b.childPolicies {
			// If the child policy has changed, we need to remove the old
			// policy from the BalancerGroup and add a new one. The
			// BalancerGroup takes care of closing the old one in this case.
			b.bg.Remove(cpw.target)
			b.bg.Add(cpw.target, b.childPolicyBuilder)
		}
	}

	configSentToDefault := false
	if b.lbCfg.defaultTarget != newCfg.defaultTarget {
		// If the default target has changed, create a new childPolicyWrapper
		// for the new target if required. If a new wrapper is created, add it
		// to the childPolicies map and the BalancerGroup.
		b.logger.Infof("Default target in LB config changing from %q to %q", b.lbCfg.defaultTarget, newCfg.defaultTarget)
		cpw := b.childPolicies[newCfg.defaultTarget]
		if cpw == nil {
			cpw = newChildPolicyWrapper(newCfg.defaultTarget)
			b.childPolicies[newCfg.defaultTarget] = cpw
			b.bg.Add(newCfg.defaultTarget, b.childPolicyBuilder)
			b.logger.Infof("Child policy %q added to BalancerGroup", newCfg.defaultTarget)
		}
		if err := b.buildAndPushChildPolicyConfigs(newCfg.defaultTarget, newCfg, ccs); err != nil {
			cpw.lamify(err)
		}

		// If an old default exists, release its reference. If this was the
		// last reference, remove the child policy from the BalancerGroup and
		// remove the corresponding entry from the childPolicies map.
		if b.defaultPolicy != nil {
			if b.defaultPolicy.releaseRef() {
				delete(b.childPolicies, b.lbCfg.defaultTarget)
				b.bg.Remove(b.defaultPolicy.target)
			}
		}
		b.defaultPolicy = cpw
		configSentToDefault = true
	}

	// No change in configuration affecting child policies. Return early.
	if b.lbCfg.childPolicyName == newCfg.childPolicyName && b.lbCfg.childPolicyTargetField == newCfg.childPolicyTargetField && childPolicyConfigEqual(b.lbCfg.childPolicyConfig, newCfg.childPolicyConfig) {
		return
	}

	// If fields affecting child policy configuration have changed, the
	// changes are pushed to the childPolicyWrapper, which handles them
	// appropriately.
	for _, cpw := range b.childPolicies {
		if configSentToDefault && cpw.target == newCfg.defaultTarget {
			// The default target has already been taken care of.
			continue
		}
		if err := b.buildAndPushChildPolicyConfigs(cpw.target, newCfg, ccs); err != nil {
			cpw.lamify(err)
		}
	}
}

// buildAndPushChildPolicyConfigs builds the final child policy configuration
// by adding the `targetField` to the base child policy configuration received
// in the RLS LB policy configuration. The `targetField` is set to the given
// target, and the configuration is pushed to the child policy through the
// BalancerGroup.
//
// Caller must hold lb.stateMu.
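//
// For example (an illustrative sketch; the field name and values here are
// hypothetical): with `targetField` set to "serviceName" and a base child
// policy config of
//
//	{"timeout": "1s"}
//
// a call with target "lb.example.com" produces
//
//	{"timeout": "1s", "serviceName": "lb.example.com"}
//
// which is then parsed by the child policy builder's ConfigParser before
// being pushed to the child policy.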
func (b *rlsBalancer) buildAndPushChildPolicyConfigs(target string, newCfg *lbConfig, ccs *balancer.ClientConnState) error {
	jsonTarget, err := json.Marshal(target)
	if err != nil {
		return fmt.Errorf("failed to marshal child policy target %q: %v", target, err)
	}

	config := newCfg.childPolicyConfig
	targetField := newCfg.childPolicyTargetField
	config[targetField] = jsonTarget
	jsonCfg, err := json.Marshal(config)
	if err != nil {
		return fmt.Errorf("failed to marshal child policy config %+v: %v", config, err)
	}

	parser, _ := b.childPolicyBuilder.(balancer.ConfigParser)
	parsedCfg, err := parser.ParseConfig(jsonCfg)
	if err != nil {
		return fmt.Errorf("childPolicy config parsing failed: %v", err)
	}

	state := balancer.ClientConnState{ResolverState: ccs.ResolverState, BalancerConfig: parsedCfg}
	b.logger.Infof("Pushing new state to child policy %q: %+v", target, state)
	if err := b.bg.UpdateClientConnState(target, state); err != nil {
		b.logger.Warningf("UpdateClientConnState(%q, %+v) failed: %v", target, ccs, err)
	}
	return nil
}

func (b *rlsBalancer) ResolverError(err error) {
	b.bg.ResolverError(err)
}

func (b *rlsBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
	b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", sc, state)
}

func (b *rlsBalancer) Close() {
	b.stateMu.Lock()
	b.closed.Fire()
	b.purgeTicker.Stop()
	if b.ctrlCh != nil {
		b.ctrlCh.close()
	}
	b.bg.Close()
	b.stateMu.Unlock()

	b.cacheMu.Lock()
	b.dataCache.stop()
	b.cacheMu.Unlock()

	b.updateCh.Close()

	<-b.done.Done()
}

func (b *rlsBalancer) ExitIdle() {
	b.bg.ExitIdle()
}

// sendNewPickerLocked pushes a new picker onto the channel.
//
// Note that regardless of what connectivity state is reported, the policy
// returns its own picker, and not a picker that unconditionally queues
// (typically used for IDLE or CONNECTING) or a picker that unconditionally
// fails (typically used for TRANSIENT_FAILURE). This is required because,
// irrespective of the connectivity state, we need to be able to perform RLS
// lookups for incoming RPCs and affect the status of queued RPCs based on the
// receipt of RLS responses.
//
// Caller must hold lb.stateMu.
func (b *rlsBalancer) sendNewPickerLocked() {
	aggregatedState := b.aggregatedConnectivityState()

	// Acquire a separate reference for the picker. This is required to ensure
	// that the wrapper held by the old picker is not closed when the default
	// target changes in the config, and a new wrapper is created for the new
	// default target. See handleChildPolicyConfigUpdate() for how config
	// changes affecting the default target are handled.
	if b.defaultPolicy != nil {
		b.defaultPolicy.acquireRef()
	}

	picker := &rlsPicker{
		kbm:             b.lbCfg.kbMap,
		origEndpoint:    b.bopts.Target.Endpoint(),
		lb:              b,
		defaultPolicy:   b.defaultPolicy,
		ctrlCh:          b.ctrlCh,
		maxAge:          b.lbCfg.maxAge,
		staleAge:        b.lbCfg.staleAge,
		bg:              b.bg,
		rlsServerTarget: b.lbCfg.lookupService,
		grpcTarget:      b.bopts.Target.String(),
		metricsRecorder: b.cc.MetricsRecorder(),
	}
	picker.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf("[rls-picker %p] ", picker))
	state := balancer.State{
		ConnectivityState: aggregatedState,
		Picker:            picker,
	}

	if !b.inhibitPickerUpdates {
		b.logger.Infof("New balancer.State: %+v", state)
		b.cc.UpdateState(state)
	} else {
		b.logger.Infof("Delaying picker update: %+v", state)
	}

	if b.lastPicker != nil {
		if b.defaultPolicy != nil {
			b.defaultPolicy.releaseRef()
		}
	}
	b.lastPicker = picker
}

func (b *rlsBalancer) sendNewPicker() {
	b.stateMu.Lock()
	defer b.stateMu.Unlock()
	if b.closed.HasFired() {
		return
	}
	b.sendNewPickerLocked()
}

// The aggregated connectivity state reported is determined as follows:
//   - If there is at least one child policy in state READY, the connectivity
//     state is READY.
//   - Otherwise, if there is at least one child policy in state CONNECTING,
//     the connectivity state is CONNECTING.
//   - Otherwise, if there is at least one child policy in state IDLE, the
//     connectivity state is IDLE.
//   - Otherwise, all child policies are in TRANSIENT_FAILURE, and the
//     connectivity state is TRANSIENT_FAILURE.
//
// If the RLS policy has no child policies and no configured default target,
// then we report connectivity state IDLE.
//
// Caller must hold lb.stateMu.
func (b *rlsBalancer) aggregatedConnectivityState() connectivity.State {
	if len(b.childPolicies) == 0 && b.lbCfg.defaultTarget == "" {
		return connectivity.Idle
	}

	var readyN, connectingN, idleN int
	for _, cpw := range b.childPolicies {
		state := (*balancer.State)(atomic.LoadPointer(&cpw.state))
		switch state.ConnectivityState {
		case connectivity.Ready:
			readyN++
		case connectivity.Connecting:
			connectingN++
		case connectivity.Idle:
			idleN++
		}
	}

	switch {
	case readyN > 0:
		return connectivity.Ready
	case connectingN > 0:
		return connectivity.Connecting
	case idleN > 0:
		return connectivity.Idle
	default:
		return connectivity.TransientFailure
	}
}

// UpdateState is an implementation of the
// balancergroup.BalancerStateAggregator interface. The actual state
// aggregation functionality is handled asynchronously; this method only
// pushes the state update onto a channel that is read and dispatched by the
// run() goroutine.
func (b *rlsBalancer) UpdateState(id string, state balancer.State) {
	b.updateCh.Put(childPolicyIDAndState{id: id, state: state})
}

// handleChildPolicyStateUpdate provides the state aggregator functionality
// for the BalancerGroup.
//
// This method is invoked by the BalancerGroup whenever a child policy sends a
// state update.
// We cache the child policy's connectivity state and picker for two reasons:
//   - to suppress connectivity state transitions from TRANSIENT_FAILURE to
//     states other than READY
//   - to delegate picks to child policies
func (b *rlsBalancer) handleChildPolicyStateUpdate(id string, newState balancer.State) {
	b.stateMu.Lock()
	defer b.stateMu.Unlock()

	cpw := b.childPolicies[id]
	if cpw == nil {
		// All child policies start with an entry in the map. If the id is not
		// in the map, it has either been removed or it never existed.
		b.logger.Warningf("Received state update %+v for missing child policy %q", newState, id)
		return
	}

	oldState := (*balancer.State)(atomic.LoadPointer(&cpw.state))
	if oldState.ConnectivityState == connectivity.TransientFailure && newState.ConnectivityState == connectivity.Connecting {
		// Ignore state transitions from TRANSIENT_FAILURE to CONNECTING, and
		// thus fail pending RPCs instead of queuing them indefinitely when
		// all subChannels are failing, even if the subChannels are bouncing
		// back and forth between CONNECTING and TRANSIENT_FAILURE.
		return
	}
	atomic.StorePointer(&cpw.state, unsafe.Pointer(&newState))
	b.logger.Infof("Child policy %q has new state %+v", id, newState)
	b.sendNewPickerLocked()
}

// acquireChildPolicyReferences attempts to acquire references to
// childPolicyWrappers corresponding to the passed in targets. If there is no
// childPolicyWrapper corresponding to one of the targets, a new one is
// created and added to the BalancerGroup.
func (b *rlsBalancer) acquireChildPolicyReferences(targets []string) []*childPolicyWrapper {
	b.stateMu.Lock()
	var newChildPolicies []*childPolicyWrapper
	for _, target := range targets {
		// If the target exists in the LB policy's childPolicies map, a new
		// reference is taken here and added to the new list.
		if cpw := b.childPolicies[target]; cpw != nil {
			cpw.acquireRef()
			newChildPolicies = append(newChildPolicies, cpw)
			continue
		}

		// If the target does not exist in the child policy map, a new child
		// policy wrapper is created and added to the new list.
		cpw := newChildPolicyWrapper(target)
		b.childPolicies[target] = cpw
		b.bg.Add(target, b.childPolicyBuilder)
		b.logger.Infof("Child policy %q added to BalancerGroup", target)
		newChildPolicies = append(newChildPolicies, cpw)
		if err := b.buildAndPushChildPolicyConfigs(target, b.lbCfg, &balancer.ClientConnState{
			ResolverState: b.resolverState,
		}); err != nil {
			cpw.lamify(err)
		}
	}
	b.stateMu.Unlock()
	return newChildPolicies
}

// releaseChildPolicyReferences releases references to childPolicyWrappers
// corresponding to the passed in targets. If the released reference was the
// last one, the child policy is removed from the BalancerGroup.
func (b *rlsBalancer) releaseChildPolicyReferences(targets []string) {
	b.stateMu.Lock()
	for _, target := range targets {
		if cpw := b.childPolicies[target]; cpw.releaseRef() {
			delete(b.childPolicies, cpw.target)
			b.bg.Remove(cpw.target)
		}
	}
	b.stateMu.Unlock()
}
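// A service config that selects this policy looks like the sketch below
// (illustrative only; the keybuilder, lookupService, defaultTarget, and child
// policy values are hypothetical, and the child policy must be one whose
// builder implements balancer.ConfigParser):
//
//	{
//	  "loadBalancingConfig": [{
//	    "rls_experimental": {
//	      "routeLookupConfig": {
//	        "grpcKeybuilders": [{"names": [{"service": "echo.Echo"}]}],
//	        "lookupService": "rls-server.example.com:443",
//	        "cacheSizeBytes": 1000000,
//	        "defaultTarget": "dns:///fallback.example.com:443"
//	      },
//	      "childPolicy": [{"grpclb": {}}],
//	      "childPolicyConfigTargetFieldName": "serviceName"
//	    }
//	  }]
//	}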