// Source: google.golang.org/grpc@v1.74.2/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go

/*
 *
 * Copyright 2024 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

// Package pickfirstleaf contains the pick_first load balancing policy which
// will be the universal leaf policy after dualstack changes are implemented.
//
// # Experimental
//
// Notice: This package is EXPERIMENTAL and may be changed or removed in a
// later release.
package pickfirstleaf

import (
	"encoding/json"
	"errors"
	"fmt"
	"net"
	"net/netip"
	"sync"
	"time"

	"google.golang.org/grpc/balancer"
	"google.golang.org/grpc/balancer/pickfirst/internal"
	"google.golang.org/grpc/connectivity"
	expstats "google.golang.org/grpc/experimental/stats"
	"google.golang.org/grpc/grpclog"
	"google.golang.org/grpc/internal/envconfig"
	internalgrpclog "google.golang.org/grpc/internal/grpclog"
	"google.golang.org/grpc/internal/pretty"
	"google.golang.org/grpc/resolver"
	"google.golang.org/grpc/serviceconfig"
)

// init registers pickfirstBuilder with the balancer package. When
// envconfig.NewPickFirstEnabled is set, Name is switched to "pick_first"
// before the registration; pickfirstBuilder.Name returns this variable, so the
// assignment has to stay ahead of the Register call.
func init() {
	if envconfig.NewPickFirstEnabled {
		// Register as the default pick_first balancer.
		Name = "pick_first"
	}
	balancer.Register(pickfirstBuilder{})
}

// enableHealthListenerKeyType is a unique key type used in resolver
// attributes to indicate whether the health listener usage is enabled.
59 type enableHealthListenerKeyType struct{} 60 61 var ( 62 logger = grpclog.Component("pick-first-leaf-lb") 63 // Name is the name of the pick_first_leaf balancer. 64 // It is changed to "pick_first" in init() if this balancer is to be 65 // registered as the default pickfirst. 66 Name = "pick_first_leaf" 67 disconnectionsMetric = expstats.RegisterInt64Count(expstats.MetricDescriptor{ 68 Name: "grpc.lb.pick_first.disconnections", 69 Description: "EXPERIMENTAL. Number of times the selected subchannel becomes disconnected.", 70 Unit: "disconnection", 71 Labels: []string{"grpc.target"}, 72 Default: false, 73 }) 74 connectionAttemptsSucceededMetric = expstats.RegisterInt64Count(expstats.MetricDescriptor{ 75 Name: "grpc.lb.pick_first.connection_attempts_succeeded", 76 Description: "EXPERIMENTAL. Number of successful connection attempts.", 77 Unit: "attempt", 78 Labels: []string{"grpc.target"}, 79 Default: false, 80 }) 81 connectionAttemptsFailedMetric = expstats.RegisterInt64Count(expstats.MetricDescriptor{ 82 Name: "grpc.lb.pick_first.connection_attempts_failed", 83 Description: "EXPERIMENTAL. Number of failed connection attempts.", 84 Unit: "attempt", 85 Labels: []string{"grpc.target"}, 86 Default: false, 87 }) 88 ) 89 90 const ( 91 // TODO: change to pick-first when this becomes the default pick_first policy. 92 logPrefix = "[pick-first-leaf-lb %p] " 93 // connectionDelayInterval is the time to wait for during the happy eyeballs 94 // pass before starting the next connection attempt. 95 connectionDelayInterval = 250 * time.Millisecond 96 ) 97 98 type ipAddrFamily int 99 100 const ( 101 // ipAddrFamilyUnknown represents strings that can't be parsed as an IP 102 // address. 
103 ipAddrFamilyUnknown ipAddrFamily = iota 104 ipAddrFamilyV4 105 ipAddrFamilyV6 106 ) 107 108 type pickfirstBuilder struct{} 109 110 func (pickfirstBuilder) Build(cc balancer.ClientConn, bo balancer.BuildOptions) balancer.Balancer { 111 b := &pickfirstBalancer{ 112 cc: cc, 113 target: bo.Target.String(), 114 metricsRecorder: cc.MetricsRecorder(), 115 116 subConns: resolver.NewAddressMapV2[*scData](), 117 state: connectivity.Connecting, 118 cancelConnectionTimer: func() {}, 119 } 120 b.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf(logPrefix, b)) 121 return b 122 } 123 124 func (b pickfirstBuilder) Name() string { 125 return Name 126 } 127 128 func (pickfirstBuilder) ParseConfig(js json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { 129 var cfg pfConfig 130 if err := json.Unmarshal(js, &cfg); err != nil { 131 return nil, fmt.Errorf("pickfirst: unable to unmarshal LB policy config: %s, error: %v", string(js), err) 132 } 133 return cfg, nil 134 } 135 136 // EnableHealthListener updates the state to configure pickfirst for using a 137 // generic health listener. 138 func EnableHealthListener(state resolver.State) resolver.State { 139 state.Attributes = state.Attributes.WithValue(enableHealthListenerKeyType{}, true) 140 return state 141 } 142 143 type pfConfig struct { 144 serviceconfig.LoadBalancingConfig `json:"-"` 145 146 // If set to true, instructs the LB policy to shuffle the order of the list 147 // of endpoints received from the name resolver before attempting to 148 // connect to them. 149 ShuffleAddressList bool `json:"shuffleAddressList"` 150 } 151 152 // scData keeps track of the current state of the subConn. 153 // It is not safe for concurrent access. 154 type scData struct { 155 // The following fields are initialized at build time and read-only after 156 // that. 
157 subConn balancer.SubConn 158 addr resolver.Address 159 160 rawConnectivityState connectivity.State 161 // The effective connectivity state based on raw connectivity, health state 162 // and after following sticky TransientFailure behaviour defined in A62. 163 effectiveState connectivity.State 164 lastErr error 165 connectionFailedInFirstPass bool 166 } 167 168 func (b *pickfirstBalancer) newSCData(addr resolver.Address) (*scData, error) { 169 sd := &scData{ 170 rawConnectivityState: connectivity.Idle, 171 effectiveState: connectivity.Idle, 172 addr: addr, 173 } 174 sc, err := b.cc.NewSubConn([]resolver.Address{addr}, balancer.NewSubConnOptions{ 175 StateListener: func(state balancer.SubConnState) { 176 b.updateSubConnState(sd, state) 177 }, 178 }) 179 if err != nil { 180 return nil, err 181 } 182 sd.subConn = sc 183 return sd, nil 184 } 185 186 type pickfirstBalancer struct { 187 // The following fields are initialized at build time and read-only after 188 // that and therefore do not need to be guarded by a mutex. 189 logger *internalgrpclog.PrefixLogger 190 cc balancer.ClientConn 191 target string 192 metricsRecorder expstats.MetricsRecorder // guaranteed to be non nil 193 194 // The mutex is used to ensure synchronization of updates triggered 195 // from the idle picker and the already serialized resolver, 196 // SubConn state updates. 197 mu sync.Mutex 198 // State reported to the channel based on SubConn states and resolver 199 // updates. 200 state connectivity.State 201 // scData for active subonns mapped by address. 202 subConns *resolver.AddressMapV2[*scData] 203 addressList addressList 204 firstPass bool 205 numTF int 206 cancelConnectionTimer func() 207 healthCheckingEnabled bool 208 } 209 210 // ResolverError is called by the ClientConn when the name resolver produces 211 // an error or when pickfirst determined the resolver update to be invalid. 
212 func (b *pickfirstBalancer) ResolverError(err error) { 213 b.mu.Lock() 214 defer b.mu.Unlock() 215 b.resolverErrorLocked(err) 216 } 217 218 func (b *pickfirstBalancer) resolverErrorLocked(err error) { 219 if b.logger.V(2) { 220 b.logger.Infof("Received error from the name resolver: %v", err) 221 } 222 223 // The picker will not change since the balancer does not currently 224 // report an error. If the balancer hasn't received a single good resolver 225 // update yet, transition to TRANSIENT_FAILURE. 226 if b.state != connectivity.TransientFailure && b.addressList.size() > 0 { 227 if b.logger.V(2) { 228 b.logger.Infof("Ignoring resolver error because balancer is using a previous good update.") 229 } 230 return 231 } 232 233 b.updateBalancerState(balancer.State{ 234 ConnectivityState: connectivity.TransientFailure, 235 Picker: &picker{err: fmt.Errorf("name resolver error: %v", err)}, 236 }) 237 } 238 239 func (b *pickfirstBalancer) UpdateClientConnState(state balancer.ClientConnState) error { 240 b.mu.Lock() 241 defer b.mu.Unlock() 242 b.cancelConnectionTimer() 243 if len(state.ResolverState.Addresses) == 0 && len(state.ResolverState.Endpoints) == 0 { 244 // Cleanup state pertaining to the previous resolver state. 245 // Treat an empty address list like an error by calling b.ResolverError. 
246 b.closeSubConnsLocked() 247 b.addressList.updateAddrs(nil) 248 b.resolverErrorLocked(errors.New("produced zero addresses")) 249 return balancer.ErrBadResolverState 250 } 251 b.healthCheckingEnabled = state.ResolverState.Attributes.Value(enableHealthListenerKeyType{}) != nil 252 cfg, ok := state.BalancerConfig.(pfConfig) 253 if state.BalancerConfig != nil && !ok { 254 return fmt.Errorf("pickfirst: received illegal BalancerConfig (type %T): %v: %w", state.BalancerConfig, state.BalancerConfig, balancer.ErrBadResolverState) 255 } 256 257 if b.logger.V(2) { 258 b.logger.Infof("Received new config %s, resolver state %s", pretty.ToJSON(cfg), pretty.ToJSON(state.ResolverState)) 259 } 260 261 var newAddrs []resolver.Address 262 if endpoints := state.ResolverState.Endpoints; len(endpoints) != 0 { 263 // Perform the optional shuffling described in gRFC A62. The shuffling 264 // will change the order of endpoints but not touch the order of the 265 // addresses within each endpoint. - A61 266 if cfg.ShuffleAddressList { 267 endpoints = append([]resolver.Endpoint{}, endpoints...) 268 internal.RandShuffle(len(endpoints), func(i, j int) { endpoints[i], endpoints[j] = endpoints[j], endpoints[i] }) 269 } 270 271 // "Flatten the list by concatenating the ordered list of addresses for 272 // each of the endpoints, in order." - A61 273 for _, endpoint := range endpoints { 274 newAddrs = append(newAddrs, endpoint.Addresses...) 275 } 276 } else { 277 // Endpoints not set, process addresses until we migrate resolver 278 // emissions fully to Endpoints. The top channel does wrap emitted 279 // addresses with endpoints, however some balancers such as weighted 280 // target do not forward the corresponding correct endpoints down/split 281 // endpoints properly. Once all balancers correctly forward endpoints 282 // down, can delete this else conditional. 283 newAddrs = state.ResolverState.Addresses 284 if cfg.ShuffleAddressList { 285 newAddrs = append([]resolver.Address{}, newAddrs...) 
286 internal.RandShuffle(len(endpoints), func(i, j int) { endpoints[i], endpoints[j] = endpoints[j], endpoints[i] }) 287 } 288 } 289 290 // If an address appears in multiple endpoints or in the same endpoint 291 // multiple times, we keep it only once. We will create only one SubConn 292 // for the address because an AddressMap is used to store SubConns. 293 // Not de-duplicating would result in attempting to connect to the same 294 // SubConn multiple times in the same pass. We don't want this. 295 newAddrs = deDupAddresses(newAddrs) 296 newAddrs = interleaveAddresses(newAddrs) 297 298 prevAddr := b.addressList.currentAddress() 299 prevSCData, found := b.subConns.Get(prevAddr) 300 prevAddrsCount := b.addressList.size() 301 isPrevRawConnectivityStateReady := found && prevSCData.rawConnectivityState == connectivity.Ready 302 b.addressList.updateAddrs(newAddrs) 303 304 // If the previous ready SubConn exists in new address list, 305 // keep this connection and don't create new SubConns. 306 if isPrevRawConnectivityStateReady && b.addressList.seekTo(prevAddr) { 307 return nil 308 } 309 310 b.reconcileSubConnsLocked(newAddrs) 311 // If it's the first resolver update or the balancer was already READY 312 // (but the new address list does not contain the ready SubConn) or 313 // CONNECTING, enter CONNECTING. 314 // We may be in TRANSIENT_FAILURE due to a previous empty address list, 315 // we should still enter CONNECTING because the sticky TF behaviour 316 // mentioned in A62 applies only when the TRANSIENT_FAILURE is reported 317 // due to connectivity failures. 318 if isPrevRawConnectivityStateReady || b.state == connectivity.Connecting || prevAddrsCount == 0 { 319 // Start connection attempt at first address. 
320 b.forceUpdateConcludedStateLocked(balancer.State{ 321 ConnectivityState: connectivity.Connecting, 322 Picker: &picker{err: balancer.ErrNoSubConnAvailable}, 323 }) 324 b.startFirstPassLocked() 325 } else if b.state == connectivity.TransientFailure { 326 // If we're in TRANSIENT_FAILURE, we stay in TRANSIENT_FAILURE until 327 // we're READY. See A62. 328 b.startFirstPassLocked() 329 } 330 return nil 331 } 332 333 // UpdateSubConnState is unused as a StateListener is always registered when 334 // creating SubConns. 335 func (b *pickfirstBalancer) UpdateSubConnState(subConn balancer.SubConn, state balancer.SubConnState) { 336 b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", subConn, state) 337 } 338 339 func (b *pickfirstBalancer) Close() { 340 b.mu.Lock() 341 defer b.mu.Unlock() 342 b.closeSubConnsLocked() 343 b.cancelConnectionTimer() 344 b.state = connectivity.Shutdown 345 } 346 347 // ExitIdle moves the balancer out of idle state. It can be called concurrently 348 // by the idlePicker and clientConn so access to variables should be 349 // synchronized. 350 func (b *pickfirstBalancer) ExitIdle() { 351 b.mu.Lock() 352 defer b.mu.Unlock() 353 if b.state == connectivity.Idle { 354 b.startFirstPassLocked() 355 } 356 } 357 358 func (b *pickfirstBalancer) startFirstPassLocked() { 359 b.firstPass = true 360 b.numTF = 0 361 // Reset the connection attempt record for existing SubConns. 362 for _, sd := range b.subConns.Values() { 363 sd.connectionFailedInFirstPass = false 364 } 365 b.requestConnectionLocked() 366 } 367 368 func (b *pickfirstBalancer) closeSubConnsLocked() { 369 for _, sd := range b.subConns.Values() { 370 sd.subConn.Shutdown() 371 } 372 b.subConns = resolver.NewAddressMapV2[*scData]() 373 } 374 375 // deDupAddresses ensures that each address appears only once in the slice. 
376 func deDupAddresses(addrs []resolver.Address) []resolver.Address { 377 seenAddrs := resolver.NewAddressMapV2[*scData]() 378 retAddrs := []resolver.Address{} 379 380 for _, addr := range addrs { 381 if _, ok := seenAddrs.Get(addr); ok { 382 continue 383 } 384 retAddrs = append(retAddrs, addr) 385 } 386 return retAddrs 387 } 388 389 // interleaveAddresses interleaves addresses of both families (IPv4 and IPv6) 390 // as per RFC-8305 section 4. 391 // Whichever address family is first in the list is followed by an address of 392 // the other address family; that is, if the first address in the list is IPv6, 393 // then the first IPv4 address should be moved up in the list to be second in 394 // the list. It doesn't support configuring "First Address Family Count", i.e. 395 // there will always be a single member of the first address family at the 396 // beginning of the interleaved list. 397 // Addresses that are neither IPv4 nor IPv6 are treated as part of a third 398 // "unknown" family for interleaving. 399 // See: https://datatracker.ietf.org/doc/html/rfc8305#autoid-6 400 func interleaveAddresses(addrs []resolver.Address) []resolver.Address { 401 familyAddrsMap := map[ipAddrFamily][]resolver.Address{} 402 interleavingOrder := []ipAddrFamily{} 403 for _, addr := range addrs { 404 family := addressFamily(addr.Addr) 405 if _, found := familyAddrsMap[family]; !found { 406 interleavingOrder = append(interleavingOrder, family) 407 } 408 familyAddrsMap[family] = append(familyAddrsMap[family], addr) 409 } 410 411 interleavedAddrs := make([]resolver.Address, 0, len(addrs)) 412 413 for curFamilyIdx := 0; len(interleavedAddrs) < len(addrs); curFamilyIdx = (curFamilyIdx + 1) % len(interleavingOrder) { 414 // Some IP types may have fewer addresses than others, so we look for 415 // the next type that has a remaining member to add to the interleaved 416 // list. 
417 family := interleavingOrder[curFamilyIdx] 418 remainingMembers := familyAddrsMap[family] 419 if len(remainingMembers) > 0 { 420 interleavedAddrs = append(interleavedAddrs, remainingMembers[0]) 421 familyAddrsMap[family] = remainingMembers[1:] 422 } 423 } 424 425 return interleavedAddrs 426 } 427 428 // addressFamily returns the ipAddrFamily after parsing the address string. 429 // If the address isn't of the format "ip-address:port", it returns 430 // ipAddrFamilyUnknown. The address may be valid even if it's not an IP when 431 // using a resolver like passthrough where the address may be a hostname in 432 // some format that the dialer can resolve. 433 func addressFamily(address string) ipAddrFamily { 434 // Parse the IP after removing the port. 435 host, _, err := net.SplitHostPort(address) 436 if err != nil { 437 return ipAddrFamilyUnknown 438 } 439 ip, err := netip.ParseAddr(host) 440 if err != nil { 441 return ipAddrFamilyUnknown 442 } 443 switch { 444 case ip.Is4() || ip.Is4In6(): 445 return ipAddrFamilyV4 446 case ip.Is6(): 447 return ipAddrFamilyV6 448 default: 449 return ipAddrFamilyUnknown 450 } 451 } 452 453 // reconcileSubConnsLocked updates the active subchannels based on a new address 454 // list from the resolver. It does this by: 455 // - closing subchannels: any existing subchannels associated with addresses 456 // that are no longer in the updated list are shut down. 457 // - removing subchannels: entries for these closed subchannels are removed 458 // from the subchannel map. 459 // 460 // This ensures that the subchannel map accurately reflects the current set of 461 // addresses received from the name resolver. 
462 func (b *pickfirstBalancer) reconcileSubConnsLocked(newAddrs []resolver.Address) { 463 newAddrsMap := resolver.NewAddressMapV2[bool]() 464 for _, addr := range newAddrs { 465 newAddrsMap.Set(addr, true) 466 } 467 468 for _, oldAddr := range b.subConns.Keys() { 469 if _, ok := newAddrsMap.Get(oldAddr); ok { 470 continue 471 } 472 val, _ := b.subConns.Get(oldAddr) 473 val.subConn.Shutdown() 474 b.subConns.Delete(oldAddr) 475 } 476 } 477 478 // shutdownRemainingLocked shuts down remaining subConns. Called when a subConn 479 // becomes ready, which means that all other subConn must be shutdown. 480 func (b *pickfirstBalancer) shutdownRemainingLocked(selected *scData) { 481 b.cancelConnectionTimer() 482 for _, sd := range b.subConns.Values() { 483 if sd.subConn != selected.subConn { 484 sd.subConn.Shutdown() 485 } 486 } 487 b.subConns = resolver.NewAddressMapV2[*scData]() 488 b.subConns.Set(selected.addr, selected) 489 } 490 491 // requestConnectionLocked starts connecting on the subchannel corresponding to 492 // the current address. If no subchannel exists, one is created. If the current 493 // subchannel is in TransientFailure, a connection to the next address is 494 // attempted until a subchannel is found. 495 func (b *pickfirstBalancer) requestConnectionLocked() { 496 if !b.addressList.isValid() { 497 return 498 } 499 var lastErr error 500 for valid := true; valid; valid = b.addressList.increment() { 501 curAddr := b.addressList.currentAddress() 502 sd, ok := b.subConns.Get(curAddr) 503 if !ok { 504 var err error 505 // We want to assign the new scData to sd from the outer scope, 506 // hence we can't use := below. 507 sd, err = b.newSCData(curAddr) 508 if err != nil { 509 // This should never happen, unless the clientConn is being shut 510 // down. 511 if b.logger.V(2) { 512 b.logger.Infof("Failed to create a subConn for address %v: %v", curAddr.String(), err) 513 } 514 // Do nothing, the LB policy will be closed soon. 
515 return 516 } 517 b.subConns.Set(curAddr, sd) 518 } 519 520 switch sd.rawConnectivityState { 521 case connectivity.Idle: 522 sd.subConn.Connect() 523 b.scheduleNextConnectionLocked() 524 return 525 case connectivity.TransientFailure: 526 // The SubConn is being re-used and failed during a previous pass 527 // over the addressList. It has not completed backoff yet. 528 // Mark it as having failed and try the next address. 529 sd.connectionFailedInFirstPass = true 530 lastErr = sd.lastErr 531 continue 532 case connectivity.Connecting: 533 // Wait for the connection attempt to complete or the timer to fire 534 // before attempting the next address. 535 b.scheduleNextConnectionLocked() 536 return 537 default: 538 b.logger.Errorf("SubConn with unexpected state %v present in SubConns map.", sd.rawConnectivityState) 539 return 540 541 } 542 } 543 544 // All the remaining addresses in the list are in TRANSIENT_FAILURE, end the 545 // first pass if possible. 546 b.endFirstPassIfPossibleLocked(lastErr) 547 } 548 549 func (b *pickfirstBalancer) scheduleNextConnectionLocked() { 550 b.cancelConnectionTimer() 551 if !b.addressList.hasNext() { 552 return 553 } 554 curAddr := b.addressList.currentAddress() 555 cancelled := false // Access to this is protected by the balancer's mutex. 556 closeFn := internal.TimeAfterFunc(connectionDelayInterval, func() { 557 b.mu.Lock() 558 defer b.mu.Unlock() 559 // If the scheduled task is cancelled while acquiring the mutex, return. 560 if cancelled { 561 return 562 } 563 if b.logger.V(2) { 564 b.logger.Infof("Happy Eyeballs timer expired while waiting for connection to %q.", curAddr.Addr) 565 } 566 if b.addressList.increment() { 567 b.requestConnectionLocked() 568 } 569 }) 570 // Access to the cancellation callback held by the balancer is guarded by 571 // the balancer's mutex, so it's safe to set the boolean from the callback. 
572 b.cancelConnectionTimer = sync.OnceFunc(func() { 573 cancelled = true 574 closeFn() 575 }) 576 } 577 578 func (b *pickfirstBalancer) updateSubConnState(sd *scData, newState balancer.SubConnState) { 579 b.mu.Lock() 580 defer b.mu.Unlock() 581 oldState := sd.rawConnectivityState 582 sd.rawConnectivityState = newState.ConnectivityState 583 // Previously relevant SubConns can still callback with state updates. 584 // To prevent pickers from returning these obsolete SubConns, this logic 585 // is included to check if the current list of active SubConns includes this 586 // SubConn. 587 if !b.isActiveSCData(sd) { 588 return 589 } 590 if newState.ConnectivityState == connectivity.Shutdown { 591 sd.effectiveState = connectivity.Shutdown 592 return 593 } 594 595 // Record a connection attempt when exiting CONNECTING. 596 if newState.ConnectivityState == connectivity.TransientFailure { 597 sd.connectionFailedInFirstPass = true 598 connectionAttemptsFailedMetric.Record(b.metricsRecorder, 1, b.target) 599 } 600 601 if newState.ConnectivityState == connectivity.Ready { 602 connectionAttemptsSucceededMetric.Record(b.metricsRecorder, 1, b.target) 603 b.shutdownRemainingLocked(sd) 604 if !b.addressList.seekTo(sd.addr) { 605 // This should not fail as we should have only one SubConn after 606 // entering READY. The SubConn should be present in the addressList. 607 b.logger.Errorf("Address %q not found address list in %v", sd.addr, b.addressList.addresses) 608 return 609 } 610 if !b.healthCheckingEnabled { 611 if b.logger.V(2) { 612 b.logger.Infof("SubConn %p reported connectivity state READY and the health listener is disabled. 
Transitioning SubConn to READY.", sd.subConn) 613 } 614 615 sd.effectiveState = connectivity.Ready 616 b.updateBalancerState(balancer.State{ 617 ConnectivityState: connectivity.Ready, 618 Picker: &picker{result: balancer.PickResult{SubConn: sd.subConn}}, 619 }) 620 return 621 } 622 if b.logger.V(2) { 623 b.logger.Infof("SubConn %p reported connectivity state READY. Registering health listener.", sd.subConn) 624 } 625 // Send a CONNECTING update to take the SubConn out of sticky-TF if 626 // required. 627 sd.effectiveState = connectivity.Connecting 628 b.updateBalancerState(balancer.State{ 629 ConnectivityState: connectivity.Connecting, 630 Picker: &picker{err: balancer.ErrNoSubConnAvailable}, 631 }) 632 sd.subConn.RegisterHealthListener(func(scs balancer.SubConnState) { 633 b.updateSubConnHealthState(sd, scs) 634 }) 635 return 636 } 637 638 // If the LB policy is READY, and it receives a subchannel state change, 639 // it means that the READY subchannel has failed. 640 // A SubConn can also transition from CONNECTING directly to IDLE when 641 // a transport is successfully created, but the connection fails 642 // before the SubConn can send the notification for READY. We treat 643 // this as a successful connection and transition to IDLE. 644 // TODO: https://github.com/grpc/grpc-go/issues/7862 - Remove the second 645 // part of the if condition below once the issue is fixed. 646 if oldState == connectivity.Ready || (oldState == connectivity.Connecting && newState.ConnectivityState == connectivity.Idle) { 647 // Once a transport fails, the balancer enters IDLE and starts from 648 // the first address when the picker is used. 649 b.shutdownRemainingLocked(sd) 650 sd.effectiveState = newState.ConnectivityState 651 // READY SubConn interspliced in between CONNECTING and IDLE, need to 652 // account for that. 
653 if oldState == connectivity.Connecting { 654 // A known issue (https://github.com/grpc/grpc-go/issues/7862) 655 // causes a race that prevents the READY state change notification. 656 // This works around it. 657 connectionAttemptsSucceededMetric.Record(b.metricsRecorder, 1, b.target) 658 } 659 disconnectionsMetric.Record(b.metricsRecorder, 1, b.target) 660 b.addressList.reset() 661 b.updateBalancerState(balancer.State{ 662 ConnectivityState: connectivity.Idle, 663 Picker: &idlePicker{exitIdle: sync.OnceFunc(b.ExitIdle)}, 664 }) 665 return 666 } 667 668 if b.firstPass { 669 switch newState.ConnectivityState { 670 case connectivity.Connecting: 671 // The effective state can be in either IDLE, CONNECTING or 672 // TRANSIENT_FAILURE. If it's TRANSIENT_FAILURE, stay in 673 // TRANSIENT_FAILURE until it's READY. See A62. 674 if sd.effectiveState != connectivity.TransientFailure { 675 sd.effectiveState = connectivity.Connecting 676 b.updateBalancerState(balancer.State{ 677 ConnectivityState: connectivity.Connecting, 678 Picker: &picker{err: balancer.ErrNoSubConnAvailable}, 679 }) 680 } 681 case connectivity.TransientFailure: 682 sd.lastErr = newState.ConnectionError 683 sd.effectiveState = connectivity.TransientFailure 684 // Since we're re-using common SubConns while handling resolver 685 // updates, we could receive an out of turn TRANSIENT_FAILURE from 686 // a pass over the previous address list. Happy Eyeballs will also 687 // cause out of order updates to arrive. 688 689 if curAddr := b.addressList.currentAddress(); equalAddressIgnoringBalAttributes(&curAddr, &sd.addr) { 690 b.cancelConnectionTimer() 691 if b.addressList.increment() { 692 b.requestConnectionLocked() 693 return 694 } 695 } 696 697 // End the first pass if we've seen a TRANSIENT_FAILURE from all 698 // SubConns once. 699 b.endFirstPassIfPossibleLocked(newState.ConnectionError) 700 } 701 return 702 } 703 704 // We have finished the first pass, keep re-connecting failing SubConns. 
705 switch newState.ConnectivityState { 706 case connectivity.TransientFailure: 707 b.numTF = (b.numTF + 1) % b.subConns.Len() 708 sd.lastErr = newState.ConnectionError 709 if b.numTF%b.subConns.Len() == 0 { 710 b.updateBalancerState(balancer.State{ 711 ConnectivityState: connectivity.TransientFailure, 712 Picker: &picker{err: newState.ConnectionError}, 713 }) 714 } 715 // We don't need to request re-resolution since the SubConn already 716 // does that before reporting TRANSIENT_FAILURE. 717 // TODO: #7534 - Move re-resolution requests from SubConn into 718 // pick_first. 719 case connectivity.Idle: 720 sd.subConn.Connect() 721 } 722 } 723 724 // endFirstPassIfPossibleLocked ends the first happy-eyeballs pass if all the 725 // addresses are tried and their SubConns have reported a failure. 726 func (b *pickfirstBalancer) endFirstPassIfPossibleLocked(lastErr error) { 727 // An optimization to avoid iterating over the entire SubConn map. 728 if b.addressList.isValid() { 729 return 730 } 731 // Connect() has been called on all the SubConns. The first pass can be 732 // ended if all the SubConns have reported a failure. 733 for _, sd := range b.subConns.Values() { 734 if !sd.connectionFailedInFirstPass { 735 return 736 } 737 } 738 b.firstPass = false 739 b.updateBalancerState(balancer.State{ 740 ConnectivityState: connectivity.TransientFailure, 741 Picker: &picker{err: lastErr}, 742 }) 743 // Start re-connecting all the SubConns that are already in IDLE. 744 for _, sd := range b.subConns.Values() { 745 if sd.rawConnectivityState == connectivity.Idle { 746 sd.subConn.Connect() 747 } 748 } 749 } 750 751 func (b *pickfirstBalancer) isActiveSCData(sd *scData) bool { 752 activeSD, found := b.subConns.Get(sd.addr) 753 return found && activeSD == sd 754 } 755 756 func (b *pickfirstBalancer) updateSubConnHealthState(sd *scData, state balancer.SubConnState) { 757 b.mu.Lock() 758 defer b.mu.Unlock() 759 // Previously relevant SubConns can still callback with state updates. 
760 // To prevent pickers from returning these obsolete SubConns, this logic 761 // is included to check if the current list of active SubConns includes 762 // this SubConn. 763 if !b.isActiveSCData(sd) { 764 return 765 } 766 sd.effectiveState = state.ConnectivityState 767 switch state.ConnectivityState { 768 case connectivity.Ready: 769 b.updateBalancerState(balancer.State{ 770 ConnectivityState: connectivity.Ready, 771 Picker: &picker{result: balancer.PickResult{SubConn: sd.subConn}}, 772 }) 773 case connectivity.TransientFailure: 774 b.updateBalancerState(balancer.State{ 775 ConnectivityState: connectivity.TransientFailure, 776 Picker: &picker{err: fmt.Errorf("pickfirst: health check failure: %v", state.ConnectionError)}, 777 }) 778 case connectivity.Connecting: 779 b.updateBalancerState(balancer.State{ 780 ConnectivityState: connectivity.Connecting, 781 Picker: &picker{err: balancer.ErrNoSubConnAvailable}, 782 }) 783 default: 784 b.logger.Errorf("Got unexpected health update for SubConn %p: %v", state) 785 } 786 } 787 788 // updateBalancerState stores the state reported to the channel and calls 789 // ClientConn.UpdateState(). As an optimization, it avoids sending duplicate 790 // updates to the channel. 791 func (b *pickfirstBalancer) updateBalancerState(newState balancer.State) { 792 // In case of TransientFailures allow the picker to be updated to update 793 // the connectivity error, in all other cases don't send duplicate state 794 // updates. 795 if newState.ConnectivityState == b.state && b.state != connectivity.TransientFailure { 796 return 797 } 798 b.forceUpdateConcludedStateLocked(newState) 799 } 800 801 // forceUpdateConcludedStateLocked stores the state reported to the channel and 802 // calls ClientConn.UpdateState(). 803 // A separate function is defined to force update the ClientConn state since the 804 // channel doesn't correctly assume that LB policies start in CONNECTING and 805 // relies on LB policy to send an initial CONNECTING update. 
806 func (b *pickfirstBalancer) forceUpdateConcludedStateLocked(newState balancer.State) { 807 b.state = newState.ConnectivityState 808 b.cc.UpdateState(newState) 809 } 810 811 type picker struct { 812 result balancer.PickResult 813 err error 814 } 815 816 func (p *picker) Pick(balancer.PickInfo) (balancer.PickResult, error) { 817 return p.result, p.err 818 } 819 820 // idlePicker is used when the SubConn is IDLE and kicks the SubConn into 821 // CONNECTING when Pick is called. 822 type idlePicker struct { 823 exitIdle func() 824 } 825 826 func (i *idlePicker) Pick(balancer.PickInfo) (balancer.PickResult, error) { 827 i.exitIdle() 828 return balancer.PickResult{}, balancer.ErrNoSubConnAvailable 829 } 830 831 // addressList manages sequentially iterating over addresses present in a list 832 // of endpoints. It provides a 1 dimensional view of the addresses present in 833 // the endpoints. 834 // This type is not safe for concurrent access. 835 type addressList struct { 836 addresses []resolver.Address 837 idx int 838 } 839 840 func (al *addressList) isValid() bool { 841 return al.idx < len(al.addresses) 842 } 843 844 func (al *addressList) size() int { 845 return len(al.addresses) 846 } 847 848 // increment moves to the next index in the address list. 849 // This method returns false if it went off the list, true otherwise. 850 func (al *addressList) increment() bool { 851 if !al.isValid() { 852 return false 853 } 854 al.idx++ 855 return al.idx < len(al.addresses) 856 } 857 858 // currentAddress returns the current address pointed to in the addressList. 859 // If the list is in an invalid state, it returns an empty address instead. 
860 func (al *addressList) currentAddress() resolver.Address { 861 if !al.isValid() { 862 return resolver.Address{} 863 } 864 return al.addresses[al.idx] 865 } 866 867 func (al *addressList) reset() { 868 al.idx = 0 869 } 870 871 func (al *addressList) updateAddrs(addrs []resolver.Address) { 872 al.addresses = addrs 873 al.reset() 874 } 875 876 // seekTo returns false if the needle was not found and the current index was 877 // left unchanged. 878 func (al *addressList) seekTo(needle resolver.Address) bool { 879 for ai, addr := range al.addresses { 880 if !equalAddressIgnoringBalAttributes(&addr, &needle) { 881 continue 882 } 883 al.idx = ai 884 return true 885 } 886 return false 887 } 888 889 // hasNext returns whether incrementing the addressList will result in moving 890 // past the end of the list. If the list has already moved past the end, it 891 // returns false. 892 func (al *addressList) hasNext() bool { 893 if !al.isValid() { 894 return false 895 } 896 return al.idx+1 < len(al.addresses) 897 } 898 899 // equalAddressIgnoringBalAttributes returns true is a and b are considered 900 // equal. This is different from the Equal method on the resolver.Address type 901 // which considers all fields to determine equality. Here, we only consider 902 // fields that are meaningful to the SubConn. 903 func equalAddressIgnoringBalAttributes(a, b *resolver.Address) bool { 904 return a.Addr == b.Addr && a.ServerName == b.ServerName && 905 a.Attributes.Equal(b.Attributes) 906 }