google.golang.org/grpc@v1.72.2/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go (about) 1 /* 2 * 3 * Copyright 2024 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 // Package pickfirstleaf contains the pick_first load balancing policy which 20 // will be the universal leaf policy after dualstack changes are implemented. 21 // 22 // # Experimental 23 // 24 // Notice: This package is EXPERIMENTAL and may be changed or removed in a 25 // later release. 26 package pickfirstleaf 27 28 import ( 29 "encoding/json" 30 "errors" 31 "fmt" 32 "net" 33 "net/netip" 34 "sync" 35 "time" 36 37 "google.golang.org/grpc/balancer" 38 "google.golang.org/grpc/balancer/pickfirst/internal" 39 "google.golang.org/grpc/connectivity" 40 expstats "google.golang.org/grpc/experimental/stats" 41 "google.golang.org/grpc/grpclog" 42 "google.golang.org/grpc/internal/envconfig" 43 internalgrpclog "google.golang.org/grpc/internal/grpclog" 44 "google.golang.org/grpc/internal/pretty" 45 "google.golang.org/grpc/resolver" 46 "google.golang.org/grpc/serviceconfig" 47 ) 48 49 func init() { 50 if envconfig.NewPickFirstEnabled { 51 // Register as the default pick_first balancer. 52 Name = "pick_first" 53 } 54 balancer.Register(pickfirstBuilder{}) 55 } 56 57 type ( 58 // enableHealthListenerKeyType is a unique key type used in resolver 59 // attributes to indicate whether the health listener usage is enabled. 60 enableHealthListenerKeyType struct{} 61 // managedByPickfirstKeyType is an attribute key type to inform Outlier 62 // Detection that the generic health listener is being used. 63 // TODO: https://github.com/grpc/grpc-go/issues/7915 - Remove this when 64 // implementing the dualstack design. This is a hack. Once Dualstack is 65 // completed, outlier detection will stop sending ejection updates through 66 // the connectivity listener. 67 managedByPickfirstKeyType struct{} 68 ) 69 70 var ( 71 logger = grpclog.Component("pick-first-leaf-lb") 72 // Name is the name of the pick_first_leaf balancer. 73 // It is changed to "pick_first" in init() if this balancer is to be 74 // registered as the default pickfirst. 75 Name = "pick_first_leaf" 76 disconnectionsMetric = expstats.RegisterInt64Count(expstats.MetricDescriptor{ 77 Name: "grpc.lb.pick_first.disconnections", 78 Description: "EXPERIMENTAL. Number of times the selected subchannel becomes disconnected.", 79 Unit: "disconnection", 80 Labels: []string{"grpc.target"}, 81 Default: false, 82 }) 83 connectionAttemptsSucceededMetric = expstats.RegisterInt64Count(expstats.MetricDescriptor{ 84 Name: "grpc.lb.pick_first.connection_attempts_succeeded", 85 Description: "EXPERIMENTAL. Number of successful connection attempts.", 86 Unit: "attempt", 87 Labels: []string{"grpc.target"}, 88 Default: false, 89 }) 90 connectionAttemptsFailedMetric = expstats.RegisterInt64Count(expstats.MetricDescriptor{ 91 Name: "grpc.lb.pick_first.connection_attempts_failed", 92 Description: "EXPERIMENTAL. Number of failed connection attempts.", 93 Unit: "attempt", 94 Labels: []string{"grpc.target"}, 95 Default: false, 96 }) 97 ) 98 99 const ( 100 // TODO: change to pick-first when this becomes the default pick_first policy. 101 logPrefix = "[pick-first-leaf-lb %p] " 102 // connectionDelayInterval is the time to wait for during the happy eyeballs 103 // pass before starting the next connection attempt. 104 connectionDelayInterval = 250 * time.Millisecond 105 ) 106 107 type ipAddrFamily int 108 109 const ( 110 // ipAddrFamilyUnknown represents strings that can't be parsed as an IP 111 // address. 112 ipAddrFamilyUnknown ipAddrFamily = iota 113 ipAddrFamilyV4 114 ipAddrFamilyV6 115 ) 116 117 type pickfirstBuilder struct{} 118 119 func (pickfirstBuilder) Build(cc balancer.ClientConn, bo balancer.BuildOptions) balancer.Balancer { 120 b := &pickfirstBalancer{ 121 cc: cc, 122 target: bo.Target.String(), 123 metricsRecorder: cc.MetricsRecorder(), 124 125 subConns: resolver.NewAddressMapV2[*scData](), 126 state: connectivity.Connecting, 127 cancelConnectionTimer: func() {}, 128 } 129 b.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf(logPrefix, b)) 130 return b 131 } 132 133 func (b pickfirstBuilder) Name() string { 134 return Name 135 } 136 137 func (pickfirstBuilder) ParseConfig(js json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { 138 var cfg pfConfig 139 if err := json.Unmarshal(js, &cfg); err != nil { 140 return nil, fmt.Errorf("pickfirst: unable to unmarshal LB policy config: %s, error: %v", string(js), err) 141 } 142 return cfg, nil 143 } 144 145 // EnableHealthListener updates the state to configure pickfirst for using a 146 // generic health listener. 147 func EnableHealthListener(state resolver.State) resolver.State { 148 state.Attributes = state.Attributes.WithValue(enableHealthListenerKeyType{}, true) 149 return state 150 } 151 152 // IsManagedByPickfirst returns whether an address belongs to a SubConn 153 // managed by the pickfirst LB policy. 154 // TODO: https://github.com/grpc/grpc-go/issues/7915 - This is a hack to disable 155 // outlier_detection via the with connectivity listener when using pick_first. 156 // Once Dualstack changes are complete, all SubConns will be created by 157 // pick_first and outlier detection will only use the health listener for 158 // ejection. This hack can then be removed. 159 func IsManagedByPickfirst(addr resolver.Address) bool { 160 return addr.BalancerAttributes.Value(managedByPickfirstKeyType{}) != nil 161 } 162 163 type pfConfig struct { 164 serviceconfig.LoadBalancingConfig `json:"-"` 165 166 // If set to true, instructs the LB policy to shuffle the order of the list 167 // of endpoints received from the name resolver before attempting to 168 // connect to them. 169 ShuffleAddressList bool `json:"shuffleAddressList"` 170 } 171 172 // scData keeps track of the current state of the subConn. 173 // It is not safe for concurrent access. 174 type scData struct { 175 // The following fields are initialized at build time and read-only after 176 // that. 177 subConn balancer.SubConn 178 addr resolver.Address 179 180 rawConnectivityState connectivity.State 181 // The effective connectivity state based on raw connectivity, health state 182 // and after following sticky TransientFailure behaviour defined in A62. 183 effectiveState connectivity.State 184 lastErr error 185 connectionFailedInFirstPass bool 186 } 187 188 func (b *pickfirstBalancer) newSCData(addr resolver.Address) (*scData, error) { 189 addr.BalancerAttributes = addr.BalancerAttributes.WithValue(managedByPickfirstKeyType{}, true) 190 sd := &scData{ 191 rawConnectivityState: connectivity.Idle, 192 effectiveState: connectivity.Idle, 193 addr: addr, 194 } 195 sc, err := b.cc.NewSubConn([]resolver.Address{addr}, balancer.NewSubConnOptions{ 196 StateListener: func(state balancer.SubConnState) { 197 b.updateSubConnState(sd, state) 198 }, 199 }) 200 if err != nil { 201 return nil, err 202 } 203 sd.subConn = sc 204 return sd, nil 205 } 206 207 type pickfirstBalancer struct { 208 // The following fields are initialized at build time and read-only after 209 // that and therefore do not need to be guarded by a mutex. 210 logger *internalgrpclog.PrefixLogger 211 cc balancer.ClientConn 212 target string 213 metricsRecorder expstats.MetricsRecorder // guaranteed to be non nil 214 215 // The mutex is used to ensure synchronization of updates triggered 216 // from the idle picker and the already serialized resolver, 217 // SubConn state updates. 218 mu sync.Mutex 219 // State reported to the channel based on SubConn states and resolver 220 // updates. 221 state connectivity.State 222 // scData for active subonns mapped by address. 223 subConns *resolver.AddressMapV2[*scData] 224 addressList addressList 225 firstPass bool 226 numTF int 227 cancelConnectionTimer func() 228 healthCheckingEnabled bool 229 } 230 231 // ResolverError is called by the ClientConn when the name resolver produces 232 // an error or when pickfirst determined the resolver update to be invalid. 233 func (b *pickfirstBalancer) ResolverError(err error) { 234 b.mu.Lock() 235 defer b.mu.Unlock() 236 b.resolverErrorLocked(err) 237 } 238 239 func (b *pickfirstBalancer) resolverErrorLocked(err error) { 240 if b.logger.V(2) { 241 b.logger.Infof("Received error from the name resolver: %v", err) 242 } 243 244 // The picker will not change since the balancer does not currently 245 // report an error. If the balancer hasn't received a single good resolver 246 // update yet, transition to TRANSIENT_FAILURE. 247 if b.state != connectivity.TransientFailure && b.addressList.size() > 0 { 248 if b.logger.V(2) { 249 b.logger.Infof("Ignoring resolver error because balancer is using a previous good update.") 250 } 251 return 252 } 253 254 b.updateBalancerState(balancer.State{ 255 ConnectivityState: connectivity.TransientFailure, 256 Picker: &picker{err: fmt.Errorf("name resolver error: %v", err)}, 257 }) 258 } 259 260 func (b *pickfirstBalancer) UpdateClientConnState(state balancer.ClientConnState) error { 261 b.mu.Lock() 262 defer b.mu.Unlock() 263 b.cancelConnectionTimer() 264 if len(state.ResolverState.Addresses) == 0 && len(state.ResolverState.Endpoints) == 0 { 265 // Cleanup state pertaining to the previous resolver state. 266 // Treat an empty address list like an error by calling b.ResolverError. 267 b.closeSubConnsLocked() 268 b.addressList.updateAddrs(nil) 269 b.resolverErrorLocked(errors.New("produced zero addresses")) 270 return balancer.ErrBadResolverState 271 } 272 b.healthCheckingEnabled = state.ResolverState.Attributes.Value(enableHealthListenerKeyType{}) != nil 273 cfg, ok := state.BalancerConfig.(pfConfig) 274 if state.BalancerConfig != nil && !ok { 275 return fmt.Errorf("pickfirst: received illegal BalancerConfig (type %T): %v: %w", state.BalancerConfig, state.BalancerConfig, balancer.ErrBadResolverState) 276 } 277 278 if b.logger.V(2) { 279 b.logger.Infof("Received new config %s, resolver state %s", pretty.ToJSON(cfg), pretty.ToJSON(state.ResolverState)) 280 } 281 282 var newAddrs []resolver.Address 283 if endpoints := state.ResolverState.Endpoints; len(endpoints) != 0 { 284 // Perform the optional shuffling described in gRFC A62. The shuffling 285 // will change the order of endpoints but not touch the order of the 286 // addresses within each endpoint. - A61 287 if cfg.ShuffleAddressList { 288 endpoints = append([]resolver.Endpoint{}, endpoints...) 289 internal.RandShuffle(len(endpoints), func(i, j int) { endpoints[i], endpoints[j] = endpoints[j], endpoints[i] }) 290 } 291 292 // "Flatten the list by concatenating the ordered list of addresses for 293 // each of the endpoints, in order." - A61 294 for _, endpoint := range endpoints { 295 newAddrs = append(newAddrs, endpoint.Addresses...) 296 } 297 } else { 298 // Endpoints not set, process addresses until we migrate resolver 299 // emissions fully to Endpoints. The top channel does wrap emitted 300 // addresses with endpoints, however some balancers such as weighted 301 // target do not forward the corresponding correct endpoints down/split 302 // endpoints properly. Once all balancers correctly forward endpoints 303 // down, can delete this else conditional. 304 newAddrs = state.ResolverState.Addresses 305 if cfg.ShuffleAddressList { 306 newAddrs = append([]resolver.Address{}, newAddrs...) 307 internal.RandShuffle(len(endpoints), func(i, j int) { endpoints[i], endpoints[j] = endpoints[j], endpoints[i] }) 308 } 309 } 310 311 // If an address appears in multiple endpoints or in the same endpoint 312 // multiple times, we keep it only once. We will create only one SubConn 313 // for the address because an AddressMap is used to store SubConns. 314 // Not de-duplicating would result in attempting to connect to the same 315 // SubConn multiple times in the same pass. We don't want this. 316 newAddrs = deDupAddresses(newAddrs) 317 newAddrs = interleaveAddresses(newAddrs) 318 319 prevAddr := b.addressList.currentAddress() 320 prevSCData, found := b.subConns.Get(prevAddr) 321 prevAddrsCount := b.addressList.size() 322 isPrevRawConnectivityStateReady := found && prevSCData.rawConnectivityState == connectivity.Ready 323 b.addressList.updateAddrs(newAddrs) 324 325 // If the previous ready SubConn exists in new address list, 326 // keep this connection and don't create new SubConns. 327 if isPrevRawConnectivityStateReady && b.addressList.seekTo(prevAddr) { 328 return nil 329 } 330 331 b.reconcileSubConnsLocked(newAddrs) 332 // If it's the first resolver update or the balancer was already READY 333 // (but the new address list does not contain the ready SubConn) or 334 // CONNECTING, enter CONNECTING. 335 // We may be in TRANSIENT_FAILURE due to a previous empty address list, 336 // we should still enter CONNECTING because the sticky TF behaviour 337 // mentioned in A62 applies only when the TRANSIENT_FAILURE is reported 338 // due to connectivity failures. 339 if isPrevRawConnectivityStateReady || b.state == connectivity.Connecting || prevAddrsCount == 0 { 340 // Start connection attempt at first address. 341 b.forceUpdateConcludedStateLocked(balancer.State{ 342 ConnectivityState: connectivity.Connecting, 343 Picker: &picker{err: balancer.ErrNoSubConnAvailable}, 344 }) 345 b.startFirstPassLocked() 346 } else if b.state == connectivity.TransientFailure { 347 // If we're in TRANSIENT_FAILURE, we stay in TRANSIENT_FAILURE until 348 // we're READY. See A62. 349 b.startFirstPassLocked() 350 } 351 return nil 352 } 353 354 // UpdateSubConnState is unused as a StateListener is always registered when 355 // creating SubConns. 356 func (b *pickfirstBalancer) UpdateSubConnState(subConn balancer.SubConn, state balancer.SubConnState) { 357 b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", subConn, state) 358 } 359 360 func (b *pickfirstBalancer) Close() { 361 b.mu.Lock() 362 defer b.mu.Unlock() 363 b.closeSubConnsLocked() 364 b.cancelConnectionTimer() 365 b.state = connectivity.Shutdown 366 } 367 368 // ExitIdle moves the balancer out of idle state. It can be called concurrently 369 // by the idlePicker and clientConn so access to variables should be 370 // synchronized. 371 func (b *pickfirstBalancer) ExitIdle() { 372 b.mu.Lock() 373 defer b.mu.Unlock() 374 if b.state == connectivity.Idle { 375 b.startFirstPassLocked() 376 } 377 } 378 379 func (b *pickfirstBalancer) startFirstPassLocked() { 380 b.firstPass = true 381 b.numTF = 0 382 // Reset the connection attempt record for existing SubConns. 383 for _, sd := range b.subConns.Values() { 384 sd.connectionFailedInFirstPass = false 385 } 386 b.requestConnectionLocked() 387 } 388 389 func (b *pickfirstBalancer) closeSubConnsLocked() { 390 for _, sd := range b.subConns.Values() { 391 sd.subConn.Shutdown() 392 } 393 b.subConns = resolver.NewAddressMapV2[*scData]() 394 } 395 396 // deDupAddresses ensures that each address appears only once in the slice. 397 func deDupAddresses(addrs []resolver.Address) []resolver.Address { 398 seenAddrs := resolver.NewAddressMapV2[*scData]() 399 retAddrs := []resolver.Address{} 400 401 for _, addr := range addrs { 402 if _, ok := seenAddrs.Get(addr); ok { 403 continue 404 } 405 retAddrs = append(retAddrs, addr) 406 } 407 return retAddrs 408 } 409 410 // interleaveAddresses interleaves addresses of both families (IPv4 and IPv6) 411 // as per RFC-8305 section 4. 412 // Whichever address family is first in the list is followed by an address of 413 // the other address family; that is, if the first address in the list is IPv6, 414 // then the first IPv4 address should be moved up in the list to be second in 415 // the list. It doesn't support configuring "First Address Family Count", i.e. 416 // there will always be a single member of the first address family at the 417 // beginning of the interleaved list. 418 // Addresses that are neither IPv4 nor IPv6 are treated as part of a third 419 // "unknown" family for interleaving. 420 // See: https://datatracker.ietf.org/doc/html/rfc8305#autoid-6 421 func interleaveAddresses(addrs []resolver.Address) []resolver.Address { 422 familyAddrsMap := map[ipAddrFamily][]resolver.Address{} 423 interleavingOrder := []ipAddrFamily{} 424 for _, addr := range addrs { 425 family := addressFamily(addr.Addr) 426 if _, found := familyAddrsMap[family]; !found { 427 interleavingOrder = append(interleavingOrder, family) 428 } 429 familyAddrsMap[family] = append(familyAddrsMap[family], addr) 430 } 431 432 interleavedAddrs := make([]resolver.Address, 0, len(addrs)) 433 434 for curFamilyIdx := 0; len(interleavedAddrs) < len(addrs); curFamilyIdx = (curFamilyIdx + 1) % len(interleavingOrder) { 435 // Some IP types may have fewer addresses than others, so we look for 436 // the next type that has a remaining member to add to the interleaved 437 // list. 438 family := interleavingOrder[curFamilyIdx] 439 remainingMembers := familyAddrsMap[family] 440 if len(remainingMembers) > 0 { 441 interleavedAddrs = append(interleavedAddrs, remainingMembers[0]) 442 familyAddrsMap[family] = remainingMembers[1:] 443 } 444 } 445 446 return interleavedAddrs 447 } 448 449 // addressFamily returns the ipAddrFamily after parsing the address string. 450 // If the address isn't of the format "ip-address:port", it returns 451 // ipAddrFamilyUnknown. The address may be valid even if it's not an IP when 452 // using a resolver like passthrough where the address may be a hostname in 453 // some format that the dialer can resolve. 454 func addressFamily(address string) ipAddrFamily { 455 // Parse the IP after removing the port. 456 host, _, err := net.SplitHostPort(address) 457 if err != nil { 458 return ipAddrFamilyUnknown 459 } 460 ip, err := netip.ParseAddr(host) 461 if err != nil { 462 return ipAddrFamilyUnknown 463 } 464 switch { 465 case ip.Is4() || ip.Is4In6(): 466 return ipAddrFamilyV4 467 case ip.Is6(): 468 return ipAddrFamilyV6 469 default: 470 return ipAddrFamilyUnknown 471 } 472 } 473 474 // reconcileSubConnsLocked updates the active subchannels based on a new address 475 // list from the resolver. It does this by: 476 // - closing subchannels: any existing subchannels associated with addresses 477 // that are no longer in the updated list are shut down. 478 // - removing subchannels: entries for these closed subchannels are removed 479 // from the subchannel map. 480 // 481 // This ensures that the subchannel map accurately reflects the current set of 482 // addresses received from the name resolver. 483 func (b *pickfirstBalancer) reconcileSubConnsLocked(newAddrs []resolver.Address) { 484 newAddrsMap := resolver.NewAddressMapV2[bool]() 485 for _, addr := range newAddrs { 486 newAddrsMap.Set(addr, true) 487 } 488 489 for _, oldAddr := range b.subConns.Keys() { 490 if _, ok := newAddrsMap.Get(oldAddr); ok { 491 continue 492 } 493 val, _ := b.subConns.Get(oldAddr) 494 val.subConn.Shutdown() 495 b.subConns.Delete(oldAddr) 496 } 497 } 498 499 // shutdownRemainingLocked shuts down remaining subConns. Called when a subConn 500 // becomes ready, which means that all other subConn must be shutdown. 501 func (b *pickfirstBalancer) shutdownRemainingLocked(selected *scData) { 502 b.cancelConnectionTimer() 503 for _, sd := range b.subConns.Values() { 504 if sd.subConn != selected.subConn { 505 sd.subConn.Shutdown() 506 } 507 } 508 b.subConns = resolver.NewAddressMapV2[*scData]() 509 b.subConns.Set(selected.addr, selected) 510 } 511 512 // requestConnectionLocked starts connecting on the subchannel corresponding to 513 // the current address. If no subchannel exists, one is created. If the current 514 // subchannel is in TransientFailure, a connection to the next address is 515 // attempted until a subchannel is found. 516 func (b *pickfirstBalancer) requestConnectionLocked() { 517 if !b.addressList.isValid() { 518 return 519 } 520 var lastErr error 521 for valid := true; valid; valid = b.addressList.increment() { 522 curAddr := b.addressList.currentAddress() 523 sd, ok := b.subConns.Get(curAddr) 524 if !ok { 525 var err error 526 // We want to assign the new scData to sd from the outer scope, 527 // hence we can't use := below. 528 sd, err = b.newSCData(curAddr) 529 if err != nil { 530 // This should never happen, unless the clientConn is being shut 531 // down. 532 if b.logger.V(2) { 533 b.logger.Infof("Failed to create a subConn for address %v: %v", curAddr.String(), err) 534 } 535 // Do nothing, the LB policy will be closed soon. 536 return 537 } 538 b.subConns.Set(curAddr, sd) 539 } 540 541 switch sd.rawConnectivityState { 542 case connectivity.Idle: 543 sd.subConn.Connect() 544 b.scheduleNextConnectionLocked() 545 return 546 case connectivity.TransientFailure: 547 // The SubConn is being re-used and failed during a previous pass 548 // over the addressList. It has not completed backoff yet. 549 // Mark it as having failed and try the next address. 550 sd.connectionFailedInFirstPass = true 551 lastErr = sd.lastErr 552 continue 553 case connectivity.Connecting: 554 // Wait for the connection attempt to complete or the timer to fire 555 // before attempting the next address. 556 b.scheduleNextConnectionLocked() 557 return 558 default: 559 b.logger.Errorf("SubConn with unexpected state %v present in SubConns map.", sd.rawConnectivityState) 560 return 561 562 } 563 } 564 565 // All the remaining addresses in the list are in TRANSIENT_FAILURE, end the 566 // first pass if possible. 567 b.endFirstPassIfPossibleLocked(lastErr) 568 } 569 570 func (b *pickfirstBalancer) scheduleNextConnectionLocked() { 571 b.cancelConnectionTimer() 572 if !b.addressList.hasNext() { 573 return 574 } 575 curAddr := b.addressList.currentAddress() 576 cancelled := false // Access to this is protected by the balancer's mutex. 577 closeFn := internal.TimeAfterFunc(connectionDelayInterval, func() { 578 b.mu.Lock() 579 defer b.mu.Unlock() 580 // If the scheduled task is cancelled while acquiring the mutex, return. 581 if cancelled { 582 return 583 } 584 if b.logger.V(2) { 585 b.logger.Infof("Happy Eyeballs timer expired while waiting for connection to %q.", curAddr.Addr) 586 } 587 if b.addressList.increment() { 588 b.requestConnectionLocked() 589 } 590 }) 591 // Access to the cancellation callback held by the balancer is guarded by 592 // the balancer's mutex, so it's safe to set the boolean from the callback. 593 b.cancelConnectionTimer = sync.OnceFunc(func() { 594 cancelled = true 595 closeFn() 596 }) 597 } 598 599 func (b *pickfirstBalancer) updateSubConnState(sd *scData, newState balancer.SubConnState) { 600 b.mu.Lock() 601 defer b.mu.Unlock() 602 oldState := sd.rawConnectivityState 603 sd.rawConnectivityState = newState.ConnectivityState 604 // Previously relevant SubConns can still callback with state updates. 605 // To prevent pickers from returning these obsolete SubConns, this logic 606 // is included to check if the current list of active SubConns includes this 607 // SubConn. 608 if !b.isActiveSCData(sd) { 609 return 610 } 611 if newState.ConnectivityState == connectivity.Shutdown { 612 sd.effectiveState = connectivity.Shutdown 613 return 614 } 615 616 // Record a connection attempt when exiting CONNECTING. 617 if newState.ConnectivityState == connectivity.TransientFailure { 618 sd.connectionFailedInFirstPass = true 619 connectionAttemptsFailedMetric.Record(b.metricsRecorder, 1, b.target) 620 } 621 622 if newState.ConnectivityState == connectivity.Ready { 623 connectionAttemptsSucceededMetric.Record(b.metricsRecorder, 1, b.target) 624 b.shutdownRemainingLocked(sd) 625 if !b.addressList.seekTo(sd.addr) { 626 // This should not fail as we should have only one SubConn after 627 // entering READY. The SubConn should be present in the addressList. 628 b.logger.Errorf("Address %q not found address list in %v", sd.addr, b.addressList.addresses) 629 return 630 } 631 if !b.healthCheckingEnabled { 632 if b.logger.V(2) { 633 b.logger.Infof("SubConn %p reported connectivity state READY and the health listener is disabled. Transitioning SubConn to READY.", sd.subConn) 634 } 635 636 sd.effectiveState = connectivity.Ready 637 b.updateBalancerState(balancer.State{ 638 ConnectivityState: connectivity.Ready, 639 Picker: &picker{result: balancer.PickResult{SubConn: sd.subConn}}, 640 }) 641 return 642 } 643 if b.logger.V(2) { 644 b.logger.Infof("SubConn %p reported connectivity state READY. Registering health listener.", sd.subConn) 645 } 646 // Send a CONNECTING update to take the SubConn out of sticky-TF if 647 // required. 648 sd.effectiveState = connectivity.Connecting 649 b.updateBalancerState(balancer.State{ 650 ConnectivityState: connectivity.Connecting, 651 Picker: &picker{err: balancer.ErrNoSubConnAvailable}, 652 }) 653 sd.subConn.RegisterHealthListener(func(scs balancer.SubConnState) { 654 b.updateSubConnHealthState(sd, scs) 655 }) 656 return 657 } 658 659 // If the LB policy is READY, and it receives a subchannel state change, 660 // it means that the READY subchannel has failed. 661 // A SubConn can also transition from CONNECTING directly to IDLE when 662 // a transport is successfully created, but the connection fails 663 // before the SubConn can send the notification for READY. We treat 664 // this as a successful connection and transition to IDLE. 665 // TODO: https://github.com/grpc/grpc-go/issues/7862 - Remove the second 666 // part of the if condition below once the issue is fixed. 667 if oldState == connectivity.Ready || (oldState == connectivity.Connecting && newState.ConnectivityState == connectivity.Idle) { 668 // Once a transport fails, the balancer enters IDLE and starts from 669 // the first address when the picker is used. 670 b.shutdownRemainingLocked(sd) 671 sd.effectiveState = newState.ConnectivityState 672 // READY SubConn interspliced in between CONNECTING and IDLE, need to 673 // account for that. 674 if oldState == connectivity.Connecting { 675 // A known issue (https://github.com/grpc/grpc-go/issues/7862) 676 // causes a race that prevents the READY state change notification. 677 // This works around it. 678 connectionAttemptsSucceededMetric.Record(b.metricsRecorder, 1, b.target) 679 } 680 disconnectionsMetric.Record(b.metricsRecorder, 1, b.target) 681 b.addressList.reset() 682 b.updateBalancerState(balancer.State{ 683 ConnectivityState: connectivity.Idle, 684 Picker: &idlePicker{exitIdle: sync.OnceFunc(b.ExitIdle)}, 685 }) 686 return 687 } 688 689 if b.firstPass { 690 switch newState.ConnectivityState { 691 case connectivity.Connecting: 692 // The effective state can be in either IDLE, CONNECTING or 693 // TRANSIENT_FAILURE. If it's TRANSIENT_FAILURE, stay in 694 // TRANSIENT_FAILURE until it's READY. See A62. 695 if sd.effectiveState != connectivity.TransientFailure { 696 sd.effectiveState = connectivity.Connecting 697 b.updateBalancerState(balancer.State{ 698 ConnectivityState: connectivity.Connecting, 699 Picker: &picker{err: balancer.ErrNoSubConnAvailable}, 700 }) 701 } 702 case connectivity.TransientFailure: 703 sd.lastErr = newState.ConnectionError 704 sd.effectiveState = connectivity.TransientFailure 705 // Since we're re-using common SubConns while handling resolver 706 // updates, we could receive an out of turn TRANSIENT_FAILURE from 707 // a pass over the previous address list. Happy Eyeballs will also 708 // cause out of order updates to arrive. 709 710 if curAddr := b.addressList.currentAddress(); equalAddressIgnoringBalAttributes(&curAddr, &sd.addr) { 711 b.cancelConnectionTimer() 712 if b.addressList.increment() { 713 b.requestConnectionLocked() 714 return 715 } 716 } 717 718 // End the first pass if we've seen a TRANSIENT_FAILURE from all 719 // SubConns once. 720 b.endFirstPassIfPossibleLocked(newState.ConnectionError) 721 } 722 return 723 } 724 725 // We have finished the first pass, keep re-connecting failing SubConns. 726 switch newState.ConnectivityState { 727 case connectivity.TransientFailure: 728 b.numTF = (b.numTF + 1) % b.subConns.Len() 729 sd.lastErr = newState.ConnectionError 730 if b.numTF%b.subConns.Len() == 0 { 731 b.updateBalancerState(balancer.State{ 732 ConnectivityState: connectivity.TransientFailure, 733 Picker: &picker{err: newState.ConnectionError}, 734 }) 735 } 736 // We don't need to request re-resolution since the SubConn already 737 // does that before reporting TRANSIENT_FAILURE. 738 // TODO: #7534 - Move re-resolution requests from SubConn into 739 // pick_first. 740 case connectivity.Idle: 741 sd.subConn.Connect() 742 } 743 } 744 745 // endFirstPassIfPossibleLocked ends the first happy-eyeballs pass if all the 746 // addresses are tried and their SubConns have reported a failure. 747 func (b *pickfirstBalancer) endFirstPassIfPossibleLocked(lastErr error) { 748 // An optimization to avoid iterating over the entire SubConn map. 749 if b.addressList.isValid() { 750 return 751 } 752 // Connect() has been called on all the SubConns. The first pass can be 753 // ended if all the SubConns have reported a failure. 754 for _, sd := range b.subConns.Values() { 755 if !sd.connectionFailedInFirstPass { 756 return 757 } 758 } 759 b.firstPass = false 760 b.updateBalancerState(balancer.State{ 761 ConnectivityState: connectivity.TransientFailure, 762 Picker: &picker{err: lastErr}, 763 }) 764 // Start re-connecting all the SubConns that are already in IDLE. 765 for _, sd := range b.subConns.Values() { 766 if sd.rawConnectivityState == connectivity.Idle { 767 sd.subConn.Connect() 768 } 769 } 770 } 771 772 func (b *pickfirstBalancer) isActiveSCData(sd *scData) bool { 773 activeSD, found := b.subConns.Get(sd.addr) 774 return found && activeSD == sd 775 } 776 777 func (b *pickfirstBalancer) updateSubConnHealthState(sd *scData, state balancer.SubConnState) { 778 b.mu.Lock() 779 defer b.mu.Unlock() 780 // Previously relevant SubConns can still callback with state updates. 781 // To prevent pickers from returning these obsolete SubConns, this logic 782 // is included to check if the current list of active SubConns includes 783 // this SubConn. 784 if !b.isActiveSCData(sd) { 785 return 786 } 787 sd.effectiveState = state.ConnectivityState 788 switch state.ConnectivityState { 789 case connectivity.Ready: 790 b.updateBalancerState(balancer.State{ 791 ConnectivityState: connectivity.Ready, 792 Picker: &picker{result: balancer.PickResult{SubConn: sd.subConn}}, 793 }) 794 case connectivity.TransientFailure: 795 b.updateBalancerState(balancer.State{ 796 ConnectivityState: connectivity.TransientFailure, 797 Picker: &picker{err: fmt.Errorf("pickfirst: health check failure: %v", state.ConnectionError)}, 798 }) 799 case connectivity.Connecting: 800 b.updateBalancerState(balancer.State{ 801 ConnectivityState: connectivity.Connecting, 802 Picker: &picker{err: balancer.ErrNoSubConnAvailable}, 803 }) 804 default: 805 b.logger.Errorf("Got unexpected health update for SubConn %p: %v", state) 806 } 807 } 808 809 // updateBalancerState stores the state reported to the channel and calls 810 // ClientConn.UpdateState(). As an optimization, it avoids sending duplicate 811 // updates to the channel. 812 func (b *pickfirstBalancer) updateBalancerState(newState balancer.State) { 813 // In case of TransientFailures allow the picker to be updated to update 814 // the connectivity error, in all other cases don't send duplicate state 815 // updates. 816 if newState.ConnectivityState == b.state && b.state != connectivity.TransientFailure { 817 return 818 } 819 b.forceUpdateConcludedStateLocked(newState) 820 } 821 822 // forceUpdateConcludedStateLocked stores the state reported to the channel and 823 // calls ClientConn.UpdateState(). 824 // A separate function is defined to force update the ClientConn state since the 825 // channel doesn't correctly assume that LB policies start in CONNECTING and 826 // relies on LB policy to send an initial CONNECTING update. 827 func (b *pickfirstBalancer) forceUpdateConcludedStateLocked(newState balancer.State) { 828 b.state = newState.ConnectivityState 829 b.cc.UpdateState(newState) 830 } 831 832 type picker struct { 833 result balancer.PickResult 834 err error 835 } 836 837 func (p *picker) Pick(balancer.PickInfo) (balancer.PickResult, error) { 838 return p.result, p.err 839 } 840 841 // idlePicker is used when the SubConn is IDLE and kicks the SubConn into 842 // CONNECTING when Pick is called. 843 type idlePicker struct { 844 exitIdle func() 845 } 846 847 func (i *idlePicker) Pick(balancer.PickInfo) (balancer.PickResult, error) { 848 i.exitIdle() 849 return balancer.PickResult{}, balancer.ErrNoSubConnAvailable 850 } 851 852 // addressList manages sequentially iterating over addresses present in a list 853 // of endpoints. It provides a 1 dimensional view of the addresses present in 854 // the endpoints. 855 // This type is not safe for concurrent access. 856 type addressList struct { 857 addresses []resolver.Address 858 idx int 859 } 860 861 func (al *addressList) isValid() bool { 862 return al.idx < len(al.addresses) 863 } 864 865 func (al *addressList) size() int { 866 return len(al.addresses) 867 } 868 869 // increment moves to the next index in the address list. 870 // This method returns false if it went off the list, true otherwise. 871 func (al *addressList) increment() bool { 872 if !al.isValid() { 873 return false 874 } 875 al.idx++ 876 return al.idx < len(al.addresses) 877 } 878 879 // currentAddress returns the current address pointed to in the addressList. 880 // If the list is in an invalid state, it returns an empty address instead. 881 func (al *addressList) currentAddress() resolver.Address { 882 if !al.isValid() { 883 return resolver.Address{} 884 } 885 return al.addresses[al.idx] 886 } 887 888 func (al *addressList) reset() { 889 al.idx = 0 890 } 891 892 func (al *addressList) updateAddrs(addrs []resolver.Address) { 893 al.addresses = addrs 894 al.reset() 895 } 896 897 // seekTo returns false if the needle was not found and the current index was 898 // left unchanged. 899 func (al *addressList) seekTo(needle resolver.Address) bool { 900 for ai, addr := range al.addresses { 901 if !equalAddressIgnoringBalAttributes(&addr, &needle) { 902 continue 903 } 904 al.idx = ai 905 return true 906 } 907 return false 908 } 909 910 // hasNext returns whether incrementing the addressList will result in moving 911 // past the end of the list. If the list has already moved past the end, it 912 // returns false. 913 func (al *addressList) hasNext() bool { 914 if !al.isValid() { 915 return false 916 } 917 return al.idx+1 < len(al.addresses) 918 } 919 920 // equalAddressIgnoringBalAttributes returns true is a and b are considered 921 // equal. This is different from the Equal method on the resolver.Address type 922 // which considers all fields to determine equality. Here, we only consider 923 // fields that are meaningful to the SubConn. 924 func equalAddressIgnoringBalAttributes(a, b *resolver.Address) bool { 925 return a.Addr == b.Addr && a.ServerName == b.ServerName && 926 a.Attributes.Equal(b.Attributes) 927 }