/*
 *
 * Copyright 2022 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

// Package outlierdetection provides an implementation of the outlier detection
// LB policy, as defined in
// https://github.com/grpc/proposal/blob/master/A50-xds-outlier-detection.md.
package outlierdetection

import (
	"encoding/json"
	"fmt"
	"math"
	rand "math/rand/v2"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"google.golang.org/grpc/balancer"
	"google.golang.org/grpc/connectivity"
	"google.golang.org/grpc/internal/balancer/gracefulswitch"
	"google.golang.org/grpc/internal/buffer"
	"google.golang.org/grpc/internal/channelz"
	"google.golang.org/grpc/internal/grpclog"
	"google.golang.org/grpc/internal/grpcsync"
	iserviceconfig "google.golang.org/grpc/internal/serviceconfig"
	"google.golang.org/grpc/resolver"
	"google.golang.org/grpc/serviceconfig"
)

// Globals to stub out in tests. Tests replace these to control timer creation
// and the clock used by the interval timer algorithm.
var (
	afterFunc = time.AfterFunc
	now       = time.Now
)

// Name is the name of the outlier detection balancer.
const Name = "outlier_detection_experimental"

func init() {
	balancer.Register(bb{})
}

// bb is the balancer.Builder for the outlier detection LB policy.
type bb struct{}

// Build constructs an outlierDetectionBalancer wrapping a graceful-switch
// child balancer and starts the run() goroutine that serializes updates.
func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Balancer {
	b := &outlierDetectionBalancer{
		ClientConn:     cc,
		closed:         grpcsync.NewEvent(),
		done:           grpcsync.NewEvent(),
		addrs:          make(map[string]*endpointInfo),
		scUpdateCh:     buffer.NewUnbounded(),
		pickerUpdateCh: buffer.NewUnbounded(),
		channelzParent: bOpts.ChannelzParent,
		endpoints:      resolver.NewEndpointMap[*endpointInfo](),
	}
	b.logger = prefixLogger(b)
	b.logger.Infof("Created")
	b.child = synchronizingBalancerWrapper{lb: gracefulswitch.NewBalancer(b, bOpts)}
	go b.run()
	return b
}

// ParseConfig unmarshals and validates an outlier detection LB config per
// gRFC A50, applying the documented top-level defaults first.
func (bb) ParseConfig(s json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
	lbCfg := &LBConfig{
		// Default top layer values as documented in A50.
		Interval:           iserviceconfig.Duration(10 * time.Second),
		BaseEjectionTime:   iserviceconfig.Duration(30 * time.Second),
		MaxEjectionTime:    iserviceconfig.Duration(300 * time.Second),
		MaxEjectionPercent: 10,
	}

	// This unmarshalling handles underlying layers sre and fpe which have their
	// own defaults for their fields if either sre or fpe are present.
	if err := json.Unmarshal(s, lbCfg); err != nil { // Validates child config if present as well.
		return nil, fmt.Errorf("xds: unable to unmarshal LBconfig: %s, error: %v", string(s), err)
	}

	// Note: in the xds flow, these validations will never fail. The xdsclient
	// performs the same validations as here on the xds Outlier Detection
	// resource before parsing resource into JSON which this function gets
	// called with. A50 defines two separate places for these validations to
	// take place, the xdsclient and this ParseConfig method. "When parsing a
	// config from JSON, if any of these requirements is violated, that should
	// be treated as a parsing error." - A50
	switch {
	// "The google.protobuf.Duration fields interval, base_ejection_time, and
	// max_ejection_time must obey the restrictions in the
	// google.protobuf.Duration documentation and they must have non-negative
	// values." - A50
	// Approximately 290 years is the maximum time that time.Duration (int64)
	// can represent. The restrictions on the protobuf.Duration field are to be
	// within +-10000 years. Thus, just check for negative values.
	case lbCfg.Interval < 0:
		return nil, fmt.Errorf("OutlierDetectionLoadBalancingConfig.interval = %s; must be >= 0", lbCfg.Interval)
	case lbCfg.BaseEjectionTime < 0:
		return nil, fmt.Errorf("OutlierDetectionLoadBalancingConfig.base_ejection_time = %s; must be >= 0", lbCfg.BaseEjectionTime)
	case lbCfg.MaxEjectionTime < 0:
		return nil, fmt.Errorf("OutlierDetectionLoadBalancingConfig.max_ejection_time = %s; must be >= 0", lbCfg.MaxEjectionTime)

	// "The fields max_ejection_percent,
	// success_rate_ejection.enforcement_percentage,
	// failure_percentage_ejection.threshold, and
	// failure_percentage.enforcement_percentage must have values less than or
	// equal to 100." - A50
	case lbCfg.MaxEjectionPercent > 100:
		return nil, fmt.Errorf("OutlierDetectionLoadBalancingConfig.max_ejection_percent = %v; must be <= 100", lbCfg.MaxEjectionPercent)
	case lbCfg.SuccessRateEjection != nil && lbCfg.SuccessRateEjection.EnforcementPercentage > 100:
		return nil, fmt.Errorf("OutlierDetectionLoadBalancingConfig.SuccessRateEjection.enforcement_percentage = %v; must be <= 100", lbCfg.SuccessRateEjection.EnforcementPercentage)
	case lbCfg.FailurePercentageEjection != nil && lbCfg.FailurePercentageEjection.Threshold > 100:
		return nil, fmt.Errorf("OutlierDetectionLoadBalancingConfig.FailurePercentageEjection.threshold = %v; must be <= 100", lbCfg.FailurePercentageEjection.Threshold)
	case lbCfg.FailurePercentageEjection != nil && lbCfg.FailurePercentageEjection.EnforcementPercentage > 100:
		return nil, fmt.Errorf("OutlierDetectionLoadBalancingConfig.FailurePercentageEjection.enforcement_percentage = %v; must be <= 100", lbCfg.FailurePercentageEjection.EnforcementPercentage)
	}
	return lbCfg, nil
}

// Name returns the registered name of the outlier detection balancer.
func (bb) Name() string {
	return Name
}

// scUpdate wraps a subConn update to be sent to the child balancer.
type scUpdate struct {
	scw   *subConnWrapper
	state balancer.SubConnState
}

// ejectionUpdate wraps an ejection state change for a single SubConn wrapper,
// processed serially in run().
type ejectionUpdate struct {
	scw       *subConnWrapper
	isEjected bool // true for ejected, false for unejected
}

// lbCfgUpdate carries a new LB config to the run() goroutine.
type lbCfgUpdate struct {
	lbCfg *LBConfig
	// to make sure picker is updated synchronously.
	done chan struct{}
}

// scHealthUpdate wraps a SubConn health state change to be forwarded to the
// child balancer's health listener.
type scHealthUpdate struct {
	scw   *subConnWrapper
	state balancer.SubConnState
}

type outlierDetectionBalancer struct {
	balancer.ClientConn
	// These fields are safe to be accessed without holding any mutex because
	// they are synchronized in run(), which makes these field accesses happen
	// serially.
	//
	// childState is the latest balancer state received from the child.
	childState balancer.State
	// recentPickerNoop represents whether the most recent picker sent upward to
	// the balancer.ClientConn is a noop picker, which doesn't count RPC's. Used
	// to suppress redundant picker updates.
	recentPickerNoop bool

	closed         *grpcsync.Event
	done           *grpcsync.Event
	logger         *grpclog.PrefixLogger
	channelzParent channelz.Identifier

	child synchronizingBalancerWrapper

	// mu guards access to the following fields. It also helps to synchronize
	// behaviors of the following events: config updates, firing of the interval
	// timer, SubConn State updates, SubConn address updates, and child state
	// updates.
	//
	// For example, when we receive a config update in the middle of the
	// interval timer algorithm, which uses knobs present in the config, the
	// balancer will wait for the interval timer algorithm to finish before
	// persisting the new configuration.
	//
	// Another example would be the updating of the endpoints or addrs map, such
	// as from a SubConn address update in the middle of the interval timer
	// algorithm which uses endpoints. This balancer waits for the interval
	// timer algorithm to finish before making the update to the endpoints map.
	//
	// This mutex is never held when calling methods on the child policy
	// (within the context of a single goroutine).
	mu sync.Mutex
	// endpoints stores pointers to endpointInfo objects for each endpoint.
	endpoints *resolver.EndpointMap[*endpointInfo]
	// addrs stores pointers to endpointInfo objects for each address. Addresses
	// belonging to the same endpoint point to the same object.
	addrs                 map[string]*endpointInfo
	cfg                   *LBConfig
	timerStartTime        time.Time
	intervalTimer         *time.Timer
	inhibitPickerUpdates  bool
	updateUnconditionally bool
	numEndpointsEjected   int // For fast calculations of percentage of endpoints ejected

	scUpdateCh     *buffer.Unbounded
	pickerUpdateCh *buffer.Unbounded
}

// noopConfig returns whether this balancer is configured with a logical no-op
// configuration or not.
//
// Caller must hold b.mu.
func (b *outlierDetectionBalancer) noopConfig() bool {
	return b.cfg.SuccessRateEjection == nil && b.cfg.FailurePercentageEjection == nil
}

// onIntervalConfig handles logic required specifically on the receipt of a
// configuration which specifies to count RPC's and periodically perform passive
// health checking based on heuristics defined in configuration every configured
// interval.
//
// Caller must hold b.mu.
func (b *outlierDetectionBalancer) onIntervalConfig() {
	var interval time.Duration
	if b.timerStartTime.IsZero() {
		b.timerStartTime = time.Now()
		for _, epInfo := range b.endpoints.Values() {
			epInfo.callCounter.clear()
		}
		interval = time.Duration(b.cfg.Interval)
	} else {
		// A timer is already running; schedule the next firing for the
		// remainder of the (possibly new) interval.
		interval = time.Duration(b.cfg.Interval) - now().Sub(b.timerStartTime)
		if interval < 0 {
			interval = 0
		}
	}
	b.intervalTimer = afterFunc(interval, b.intervalTimerAlgorithm)
}

// onNoopConfig handles logic required specifically on the receipt of a
// configuration which specifies the balancer to be a noop.
//
// Caller must hold b.mu.
func (b *outlierDetectionBalancer) onNoopConfig() {
	// "If a config is provided with both the `success_rate_ejection` and
	// `failure_percentage_ejection` fields unset, skip starting the timer and
	// do the following:"
	// "Unset the timer start timestamp."
	b.timerStartTime = time.Time{}
	for _, epInfo := range b.endpoints.Values() {
		// "Uneject all currently ejected endpoints."
		if !epInfo.latestEjectionTimestamp.IsZero() {
			b.unejectEndpoint(epInfo)
		}
		// "Reset each endpoint's ejection time multiplier to 0."
		epInfo.ejectionTimeMultiplier = 0
	}
}

// UpdateClientConnState validates and persists the new config, rebuilds the
// endpoint and address maps from the resolver state, (re)starts or stops the
// interval timer, and forwards the state to the child policy.
func (b *outlierDetectionBalancer) UpdateClientConnState(s balancer.ClientConnState) error {
	lbCfg, ok := s.BalancerConfig.(*LBConfig)
	if !ok {
		b.logger.Errorf("received config with unexpected type %T: %v", s.BalancerConfig, s.BalancerConfig)
		return balancer.ErrBadResolverState
	}

	// Reject whole config if child policy doesn't exist, don't persist it for
	// later.
	bb := balancer.Get(lbCfg.ChildPolicy.Name)
	if bb == nil {
		return fmt.Errorf("outlier detection: child balancer %q not registered", lbCfg.ChildPolicy.Name)
	}

	// It is safe to read b.cfg here without holding the mutex, as the only
	// write to b.cfg happens later in this function. This function is part of
	// the balancer.Balancer API, so it is guaranteed to be called in a
	// synchronous manner, so it cannot race with this read.
	if b.cfg == nil || b.cfg.ChildPolicy.Name != lbCfg.ChildPolicy.Name {
		if err := b.child.switchTo(bb); err != nil {
			return fmt.Errorf("outlier detection: error switching to child of type %q: %v", lbCfg.ChildPolicy.Name, err)
		}
	}

	b.mu.Lock()
	// Inhibit child picker updates until this UpdateClientConnState() call
	// completes. If needed, a picker update containing the no-op config bit
	// determined from this config and most recent state from the child will be
	// sent synchronously upward at the end of this UpdateClientConnState()
	// call.
	b.inhibitPickerUpdates = true
	b.updateUnconditionally = false
	b.cfg = lbCfg

	newEndpoints := resolver.NewEndpointMap[bool]()
	for _, ep := range s.ResolverState.Endpoints {
		newEndpoints.Set(ep, true)
		if _, ok := b.endpoints.Get(ep); !ok {
			b.endpoints.Set(ep, newEndpointInfo())
		}
	}

	// Drop tracking state for endpoints no longer present in the resolver
	// state.
	for _, ep := range b.endpoints.Keys() {
		if _, ok := newEndpoints.Get(ep); !ok {
			b.endpoints.Delete(ep)
		}
	}

	// populate the addrs map.
	b.addrs = map[string]*endpointInfo{}
	for _, ep := range s.ResolverState.Endpoints {
		epInfo, _ := b.endpoints.Get(ep)
		for _, addr := range ep.Addresses {
			if _, ok := b.addrs[addr.Addr]; ok {
				b.logger.Errorf("Endpoints contain duplicate address %q", addr.Addr)
				continue
			}
			b.addrs[addr.Addr] = epInfo
		}
	}

	if b.intervalTimer != nil {
		b.intervalTimer.Stop()
	}

	if b.noopConfig() {
		b.onNoopConfig()
	} else {
		b.onIntervalConfig()
	}
	b.mu.Unlock()

	err := b.child.updateClientConnState(balancer.ClientConnState{
		ResolverState:  s.ResolverState,
		BalancerConfig: b.cfg.ChildPolicy.Config,
	})

	// Synchronously wait for run() to process the config so a picker carrying
	// the correct no-op bit is sent upward before this call returns.
	done := make(chan struct{})
	b.pickerUpdateCh.Put(lbCfgUpdate{
		lbCfg: lbCfg,
		done:  done,
	})
	<-done

	return err
}

// ResolverError forwards the resolver error to the child policy.
func (b *outlierDetectionBalancer) ResolverError(err error) {
	b.child.resolverError(err)
}

// updateSubConnState records the latest connectivity state on the wrapper and
// queues the update for serial processing in run().
func (b *outlierDetectionBalancer) updateSubConnState(scw *subConnWrapper, state balancer.SubConnState) {
	b.mu.Lock()
	defer b.mu.Unlock()
	scw.setLatestConnectivityState(state.ConnectivityState)
	b.scUpdateCh.Put(&scUpdate{
		scw:   scw,
		state: state,
	})
}

// UpdateSubConnState is unused; SubConn state is delivered via per-SubConn
// state listeners instead.
func (b *outlierDetectionBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
	b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", sc, state)
}

// Close shuts down the run() goroutine, the child policy, the update channels,
// and the interval timer.
func (b *outlierDetectionBalancer) Close() {
	b.closed.Fire()
	<-b.done.Done()
	b.child.closeLB()

	b.scUpdateCh.Close()
	b.pickerUpdateCh.Close()

	b.mu.Lock()
	defer b.mu.Unlock()
	if b.intervalTimer != nil {
		b.intervalTimer.Stop()
	}
}

// ExitIdle forwards the exit-idle signal to the child policy.
func (b *outlierDetectionBalancer) ExitIdle() {
	b.child.exitIdle()
}

// wrappedPicker delegates to the child policy's picker, and when the request
// finishes, it increments the corresponding counter in the map entry referenced
// by the subConnWrapper that was picked. If both the `success_rate_ejection`
// and `failure_percentage_ejection` fields are unset in the configuration, this
// picker will not count.
type wrappedPicker struct {
	childPicker balancer.Picker
	noopPicker  bool
}

func (wp *wrappedPicker) Pick(info balancer.PickInfo) (balancer.PickResult, error) {
	pr, err := wp.childPicker.Pick(info)
	if err != nil {
		return balancer.PickResult{}, err
	}

	done := func(di balancer.DoneInfo) {
		if !wp.noopPicker {
			incrementCounter(pr.SubConn, di)
		}
		if pr.Done != nil {
			pr.Done(di)
		}
	}
	scw, ok := pr.SubConn.(*subConnWrapper)
	if !ok {
		// This can never happen, but check is present for defensive
		// programming.
		logger.Errorf("Picked SubConn from child picker is not a SubConnWrapper")
		return balancer.PickResult{
			SubConn:  pr.SubConn,
			Done:     done,
			Metadata: pr.Metadata,
		}, nil
	}
	return balancer.PickResult{
		SubConn:  scw.SubConn,
		Done:     done,
		Metadata: pr.Metadata,
	}, nil
}

// incrementCounter records the RPC outcome in the active bucket of the
// endpoint the picked SubConn currently belongs to.
func incrementCounter(sc balancer.SubConn, info balancer.DoneInfo) {
	scw, ok := sc.(*subConnWrapper)
	if !ok {
		// Shouldn't happen, as comes from child
		return
	}

	// scw.endpointInfo and callCounter.activeBucket can be written to
	// concurrently (the pointers themselves). Thus, protect the reads here with
	// atomics to prevent data corruption. There exists a race in which you read
	// the endpointInfo or active bucket pointer and then that pointer points to
	// deprecated memory. If this goroutine yields the processor, in between
	// reading the endpointInfo pointer and writing to the active bucket,
	// UpdateAddresses can switch the endpointInfo the scw points to. Writing to
	// an outdated endpoint is a very small race and tolerable. After reading
	// callCounter.activeBucket in this picker a swap call can concurrently
	// change what activeBucket points to. A50 says to swap the pointer, which
	// will cause this race to write to deprecated memory the interval timer
	// algorithm will never read, which makes this race alright.
	epInfo := scw.endpointInfo.Load()
	if epInfo == nil {
		return
	}
	ab := epInfo.callCounter.activeBucket.Load()

	if info.Err == nil {
		atomic.AddUint32(&ab.numSuccesses, 1)
	} else {
		atomic.AddUint32(&ab.numFailures, 1)
	}
}

// UpdateState queues a child state update for serial processing in run().
func (b *outlierDetectionBalancer) UpdateState(s balancer.State) {
	b.pickerUpdateCh.Put(s)
}

// NewSubConn creates a wrapped SubConn, associates it with the endpoint that
// owns its (single) address, and ejects it immediately if that endpoint is
// currently ejected.
func (b *outlierDetectionBalancer) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
	oldListener := opts.StateListener
	scw := &subConnWrapper{
		addresses:         addrs,
		scUpdateCh:        b.scUpdateCh,
		listener:          oldListener,
		latestHealthState: balancer.SubConnState{ConnectivityState: connectivity.Connecting},
	}
	opts.StateListener = func(state balancer.SubConnState) { b.updateSubConnState(scw, state) }
	b.mu.Lock()
	defer b.mu.Unlock()
	sc, err := b.ClientConn.NewSubConn(addrs, opts)
	if err != nil {
		return nil, err
	}
	scw.SubConn = sc
	if len(addrs) != 1 {
		return scw, nil
	}
	epInfo, ok := b.addrs[addrs[0].Addr]
	if !ok {
		return scw, nil
	}
	epInfo.sws = append(epInfo.sws, scw)
	scw.endpointInfo.Store(epInfo)
	if !epInfo.latestEjectionTimestamp.IsZero() {
		scw.eject()
	}
	return scw, nil
}

// RemoveSubConn is unused; SubConns are shut down via their wrappers.
func (b *outlierDetectionBalancer) RemoveSubConn(sc balancer.SubConn) {
	b.logger.Errorf("RemoveSubConn(%v) called unexpectedly", sc)
}

// appendIfPresent appends the scw to the endpoint, if the address is present in
// the Outlier Detection balancers address map. Returns nil if not present, and
// the map entry if present.
//
// Caller must hold b.mu.
func (b *outlierDetectionBalancer) appendIfPresent(addr string, scw *subConnWrapper) *endpointInfo {
	epInfo, ok := b.addrs[addr]
	if !ok {
		return nil
	}

	epInfo.sws = append(epInfo.sws, scw)
	scw.endpointInfo.Store(epInfo)
	return epInfo
}

// removeSubConnFromEndpointMapEntry removes the scw from its map entry if
// present.
//
// Caller must hold b.mu.
func (b *outlierDetectionBalancer) removeSubConnFromEndpointMapEntry(scw *subConnWrapper) {
	epInfo := scw.endpointInfo.Load()
	if epInfo == nil {
		return
	}
	for i, sw := range epInfo.sws {
		if scw == sw {
			epInfo.sws = append(epInfo.sws[:i], epInfo.sws[i+1:]...)
			return
		}
	}
}

// UpdateAddresses re-associates the SubConn wrapper with the endpoint owning
// its new address (only single-address SubConns participate in outlier
// detection), ejecting or unejecting it to match the new endpoint's state.
func (b *outlierDetectionBalancer) UpdateAddresses(sc balancer.SubConn, addrs []resolver.Address) {
	scw, ok := sc.(*subConnWrapper)
	if !ok {
		// Return, shouldn't happen if passed up scw
		return
	}

	b.ClientConn.UpdateAddresses(scw.SubConn, addrs)
	b.mu.Lock()
	defer b.mu.Unlock()

	// Note that 0 addresses is a valid update/state for a SubConn to be in.
	// This is correctly handled by this algorithm (handled as part of a non singular
	// old address/new address).
	switch {
	case len(scw.addresses) == 1 && len(addrs) == 1: // single address to single address
		// If the updated address is the same, then there is nothing to do
		// past this point.
		if scw.addresses[0].Addr == addrs[0].Addr {
			return
		}
		b.removeSubConnFromEndpointMapEntry(scw)
		endpointInfo := b.appendIfPresent(addrs[0].Addr, scw)
		if endpointInfo == nil { // uneject unconditionally because could have come from an ejected endpoint
			scw.uneject()
			break
		}
		if endpointInfo.latestEjectionTimestamp.IsZero() { // relay new updated subconn state
			scw.uneject()
		} else {
			scw.eject()
		}
	case len(scw.addresses) == 1: // single address to multiple/no addresses
		b.removeSubConnFromEndpointMapEntry(scw)
		addrInfo := scw.endpointInfo.Load()
		if addrInfo != nil {
			addrInfo.callCounter.clear()
		}
		scw.uneject()
	case len(addrs) == 1: // multiple/no addresses to single address
		endpointInfo := b.appendIfPresent(addrs[0].Addr, scw)
		if endpointInfo != nil && !endpointInfo.latestEjectionTimestamp.IsZero() {
			scw.eject()
		}
	} // otherwise multiple/no addresses to multiple/no addresses; ignore

	scw.addresses = addrs
}

// handleSubConnUpdate stores the recent state and forward the update
// if the SubConn is not ejected.
func (b *outlierDetectionBalancer) handleSubConnUpdate(u *scUpdate) {
	scw := u.scw
	scw.clearHealthListener()
	b.child.updateSubConnState(scw, u.state)
}

// handleSubConnHealthUpdate forwards a SubConn health state change to the
// child policy.
func (b *outlierDetectionBalancer) handleSubConnHealthUpdate(u *scHealthUpdate) {
	b.child.updateSubConnHealthState(u.scw, u.state)
}

// handleEjectedUpdate handles any SubConns that get ejected/unejected, and
// forwards the appropriate corresponding subConnState to the child policy.
func (b *outlierDetectionBalancer) handleEjectedUpdate(u *ejectionUpdate) {
	b.child.handleEjectionUpdate(u)
}

// handleChildStateUpdate forwards the picker update wrapped in a wrapped picker
// with the noop picker bit present.
func (b *outlierDetectionBalancer) handleChildStateUpdate(u balancer.State) {
	b.childState = u
	b.mu.Lock()
	if b.inhibitPickerUpdates {
		// If a child's state is updated during the suppression of child
		// updates, the synchronous handleLBConfigUpdate function with respect
		// to UpdateClientConnState should return a picker unconditionally.
		b.updateUnconditionally = true
		b.mu.Unlock()
		return
	}
	noopCfg := b.noopConfig()
	b.mu.Unlock()
	b.recentPickerNoop = noopCfg
	b.ClientConn.UpdateState(balancer.State{
		ConnectivityState: b.childState.ConnectivityState,
		Picker: &wrappedPicker{
			childPicker: b.childState.Picker,
			noopPicker:  noopCfg,
		},
	})
}

// handleLBConfigUpdate compares whether the new config is a noop config or not,
// to the noop bit in the picker if present. It updates the picker if this bit
// changed compared to the picker currently in use.
func (b *outlierDetectionBalancer) handleLBConfigUpdate(u lbCfgUpdate) {
	lbCfg := u.lbCfg
	noopCfg := lbCfg.SuccessRateEjection == nil && lbCfg.FailurePercentageEjection == nil
	// If the child has sent its first update and this config flips the noop
	// bit compared to the most recent picker update sent upward, then a new
	// picker with this updated bit needs to be forwarded upward. If a child
	// update was received during the suppression of child updates within
	// UpdateClientConnState(), then a new picker needs to be forwarded with
	// this updated state, irregardless of whether this new configuration flips
	// the bit.
	if b.childState.Picker != nil && noopCfg != b.recentPickerNoop || b.updateUnconditionally {
		b.recentPickerNoop = noopCfg
		b.ClientConn.UpdateState(balancer.State{
			ConnectivityState: b.childState.ConnectivityState,
			Picker: &wrappedPicker{
				childPicker: b.childState.Picker,
				noopPicker:  noopCfg,
			},
		})
	}
	b.inhibitPickerUpdates = false
	b.updateUnconditionally = false
	// Unblock the UpdateClientConnState() call waiting on this config to be
	// processed.
	close(u.done)
}

// run serializes all SubConn and picker updates, exiting once the balancer is
// closed or the update channels are drained and closed.
func (b *outlierDetectionBalancer) run() {
	defer b.done.Fire()
	for {
		select {
		case update, ok := <-b.scUpdateCh.Get():
			if !ok {
				return
			}
			b.scUpdateCh.Load()
			if b.closed.HasFired() { // don't send SubConn updates to child after the balancer has been closed
				return
			}
			switch u := update.(type) {
			case *scUpdate:
				b.handleSubConnUpdate(u)
			case *ejectionUpdate:
				b.handleEjectedUpdate(u)
			case *scHealthUpdate:
				b.handleSubConnHealthUpdate(u)
			}
		case update, ok := <-b.pickerUpdateCh.Get():
			if !ok {
				return
			}
			b.pickerUpdateCh.Load()
			if b.closed.HasFired() { // don't send picker updates to grpc after the balancer has been closed
				return
			}
			switch u := update.(type) {
			case balancer.State:
				b.handleChildStateUpdate(u)
			case lbCfgUpdate:
				b.handleLBConfigUpdate(u)
			}
		case <-b.closed.Done():
			return
		}
	}
}

// intervalTimerAlgorithm ejects and unejects endpoints based on the Outlier
// Detection configuration and data about each endpoint from the previous
// interval.
697 func (b *outlierDetectionBalancer) intervalTimerAlgorithm() { 698 b.mu.Lock() 699 defer b.mu.Unlock() 700 b.timerStartTime = time.Now() 701 702 for _, epInfo := range b.endpoints.Values() { 703 epInfo.callCounter.swap() 704 } 705 706 if b.cfg.SuccessRateEjection != nil { 707 b.successRateAlgorithm() 708 } 709 710 if b.cfg.FailurePercentageEjection != nil { 711 b.failurePercentageAlgorithm() 712 } 713 714 for _, epInfo := range b.endpoints.Values() { 715 if epInfo.latestEjectionTimestamp.IsZero() && epInfo.ejectionTimeMultiplier > 0 { 716 epInfo.ejectionTimeMultiplier-- 717 continue 718 } 719 if epInfo.latestEjectionTimestamp.IsZero() { 720 // Endpoint is already not ejected, so no need to check for whether 721 // to uneject the endpoint below. 722 continue 723 } 724 et := time.Duration(b.cfg.BaseEjectionTime) * time.Duration(epInfo.ejectionTimeMultiplier) 725 met := max(time.Duration(b.cfg.BaseEjectionTime), time.Duration(b.cfg.MaxEjectionTime)) 726 uet := epInfo.latestEjectionTimestamp.Add(min(et, met)) 727 if now().After(uet) { 728 b.unejectEndpoint(epInfo) 729 } 730 } 731 732 // This conditional only for testing (since the interval timer algorithm is 733 // called manually), will never hit in production. 734 if b.intervalTimer != nil { 735 b.intervalTimer.Stop() 736 } 737 b.intervalTimer = afterFunc(time.Duration(b.cfg.Interval), b.intervalTimerAlgorithm) 738 } 739 740 // endpointsWithAtLeastRequestVolume returns a slice of endpoint information of 741 // all endpoints with at least request volume passed in. 742 // 743 // Caller must hold b.mu. 
744 func (b *outlierDetectionBalancer) endpointsWithAtLeastRequestVolume(requestVolume uint32) []*endpointInfo { 745 var endpoints []*endpointInfo 746 for _, epInfo := range b.endpoints.Values() { 747 bucket1 := epInfo.callCounter.inactiveBucket 748 rv := bucket1.numSuccesses + bucket1.numFailures 749 if rv >= requestVolume { 750 endpoints = append(endpoints, epInfo) 751 } 752 } 753 return endpoints 754 } 755 756 // meanAndStdDev returns the mean and std dev of the fractions of successful 757 // requests of the endpoints passed in. 758 // 759 // Caller must hold b.mu. 760 func (b *outlierDetectionBalancer) meanAndStdDev(endpoints []*endpointInfo) (float64, float64) { 761 var totalFractionOfSuccessfulRequests float64 762 var mean float64 763 for _, epInfo := range endpoints { 764 bucket := epInfo.callCounter.inactiveBucket 765 rv := bucket.numSuccesses + bucket.numFailures 766 totalFractionOfSuccessfulRequests += float64(bucket.numSuccesses) / float64(rv) 767 } 768 mean = totalFractionOfSuccessfulRequests / float64(len(endpoints)) 769 var sumOfSquares float64 770 for _, epInfo := range endpoints { 771 bucket := epInfo.callCounter.inactiveBucket 772 rv := bucket.numSuccesses + bucket.numFailures 773 devFromMean := (float64(bucket.numSuccesses) / float64(rv)) - mean 774 sumOfSquares += devFromMean * devFromMean 775 } 776 variance := sumOfSquares / float64(len(endpoints)) 777 return mean, math.Sqrt(variance) 778 } 779 780 // successRateAlgorithm ejects any endpoints where the success rate falls below 781 // the other endpoints according to mean and standard deviation, and if overall 782 // applicable from other set heuristics. 783 // 784 // Caller must hold b.mu. 
func (b *outlierDetectionBalancer) successRateAlgorithm() {
	endpointsToConsider := b.endpointsWithAtLeastRequestVolume(b.cfg.SuccessRateEjection.RequestVolume)
	if len(endpointsToConsider) < int(b.cfg.SuccessRateEjection.MinimumHosts) {
		return
	}
	mean, stddev := b.meanAndStdDev(endpointsToConsider)
	for _, epInfo := range endpointsToConsider {
		bucket := epInfo.callCounter.inactiveBucket
		ejectionCfg := b.cfg.SuccessRateEjection
		// Stop ejecting (for this interval) once the configured maximum
		// percentage of endpoints is already ejected.
		if float64(b.numEndpointsEjected)/float64(b.endpoints.Len())*100 >= float64(b.cfg.MaxEjectionPercent) {
			return
		}
		// Eject when the endpoint's success rate falls more than
		// stdev_factor/1000 standard deviations below the mean.
		successRate := float64(bucket.numSuccesses) / float64(bucket.numSuccesses+bucket.numFailures)
		requiredSuccessRate := mean - stddev*(float64(ejectionCfg.StdevFactor)/1000)
		if successRate < requiredSuccessRate {
			channelz.Infof(logger, b.channelzParent, "SuccessRate algorithm detected outlier: %s. Parameters: successRate=%f, mean=%f, stddev=%f, requiredSuccessRate=%f", epInfo, successRate, mean, stddev, requiredSuccessRate)
			// Enforce the ejection only enforcement_percentage percent of the
			// time.
			if uint32(rand.Int32N(100)) < ejectionCfg.EnforcementPercentage {
				b.ejectEndpoint(epInfo)
			}
		}
	}
}

// failurePercentageAlgorithm ejects any endpoints where the failure percentage
// rate exceeds a set enforcement percentage, if overall applicable from other
// set heuristics.
//
// Caller must hold b.mu.
func (b *outlierDetectionBalancer) failurePercentageAlgorithm() {
	endpointsToConsider := b.endpointsWithAtLeastRequestVolume(b.cfg.FailurePercentageEjection.RequestVolume)
	if len(endpointsToConsider) < int(b.cfg.FailurePercentageEjection.MinimumHosts) {
		return
	}

	for _, epInfo := range endpointsToConsider {
		bucket := epInfo.callCounter.inactiveBucket
		ejectionCfg := b.cfg.FailurePercentageEjection
		// Stop ejecting (for this interval) once the configured maximum
		// percentage of endpoints is already ejected.
		if float64(b.numEndpointsEjected)/float64(b.endpoints.Len())*100 >= float64(b.cfg.MaxEjectionPercent) {
			return
		}
		failurePercentage := (float64(bucket.numFailures) / float64(bucket.numSuccesses+bucket.numFailures)) * 100
		if failurePercentage > float64(b.cfg.FailurePercentageEjection.Threshold) {
			channelz.Infof(logger, b.channelzParent, "FailurePercentage algorithm detected outlier: %s, failurePercentage=%f", epInfo, failurePercentage)
			// Enforce the ejection only enforcement_percentage percent of the
			// time.
			if uint32(rand.Int32N(100)) < ejectionCfg.EnforcementPercentage {
				b.ejectEndpoint(epInfo)
			}
		}
	}
}

// ejectEndpoint marks the endpoint ejected as of the current interval start,
// bumps its ejection time multiplier, and ejects all of its SubConns.
//
// Caller must hold b.mu.
func (b *outlierDetectionBalancer) ejectEndpoint(epInfo *endpointInfo) {
	b.numEndpointsEjected++
	epInfo.latestEjectionTimestamp = b.timerStartTime
	epInfo.ejectionTimeMultiplier++
	for _, sbw := range epInfo.sws {
		sbw.eject()
		channelz.Infof(logger, b.channelzParent, "Subchannel ejected: %s", sbw)
	}

}

// unejectEndpoint clears the endpoint's ejection timestamp and unejects all of
// its SubConns.
//
// Caller must hold b.mu.
func (b *outlierDetectionBalancer) unejectEndpoint(epInfo *endpointInfo) {
	b.numEndpointsEjected--
	epInfo.latestEjectionTimestamp = time.Time{}
	for _, sbw := range epInfo.sws {
		sbw.uneject()
		channelz.Infof(logger, b.channelzParent, "Subchannel unejected: %s", sbw)
	}
}

// synchronizingBalancerWrapper serializes calls into balancer (to uphold the
// balancer.Balancer API guarantee of synchronous calls). It also ensures a
// consistent order of locking mutexes when using SubConn listeners to avoid
// deadlocks.
type synchronizingBalancerWrapper struct {
	// mu should not be used directly from outside this struct, instead use
	// methods defined on the struct.
	mu sync.Mutex
	lb *gracefulswitch.Balancer
}

// switchTo serially switches the child policy to the given builder.
func (sbw *synchronizingBalancerWrapper) switchTo(builder balancer.Builder) error {
	sbw.mu.Lock()
	defer sbw.mu.Unlock()
	return sbw.lb.SwitchTo(builder)
}

// updateClientConnState serially forwards a ClientConn state update to the
// child policy.
func (sbw *synchronizingBalancerWrapper) updateClientConnState(state balancer.ClientConnState) error {
	sbw.mu.Lock()
	defer sbw.mu.Unlock()
	return sbw.lb.UpdateClientConnState(state)
}

// resolverError serially forwards a resolver error to the child policy.
func (sbw *synchronizingBalancerWrapper) resolverError(err error) {
	sbw.mu.Lock()
	defer sbw.mu.Unlock()
	sbw.lb.ResolverError(err)
}

// closeLB serially closes the child policy.
func (sbw *synchronizingBalancerWrapper) closeLB() {
	sbw.mu.Lock()
	defer sbw.mu.Unlock()
	sbw.lb.Close()
}

// exitIdle serially signals the child policy to exit idle.
func (sbw *synchronizingBalancerWrapper) exitIdle() {
	sbw.mu.Lock()
	defer sbw.mu.Unlock()
	sbw.lb.ExitIdle()
}

// updateSubConnHealthState serially delivers a health state update through the
// SubConn wrapper.
func (sbw *synchronizingBalancerWrapper) updateSubConnHealthState(scw *subConnWrapper, scs balancer.SubConnState) {
	sbw.mu.Lock()
	defer sbw.mu.Unlock()
	scw.updateSubConnHealthState(scs)
}

// updateSubConnState serially delivers a connectivity state update through the
// SubConn wrapper.
func (sbw *synchronizingBalancerWrapper) updateSubConnState(scw *subConnWrapper, scs balancer.SubConnState) {
	sbw.mu.Lock()
	defer sbw.mu.Unlock()
	scw.updateSubConnConnectivityState(scs)
}

// handleEjectionUpdate serially applies an ejection or unejection to the
// SubConn wrapper.
func (sbw *synchronizingBalancerWrapper) handleEjectionUpdate(u *ejectionUpdate) {
	sbw.mu.Lock()
	defer sbw.mu.Unlock()
	if u.isEjected {
		u.scw.handleEjection()
	} else {
		u.scw.handleUnejection()
	}
}

// endpointInfo contains the runtime information about an endpoint that pertains
// to Outlier Detection. This struct and all of its fields is protected by
// outlierDetectionBalancer.mu in the case where it is accessed through the
// address or endpoint map.
In the case of Picker callbacks, the writes to the 924 // activeBucket of callCounter are protected by atomically loading and storing 925 // unsafe.Pointers (see further explanation in incrementCounter()). 926 type endpointInfo struct { 927 // The call result counter object. 928 callCounter *callCounter 929 930 // The latest ejection timestamp, or zero if the endpoint is currently not 931 // ejected. 932 latestEjectionTimestamp time.Time 933 934 // The current ejection time multiplier, starting at 0. 935 ejectionTimeMultiplier int64 936 937 // A list of subchannel wrapper objects that correspond to this endpoint. 938 sws []*subConnWrapper 939 } 940 941 func (a *endpointInfo) String() string { 942 var res strings.Builder 943 res.WriteString("[") 944 for _, sw := range a.sws { 945 res.WriteString(sw.String()) 946 } 947 res.WriteString("]") 948 return res.String() 949 } 950 951 func newEndpointInfo() *endpointInfo { 952 return &endpointInfo{ 953 callCounter: newCallCounter(), 954 } 955 }