google.golang.org/grpc@v1.74.2/balancer/ringhash/ringhash.go (about) 1 /* 2 * 3 * Copyright 2021 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 // Package ringhash implements the ringhash balancer. See the following 20 // gRFCs for details: 21 // - https://github.com/grpc/proposal/blob/master/A42-xds-ring-hash-lb-policy.md 22 // - https://github.com/grpc/proposal/blob/master/A61-IPv4-IPv6-dualstack-backends.md#ring-hash 23 // - https://github.com/grpc/proposal/blob/master/A76-ring-hash-improvements.md 24 // 25 // # Experimental 26 // 27 // Notice: This package is EXPERIMENTAL and may be changed or removed in a 28 // later release. 29 package ringhash 30 31 import ( 32 "encoding/json" 33 "errors" 34 "fmt" 35 "math/rand/v2" 36 "sort" 37 "sync" 38 39 "google.golang.org/grpc/balancer" 40 "google.golang.org/grpc/balancer/base" 41 "google.golang.org/grpc/balancer/endpointsharding" 42 "google.golang.org/grpc/balancer/lazy" 43 "google.golang.org/grpc/balancer/pickfirst/pickfirstleaf" 44 "google.golang.org/grpc/connectivity" 45 "google.golang.org/grpc/internal/balancer/weight" 46 "google.golang.org/grpc/internal/grpclog" 47 "google.golang.org/grpc/internal/pretty" 48 iringhash "google.golang.org/grpc/internal/ringhash" 49 "google.golang.org/grpc/resolver" 50 "google.golang.org/grpc/resolver/ringhash" 51 "google.golang.org/grpc/serviceconfig" 52 ) 53 54 // Name is the name of the ring_hash balancer. 55 const Name = "ring_hash_experimental" 56 57 func lazyPickFirstBuilder(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { 58 return lazy.NewBalancer(cc, opts, balancer.Get(pickfirstleaf.Name).Build) 59 } 60 61 func init() { 62 balancer.Register(bb{}) 63 } 64 65 type bb struct{} 66 67 func (bb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { 68 b := &ringhashBalancer{ 69 ClientConn: cc, 70 endpointStates: resolver.NewEndpointMap[*endpointState](), 71 } 72 esOpts := endpointsharding.Options{DisableAutoReconnect: true} 73 b.child = endpointsharding.NewBalancer(b, opts, lazyPickFirstBuilder, esOpts) 74 b.logger = prefixLogger(b) 75 b.logger.Infof("Created") 76 return b 77 } 78 79 func (bb) Name() string { 80 return Name 81 } 82 83 func (bb) ParseConfig(c json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { 84 return parseConfig(c) 85 } 86 87 type ringhashBalancer struct { 88 // The following fields are initialized at build time and read-only after 89 // that and therefore do not need to be guarded by a mutex. 90 91 // ClientConn is embedded to intercept UpdateState calls from the child 92 // endpointsharding balancer. 93 balancer.ClientConn 94 logger *grpclog.PrefixLogger 95 child balancer.Balancer 96 97 mu sync.Mutex 98 config *iringhash.LBConfig 99 inhibitChildUpdates bool 100 shouldRegenerateRing bool 101 endpointStates *resolver.EndpointMap[*endpointState] 102 103 // ring is always in sync with endpoints. When endpoints change, a new ring 104 // is generated. Note that address weights updates also regenerates the 105 // ring. 106 ring *ring 107 } 108 109 // hashKey returns the hash key to use for an endpoint. Per gRFC A61, each entry 110 // in the ring is a hash of the endpoint's hash key concatenated with a 111 // per-entry unique suffix. 112 func hashKey(endpoint resolver.Endpoint) string { 113 if hk := ringhash.HashKey(endpoint); hk != "" { 114 return hk 115 } 116 // If no hash key is set, use the endpoint's first address as the hash key. 117 // This is the default behavior when no hash key is set. 118 return endpoint.Addresses[0].Addr 119 } 120 121 // UpdateState intercepts child balancer state updates. It updates the 122 // per-endpoint state stored in the ring, and also the aggregated state based on 123 // the child picker. It also reconciles the endpoint list. It sets 124 // `b.shouldRegenerateRing` to true if the new endpoint list is different from 125 // the previous, i.e. any of the following is true: 126 // - an endpoint was added 127 // - an endpoint was removed 128 // - an endpoint's weight was updated 129 // - the first addresses of the endpoint has changed 130 func (b *ringhashBalancer) UpdateState(state balancer.State) { 131 b.mu.Lock() 132 defer b.mu.Unlock() 133 childStates := endpointsharding.ChildStatesFromPicker(state.Picker) 134 // endpointsSet is the set converted from endpoints, used for quick lookup. 135 endpointsSet := resolver.NewEndpointMap[bool]() 136 137 for _, childState := range childStates { 138 endpoint := childState.Endpoint 139 endpointsSet.Set(endpoint, true) 140 newWeight := getWeightAttribute(endpoint) 141 hk := hashKey(endpoint) 142 es, ok := b.endpointStates.Get(endpoint) 143 if !ok { 144 es := &endpointState{ 145 balancer: childState.Balancer, 146 hashKey: hk, 147 weight: newWeight, 148 state: childState.State, 149 } 150 b.endpointStates.Set(endpoint, es) 151 b.shouldRegenerateRing = true 152 } else { 153 // We have seen this endpoint before and created a `endpointState` 154 // object for it. If the weight or the hash key of the endpoint has 155 // changed, update the endpoint state map with the new weight or 156 // hash key. This will be used when a new ring is created. 157 if oldWeight := es.weight; oldWeight != newWeight { 158 b.shouldRegenerateRing = true 159 es.weight = newWeight 160 } 161 if es.hashKey != hk { 162 b.shouldRegenerateRing = true 163 es.hashKey = hk 164 } 165 es.state = childState.State 166 } 167 } 168 169 for _, endpoint := range b.endpointStates.Keys() { 170 if _, ok := endpointsSet.Get(endpoint); ok { 171 continue 172 } 173 // endpoint was removed by resolver. 174 b.endpointStates.Delete(endpoint) 175 b.shouldRegenerateRing = true 176 } 177 178 b.updatePickerLocked() 179 } 180 181 func (b *ringhashBalancer) UpdateClientConnState(ccs balancer.ClientConnState) error { 182 if b.logger.V(2) { 183 b.logger.Infof("Received update from resolver, balancer config: %+v", pretty.ToJSON(ccs.BalancerConfig)) 184 } 185 186 newConfig, ok := ccs.BalancerConfig.(*iringhash.LBConfig) 187 if !ok { 188 return fmt.Errorf("unexpected balancer config with type: %T", ccs.BalancerConfig) 189 } 190 191 b.mu.Lock() 192 b.inhibitChildUpdates = true 193 b.mu.Unlock() 194 195 defer func() { 196 b.mu.Lock() 197 b.inhibitChildUpdates = false 198 b.updatePickerLocked() 199 b.mu.Unlock() 200 }() 201 202 if err := b.child.UpdateClientConnState(balancer.ClientConnState{ 203 // Make pickfirst children use health listeners for outlier detection 204 // and health checking to work. 205 ResolverState: pickfirstleaf.EnableHealthListener(ccs.ResolverState), 206 }); err != nil { 207 return err 208 } 209 210 b.mu.Lock() 211 // Ring updates can happen due to the following: 212 // 1. Addition or deletion of endpoints: The synchronous picker update from 213 // the child endpointsharding balancer would contain the list of updated 214 // endpoints. Updates triggered by the child after handling the 215 // `UpdateClientConnState` call will not change the endpoint list. 216 // 2. Change in the `LoadBalancerConfig`: Ring config such as max/min ring 217 // size. 218 // To avoid extra ring updates, a boolean is used to track the need for a 219 // ring update and the update is done only once at the end. 220 // 221 // If the ring configuration has changed, we need to regenerate the ring 222 // while sending a new picker. 223 if b.config == nil || b.config.MinRingSize != newConfig.MinRingSize || b.config.MaxRingSize != newConfig.MaxRingSize { 224 b.shouldRegenerateRing = true 225 } 226 b.config = newConfig 227 b.mu.Unlock() 228 return nil 229 } 230 231 func (b *ringhashBalancer) ResolverError(err error) { 232 b.child.ResolverError(err) 233 } 234 235 func (b *ringhashBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) { 236 b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", sc, state) 237 } 238 239 func (b *ringhashBalancer) updatePickerLocked() { 240 state := b.aggregatedStateLocked() 241 // Start connecting to new endpoints if necessary. 242 if state == connectivity.Connecting || state == connectivity.TransientFailure { 243 // When overall state is TransientFailure, we need to make sure at least 244 // one endpoint is attempting to connect, otherwise this balancer may 245 // never get picks if the parent is priority. 246 // 247 // Because we report Connecting as the overall state when only one 248 // endpoint is in TransientFailure, we do the same check for Connecting 249 // here. 250 // 251 // Note that this check also covers deleting endpoints. E.g. if the 252 // endpoint attempting to connect is deleted, and the overall state is 253 // TF. Since there must be at least one endpoint attempting to connect, 254 // we need to trigger one. 255 // 256 // After calling `ExitIdle` on a child balancer, the child will send a 257 // picker update asynchronously. A race condition may occur if another 258 // picker update from endpointsharding arrives before the child's 259 // picker update. The received picker may trigger a re-execution of the 260 // loop below to find an idle child. Since map iteration order is 261 // non-deterministic, the list of `endpointState`s must be sorted to 262 // ensure `ExitIdle` is called on the same child, preventing unnecessary 263 // connections. 264 var endpointStates = make([]*endpointState, b.endpointStates.Len()) 265 for i, s := range b.endpointStates.Values() { 266 endpointStates[i] = s 267 } 268 sort.Slice(endpointStates, func(i, j int) bool { 269 return endpointStates[i].hashKey < endpointStates[j].hashKey 270 }) 271 var idleBalancer endpointsharding.ExitIdler 272 for _, es := range endpointStates { 273 connState := es.state.ConnectivityState 274 if connState == connectivity.Connecting { 275 idleBalancer = nil 276 break 277 } 278 if idleBalancer == nil && connState == connectivity.Idle { 279 idleBalancer = es.balancer 280 } 281 } 282 if idleBalancer != nil { 283 idleBalancer.ExitIdle() 284 } 285 } 286 287 if b.inhibitChildUpdates { 288 return 289 } 290 291 // Update the channel. 292 if b.endpointStates.Len() > 0 && b.shouldRegenerateRing { 293 // with a non-empty list of endpoints. 294 b.ring = newRing(b.endpointStates, b.config.MinRingSize, b.config.MaxRingSize, b.logger) 295 } 296 b.shouldRegenerateRing = false 297 var newPicker balancer.Picker 298 if b.endpointStates.Len() == 0 { 299 newPicker = base.NewErrPicker(errors.New("produced zero addresses")) 300 } else { 301 newPicker = b.newPickerLocked() 302 } 303 b.ClientConn.UpdateState(balancer.State{ 304 ConnectivityState: state, 305 Picker: newPicker, 306 }) 307 } 308 309 func (b *ringhashBalancer) Close() { 310 b.logger.Infof("Shutdown") 311 b.child.Close() 312 } 313 314 func (b *ringhashBalancer) ExitIdle() { 315 // ExitIdle implementation is a no-op because connections are either 316 // triggers from picks or from child balancer state changes. 317 } 318 319 // newPickerLocked generates a picker. The picker copies the endpoint states 320 // over to avoid locking the mutex at RPC time. The picker should be 321 // re-generated every time an endpoint state is updated. 322 func (b *ringhashBalancer) newPickerLocked() *picker { 323 states := make(map[string]endpointState) 324 hasEndpointConnecting := false 325 for _, epState := range b.endpointStates.Values() { 326 // Copy the endpoint state to avoid races, since ring hash 327 // mutates the state, weight and hash key in place. 328 states[epState.hashKey] = *epState 329 if epState.state.ConnectivityState == connectivity.Connecting { 330 hasEndpointConnecting = true 331 } 332 } 333 return &picker{ 334 ring: b.ring, 335 endpointStates: states, 336 requestHashHeader: b.config.RequestHashHeader, 337 hasEndpointInConnectingState: hasEndpointConnecting, 338 randUint64: rand.Uint64, 339 } 340 } 341 342 // aggregatedStateLocked returns the aggregated child balancers state 343 // based on the following rules. 344 // - If there is at least one endpoint in READY state, report READY. 345 // - If there are 2 or more endpoints in TRANSIENT_FAILURE state, report 346 // TRANSIENT_FAILURE. 347 // - If there is at least one endpoint in CONNECTING state, report CONNECTING. 348 // - If there is one endpoint in TRANSIENT_FAILURE and there is more than one 349 // endpoint, report state CONNECTING. 350 // - If there is at least one endpoint in Idle state, report Idle. 351 // - Otherwise, report TRANSIENT_FAILURE. 352 // 353 // Note that if there are 1 connecting, 2 transient failure, the overall state 354 // is transient failure. This is because the second transient failure is a 355 // fallback of the first failing endpoint, and we want to report transient 356 // failure to failover to the lower priority. 357 func (b *ringhashBalancer) aggregatedStateLocked() connectivity.State { 358 var nums [5]int 359 for _, es := range b.endpointStates.Values() { 360 nums[es.state.ConnectivityState]++ 361 } 362 363 if nums[connectivity.Ready] > 0 { 364 return connectivity.Ready 365 } 366 if nums[connectivity.TransientFailure] > 1 { 367 return connectivity.TransientFailure 368 } 369 if nums[connectivity.Connecting] > 0 { 370 return connectivity.Connecting 371 } 372 if nums[connectivity.TransientFailure] == 1 && b.endpointStates.Len() > 1 { 373 return connectivity.Connecting 374 } 375 if nums[connectivity.Idle] > 0 { 376 return connectivity.Idle 377 } 378 return connectivity.TransientFailure 379 } 380 381 // getWeightAttribute is a convenience function which returns the value of the 382 // weight endpoint Attribute. 383 // 384 // When used in the xDS context, the weight attribute is guaranteed to be 385 // non-zero. But, when used in a non-xDS context, the weight attribute could be 386 // unset. A Default of 1 is used in the latter case. 387 func getWeightAttribute(e resolver.Endpoint) uint32 { 388 w := weight.FromEndpoint(e).Weight 389 if w == 0 { 390 return 1 391 } 392 return w 393 } 394 395 type endpointState struct { 396 // hashKey is the hash key of the endpoint. Per gRFC A61, each entry in the 397 // ring is an endpoint, positioned based on the hash of the endpoint's first 398 // address by default. Per gRFC A76, the hash key of an endpoint may be 399 // overridden, for example based on EDS endpoint metadata. 400 hashKey string 401 weight uint32 402 balancer endpointsharding.ExitIdler 403 404 // state is updated by the balancer while receiving resolver updates from 405 // the channel and picker updates from its children. Access to it is guarded 406 // by ringhashBalancer.mu. 407 state balancer.State 408 }