gitee.com/zhaochuninhefei/gmgo@v0.0.31-0.20240209061119-069254a02979/grpc/balancer/grpclb/grpclb.go (about) 1 /* 2 * 3 * Copyright 2016 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 // package grpclb defines a grpclb balancer. 20 // 21 // To install grpclb balancer, import this package as: 22 // import _ "gitee.com/zhaochuninhefei/gmgo/grpc/balancer/grpclb" 23 package grpclb 24 25 import ( 26 "context" 27 "errors" 28 "fmt" 29 "sync" 30 "time" 31 32 grpc "gitee.com/zhaochuninhefei/gmgo/grpc" 33 "gitee.com/zhaochuninhefei/gmgo/grpc/balancer" 34 grpclbstate "gitee.com/zhaochuninhefei/gmgo/grpc/balancer/grpclb/state" 35 "gitee.com/zhaochuninhefei/gmgo/grpc/connectivity" 36 "gitee.com/zhaochuninhefei/gmgo/grpc/credentials" 37 "gitee.com/zhaochuninhefei/gmgo/grpc/grpclog" 38 "gitee.com/zhaochuninhefei/gmgo/grpc/internal" 39 "gitee.com/zhaochuninhefei/gmgo/grpc/internal/backoff" 40 "gitee.com/zhaochuninhefei/gmgo/grpc/internal/resolver/dns" 41 "gitee.com/zhaochuninhefei/gmgo/grpc/resolver" 42 43 lbpb "gitee.com/zhaochuninhefei/gmgo/grpc/balancer/grpclb/grpc_lb_v1" 44 durationpb "github.com/golang/protobuf/ptypes/duration" 45 ) 46 47 const ( 48 lbTokenKey = "lb-token" 49 defaultFallbackTimeout = 10 * time.Second 50 grpclbName = "grpclb" 51 ) 52 53 var errServerTerminatedConnection = errors.New("grpclb: failed to recv server list: server terminated connection") 54 var logger = grpclog.Component("grpclb") 55 56 func 
convertDuration(d *durationpb.Duration) time.Duration { 57 if d == nil { 58 return 0 59 } 60 return time.Duration(d.Seconds)*time.Second + time.Duration(d.Nanos)*time.Nanosecond 61 } 62 63 // Client API for LoadBalancer service. 64 // Mostly copied from generated pb.go file. 65 // To avoid circular dependency. 66 type loadBalancerClient struct { 67 cc *grpc.ClientConn 68 } 69 70 func (c *loadBalancerClient) BalanceLoad(ctx context.Context, opts ...grpc.CallOption) (*balanceLoadClientStream, error) { 71 desc := &grpc.StreamDesc{ 72 StreamName: "BalanceLoad", 73 ServerStreams: true, 74 ClientStreams: true, 75 } 76 stream, err := c.cc.NewStream(ctx, desc, "/grpc.lb.v1.LoadBalancer/BalanceLoad", opts...) 77 if err != nil { 78 return nil, err 79 } 80 x := &balanceLoadClientStream{stream} 81 return x, nil 82 } 83 84 type balanceLoadClientStream struct { 85 grpc.ClientStream 86 } 87 88 func (x *balanceLoadClientStream) Send(m *lbpb.LoadBalanceRequest) error { 89 return x.ClientStream.SendMsg(m) 90 } 91 92 func (x *balanceLoadClientStream) Recv() (*lbpb.LoadBalanceResponse, error) { 93 m := new(lbpb.LoadBalanceResponse) 94 if err := x.ClientStream.RecvMsg(m); err != nil { 95 return nil, err 96 } 97 return m, nil 98 } 99 100 func init() { 101 balancer.Register(newLBBuilder()) 102 dns.EnableSRVLookups = true 103 } 104 105 // newLBBuilder creates a builder for grpclb. 106 func newLBBuilder() balancer.Builder { 107 return newLBBuilderWithFallbackTimeout(defaultFallbackTimeout) 108 } 109 110 // newLBBuilderWithFallbackTimeout creates a grpclb builder with the given 111 // fallbackTimeout. If no response is received from the remote balancer within 112 // fallbackTimeout, the backend addresses from the resolved address list will be 113 // used. 114 // 115 // Only call this function when a non-default fallback timeout is needed. 
func newLBBuilderWithFallbackTimeout(fallbackTimeout time.Duration) balancer.Builder {
	return &lbBuilder{
		fallbackTimeout: fallbackTimeout,
	}
}

// lbBuilder builds grpclb balancers. fallbackTimeout is how long to wait for
// the remote balancer before falling back to resolver-provided backends.
type lbBuilder struct {
	fallbackTimeout time.Duration
}

// Name returns the name under which this policy is registered ("grpclb").
func (b *lbBuilder) Name() string {
	return grpclbName
}

// Build creates a new grpclb balancer. It wires up a manual resolver (used by
// the internal ClientConn to the remote balancer), a SubConn-caching wrapper
// around cc, and — when a creds bundle is provided — separate credentials for
// talking to the balancer vs. the backends it returns.
func (b *lbBuilder) Build(cc balancer.ClientConn, opt balancer.BuildOptions) balancer.Balancer {
	// This generates a manual resolver builder with a fixed scheme. This
	// scheme will be used to dial to remote LB, so we can send filtered
	// address updates to remote LB ClientConn using this manual resolver.
	r := &lbManualResolver{scheme: "grpclb-internal", ccb: cc}

	lb := &lbBalancer{
		cc: newLBCacheClientConn(cc),
		// Endpoint is deprecated, use GetEndpoint() instead.
		//dialTarget: opt.Target.Endpoint,
		//target:     opt.Target.Endpoint,
		dialTarget:      opt.Target.GetEndpoint(),
		target:          opt.Target.GetEndpoint(),
		opt:             opt,
		fallbackTimeout: b.fallbackTimeout,
		doneCh:          make(chan struct{}),

		manualResolver: r,
		subConns:       make(map[resolver.Address]balancer.SubConn),
		scStates:       make(map[balancer.SubConn]connectivity.State),
		picker:         &errPicker{err: balancer.ErrNoSubConnAvailable},
		clientStats:    newRPCStats(),
		backoff:        backoff.DefaultExponential, // TODO: make backoff configurable.
	}

	// Creds-bundle errors are logged but not fatal: the balancer still works,
	// falling back to the TransportCredentials from BuildOptions.
	var err error
	if opt.CredsBundle != nil {
		lb.grpclbClientConnCreds, err = opt.CredsBundle.NewWithMode(internal.CredsBundleModeBalancer)
		if err != nil {
			logger.Warningf("lbBalancer: client connection creds NewWithMode failed: %v", err)
		}
		lb.grpclbBackendCreds, err = opt.CredsBundle.NewWithMode(internal.CredsBundleModeBackendFromBalancer)
		if err != nil {
			logger.Warningf("lbBalancer: backend creds NewWithMode failed: %v", err)
		}
	}

	return lb
}

// lbBalancer is the grpclb balancer implementation. Fields above mu are set
// at Build time and read-only afterwards; everything from mu down is guarded
// by mu.
type lbBalancer struct {
	cc         *lbCacheClientConn
	dialTarget string // user's dial target
	target     string // same as dialTarget unless overridden in service config
	opt        balancer.BuildOptions

	usePickFirst bool

	// grpclbClientConnCreds is the creds bundle to be used to connect to grpclb
	// servers. If it's nil, use the TransportCredentials from BuildOptions
	// instead.
	grpclbClientConnCreds credentials.Bundle
	// grpclbBackendCreds is the creds bundle to be used for addresses that are
	// returned by grpclb server. If it's nil, don't set anything when creating
	// SubConns.
	grpclbBackendCreds credentials.Bundle

	fallbackTimeout time.Duration
	doneCh          chan struct{}

	// manualResolver is used in the remote LB ClientConn inside grpclb. When
	// resolved address updates are received by grpclb, filtered updates will be
	// sent to remote LB ClientConn through this resolver.
	manualResolver *lbManualResolver
	// The ClientConn to talk to the remote balancer.
	ccRemoteLB *remoteBalancerCCWrapper
	// backoff for calling remote balancer.
	backoff backoff.Strategy

	// Support client side load reporting. Each picker gets a reference to this,
	// and will update its content.
	clientStats *rpcStats

	mu sync.Mutex // guards everything following.
	// The full server list including drops, used to check if the newly received
	// serverList contains anything new. Each generated picker will also have
	// reference to this list to do the first layer pick.
	fullServerList []*lbpb.Server
	// Backend addresses. It's kept so the addresses are available when
	// switching between round_robin and pickfirst.
	backendAddrs []resolver.Address
	// All backend addresses, with metadata set to nil. This list contains all
	// backend addresses in the same order and with the same duplicates as in
	// serverlist. When generating picker, a SubConn slice with the same order
	// but with only READY SCs will be generated.
	backendAddrsWithoutMetadata []resolver.Address
	// Roundrobin functionalities.
	state    connectivity.State
	subConns map[resolver.Address]balancer.SubConn   // Used to new/remove SubConn.
	scStates map[balancer.SubConn]connectivity.State // Used to filter READY SubConns.
	picker   balancer.Picker
	// Support fallback to resolved backend addresses if there's no response
	// from remote balancer within fallbackTimeout.
	remoteBalancerConnected bool
	serverListReceived      bool
	inFallback              bool
	// resolvedBackendAddrs is resolvedAddrs minus remote balancers. It's set
	// when resolved address updates are received, and read in the goroutine
	// handling fallback.
	resolvedBackendAddrs []resolver.Address
	connErr              error // the last connection error
}

// regeneratePicker takes a snapshot of the balancer, and generates a picker from
// it. The picker
//   - always returns ErrTransientFailure if the balancer is in TransientFailure,
//   - does two layer roundrobin pick otherwise.
//
// Caller must hold lb.mu.
func (lb *lbBalancer) regeneratePicker(resetDrop bool) {
	if lb.state == connectivity.TransientFailure {
		lb.picker = &errPicker{err: fmt.Errorf("all SubConns are in TransientFailure, last connection error: %v", lb.connErr)}
		return
	}

	if lb.state == connectivity.Connecting {
		lb.picker = &errPicker{err: balancer.ErrNoSubConnAvailable}
		return
	}

	var readySCs []balancer.SubConn
	if lb.usePickFirst {
		// In pick_first mode there is at most one SubConn; grab whichever one
		// the map iteration yields first.
		for _, sc := range lb.subConns {
			readySCs = append(readySCs, sc)
			break
		}
	} else {
		// Preserve serverlist order (including duplicates) so the picker's
		// round robin matches the weights implied by the server list.
		for _, a := range lb.backendAddrsWithoutMetadata {
			if sc, ok := lb.subConns[a]; ok {
				if st, ok := lb.scStates[sc]; ok && st == connectivity.Ready {
					readySCs = append(readySCs, sc)
				}
			}
		}
	}

	if len(readySCs) <= 0 {
		// If there's no ready SubConns, always re-pick. This is to avoid drops
		// unless at least one SubConn is ready. Otherwise we may drop more
		// often than want because of drops + re-picks(which become re-drops).
		//
		// This doesn't seem to be necessary after the connecting check above.
		// Kept for safety.
		lb.picker = &errPicker{err: balancer.ErrNoSubConnAvailable}
		return
	}
	if lb.inFallback {
		// Fallback backends carry no drop information; use plain round robin.
		lb.picker = newRRPicker(readySCs)
		return
	}
	if resetDrop {
		lb.picker = newLBPicker(lb.fullServerList, readySCs, lb.clientStats)
		return
	}
	// Reuse the existing lbPicker's drop state when only the ready SubConns
	// changed; a brand-new picker would reset the drop index.
	prevLBPicker, ok := lb.picker.(*lbPicker)
	if !ok {
		lb.picker = newLBPicker(lb.fullServerList, readySCs, lb.clientStats)
		return
	}
	prevLBPicker.updateReadySCs(readySCs)
}

// aggregateSubConnStates calculates the aggregated state of SubConns in
// lb.subConns. These SubConns are subconns in use (when switching between
// fallback and grpclb). lb.scStates contains states for all SubConns, including
// those in cache (SubConns are cached for 10 seconds after remove).
//
// The aggregated state is:
//   - If at least one SubConn in Ready, the aggregated state is Ready;
//   - Else if at least one SubConn in Connecting or IDLE, the aggregated state is Connecting;
//   - It's OK to consider IDLE as Connecting. SubConns never stay in IDLE,
//     they start to connect immediately. But there's a race between the overall
//     state is reported, and when the new SubConn state arrives. And SubConns
//     never go back to IDLE.
//   - Else the aggregated state is TransientFailure.
func (lb *lbBalancer) aggregateSubConnStates() connectivity.State {
	var numConnecting uint64

	for _, sc := range lb.subConns {
		if state, ok := lb.scStates[sc]; ok {
			switch state {
			case connectivity.Ready:
				return connectivity.Ready
			case connectivity.Connecting, connectivity.Idle:
				numConnecting++
			}
		}
	}
	if numConnecting > 0 {
		return connectivity.Connecting
	}
	return connectivity.TransientFailure
}

// UpdateSubConnState handles a connectivity-state change for sc: it records
// the new state, reconnects idle SubConns, regenerates the picker when the
// Ready-ness of sc flipped, and enters fallback if the connection to the
// remote balancer has been lost.
func (lb *lbBalancer) UpdateSubConnState(sc balancer.SubConn, scs balancer.SubConnState) {
	s := scs.ConnectivityState
	if logger.V(2) {
		logger.Infof("lbBalancer: handle SubConn state change: %p, %v", sc, s)
	}
	lb.mu.Lock()
	defer lb.mu.Unlock()

	oldS, ok := lb.scStates[sc]
	if !ok {
		if logger.V(2) {
			logger.Infof("lbBalancer: got state changes for an unknown SubConn: %p, %v", sc, s)
		}
		return
	}
	lb.scStates[sc] = s
	switch s {
	case connectivity.Idle:
		sc.Connect()
	case connectivity.Shutdown:
		// When an address was removed by resolver, the balancer called
		// RemoveSubConn but kept the sc's state in scStates. Remove state for
		// this sc here.
		delete(lb.scStates, sc)
	case connectivity.TransientFailure:
		lb.connErr = scs.ConnectionError
	}
	// Force regenerate picker if
	//  - this sc became ready from not-ready
	//  - this sc became not-ready from ready
	lb.updateStateAndPicker((oldS == connectivity.Ready) != (s == connectivity.Ready), false)

	// Enter fallback when the aggregated state is not Ready and the connection
	// to remote balancer is lost.
	if lb.state != connectivity.Ready {
		if !lb.inFallback && !lb.remoteBalancerConnected {
			// Enter fallback.
			lb.refreshSubConns(lb.resolvedBackendAddrs, true, lb.usePickFirst)
		}
	}
}

// updateStateAndPicker re-calculate the aggregated state, and regenerate picker
// if overall state is changed.
//
// If forceRegeneratePicker is true, picker will be regenerated.
// resetDrop is forwarded to regeneratePicker. Caller must hold lb.mu.
func (lb *lbBalancer) updateStateAndPicker(forceRegeneratePicker bool, resetDrop bool) {
	oldAggrState := lb.state
	lb.state = lb.aggregateSubConnStates()
	// Regenerate picker when one of the following happens:
	//  - caller wants to regenerate
	//  - the aggregated state changed
	if forceRegeneratePicker || (lb.state != oldAggrState) {
		lb.regeneratePicker(resetDrop)
	}

	// Always push the (possibly unchanged) state/picker to the parent cc.
	lb.cc.UpdateState(balancer.State{ConnectivityState: lb.state, Picker: lb.picker})
}

// fallbackToBackendsAfter blocks for fallbackTimeout and falls back to use
// resolved backends (backends received from resolver, not from remote balancer)
// if no connection to remote balancers was successful. Runs in its own
// goroutine; aborts early when the balancer is closed (doneCh).
func (lb *lbBalancer) fallbackToBackendsAfter(fallbackTimeout time.Duration) {
	timer := time.NewTimer(fallbackTimeout)
	defer timer.Stop()
	select {
	case <-timer.C:
	case <-lb.doneCh:
		return
	}
	lb.mu.Lock()
	if lb.inFallback || lb.serverListReceived {
		// Already in fallback, or a server list arrived in time — nothing to do.
		lb.mu.Unlock()
		return
	}
	// Enter fallback.
	lb.refreshSubConns(lb.resolvedBackendAddrs, true, lb.usePickFirst)
	lb.mu.Unlock()
}

// handleServiceConfig applies a new grpclb service config: it may override the
// target name sent to the remote balancer (restarting the balancer stream when
// it changes) and may switch the child policy between round_robin and
// pick_first.
func (lb *lbBalancer) handleServiceConfig(gc *grpclbServiceConfig) {
	lb.mu.Lock()
	defer lb.mu.Unlock()

	// grpclb uses the user's dial target to populate the `Name` field of the
	// `InitialLoadBalanceRequest` message sent to the remote balancer. But when
	// grpclb is used a child policy in the context of RLS, we want the `Name`
	// field to be populated with the value received from the RLS server. To
	// support this use case, an optional "target_name" field has been added to
	// the grpclb LB policy's config. If specified, it overrides the name of
	// the target to be sent to the remote balancer; if not, the target to be
	// sent to the balancer will continue to be obtained from the target URI
	// passed to the gRPC client channel. Whenever that target to be sent to the
	// balancer is updated, we need to restart the stream to the balancer as
	// this target is sent in the first message on the stream.
	if gc != nil {
		target := lb.dialTarget
		if gc.TargetName != "" {
			target = gc.TargetName
		}
		if target != lb.target {
			lb.target = target
			if lb.ccRemoteLB != nil {
				lb.ccRemoteLB.cancelRemoteBalancerCall()
			}
		}
	}

	newUsePickFirst := childIsPickFirst(gc)
	if lb.usePickFirst == newUsePickFirst {
		return
	}
	if logger.V(2) {
		logger.Infof("lbBalancer: switching mode, new usePickFirst: %+v", newUsePickFirst)
	}
	lb.refreshSubConns(lb.backendAddrs, lb.inFallback, newUsePickFirst)
}

// ResolverError implements balancer.Balancer.
func (lb *lbBalancer) ResolverError(error) {
	// Ignore resolver errors. GRPCLB is not selected unless the resolver
	// works at least once.
}

// UpdateClientConnState processes a resolver update: it splits addresses into
// remote-balancer and backend addresses, manages the lifetime of the
// ClientConn to the remote balancer, and enters/refreshes fallback mode when
// appropriate.
func (lb *lbBalancer) UpdateClientConnState(ccs balancer.ClientConnState) error {
	if logger.V(2) {
		logger.Infof("lbBalancer: UpdateClientConnState: %+v", ccs)
	}
	gc, _ := ccs.BalancerConfig.(*grpclbServiceConfig)
	lb.handleServiceConfig(gc)

	addrs := ccs.ResolverState.Addresses

	var remoteBalancerAddrs, backendAddrs []resolver.Address
	for _, a := range addrs {
		if a.Type == resolver.GRPCLB {
			a.Type = resolver.Backend
			remoteBalancerAddrs = append(remoteBalancerAddrs, a)
		} else {
			backendAddrs = append(backendAddrs, a)
		}
	}
	if sd := grpclbstate.Get(ccs.ResolverState); sd != nil {
		// Override any balancer addresses provided via
		// ccs.ResolverState.Addresses.
		remoteBalancerAddrs = sd.BalancerAddresses
	}

	if len(backendAddrs)+len(remoteBalancerAddrs) == 0 {
		// There should be at least one address, either grpclb server or
		// fallback. Empty address is not valid.
		return balancer.ErrBadResolverState
	}

	if len(remoteBalancerAddrs) == 0 {
		if lb.ccRemoteLB != nil {
			lb.ccRemoteLB.close()
			lb.ccRemoteLB = nil
		}
	} else if lb.ccRemoteLB == nil {
		// First time receiving resolved addresses, create a cc to remote
		// balancers.
		lb.newRemoteBalancerCCWrapper()
		// Start the fallback goroutine.
		go lb.fallbackToBackendsAfter(lb.fallbackTimeout)
	}

	if lb.ccRemoteLB != nil {
		// cc to remote balancers uses lb.manualResolver. Send the updated remote
		// balancer addresses to it through manualResolver.
		lb.manualResolver.UpdateState(resolver.State{Addresses: remoteBalancerAddrs})
	}

	lb.mu.Lock()
	lb.resolvedBackendAddrs = backendAddrs
	if len(remoteBalancerAddrs) == 0 || lb.inFallback {
		// If there's no remote balancer address in ClientConn update, grpclb
		// enters fallback mode immediately.
		//
		// If a new update is received while grpclb is in fallback, update the
		// list of backends being used to the new fallback backends.
		lb.refreshSubConns(lb.resolvedBackendAddrs, true, lb.usePickFirst)
	}
	lb.mu.Unlock()
	return nil
}

// Close shuts the balancer down; it is idempotent (the select guards against
// a double close of doneCh).
func (lb *lbBalancer) Close() {
	select {
	case <-lb.doneCh:
		return
	default:
	}
	close(lb.doneCh)
	if lb.ccRemoteLB != nil {
		lb.ccRemoteLB.close()
	}
	lb.cc.close()
}

// ExitIdle implements balancer.Balancer; grpclb never sits idle, so this is a no-op.
func (lb *lbBalancer) ExitIdle() {}