gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/grpc/balancer/grpclb/grpclb.go

/*
 *
 * Copyright 2016 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

// Package grpclb defines a grpclb balancer.
//
// To install the grpclb balancer, import this package as:
//
//	import _ "gitee.com/ks-custle/core-gm/grpc/balancer/grpclb"
package grpclb

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"time"

	grpc "gitee.com/ks-custle/core-gm/grpc"
	"gitee.com/ks-custle/core-gm/grpc/balancer"
	grpclbstate "gitee.com/ks-custle/core-gm/grpc/balancer/grpclb/state"
	"gitee.com/ks-custle/core-gm/grpc/connectivity"
	"gitee.com/ks-custle/core-gm/grpc/credentials"
	"gitee.com/ks-custle/core-gm/grpc/grpclog"
	"gitee.com/ks-custle/core-gm/grpc/internal"
	"gitee.com/ks-custle/core-gm/grpc/internal/backoff"
	"gitee.com/ks-custle/core-gm/grpc/internal/resolver/dns"
	"gitee.com/ks-custle/core-gm/grpc/resolver"

	lbpb "gitee.com/ks-custle/core-gm/grpc/balancer/grpclb/grpc_lb_v1"
	durationpb "github.com/golang/protobuf/ptypes/duration"
)

const (
	lbTokenKey             = "lb-token"
	defaultFallbackTimeout = 10 * time.Second
	grpclbName             = "grpclb"
)

var errServerTerminatedConnection = errors.New("grpclb: failed to recv server list: server terminated connection")
var logger = grpclog.Component("grpclb")

func convertDuration(d *durationpb.Duration) time.Duration {
	if d == nil {
		return 0
	}
	return time.Duration(d.Seconds)*time.Second + time.Duration(d.Nanos)*time.Nanosecond
}

// Client API for the LoadBalancer service.
// Mostly copied from the generated pb.go file to avoid a circular dependency.
type loadBalancerClient struct {
	cc *grpc.ClientConn
}

func (c *loadBalancerClient) BalanceLoad(ctx context.Context, opts ...grpc.CallOption) (*balanceLoadClientStream, error) {
	desc := &grpc.StreamDesc{
		StreamName:    "BalanceLoad",
		ServerStreams: true,
		ClientStreams: true,
	}
	stream, err := c.cc.NewStream(ctx, desc, "/grpc.lb.v1.LoadBalancer/BalanceLoad", opts...)
	if err != nil {
		return nil, err
	}
	x := &balanceLoadClientStream{stream}
	return x, nil
}

type balanceLoadClientStream struct {
	grpc.ClientStream
}

func (x *balanceLoadClientStream) Send(m *lbpb.LoadBalanceRequest) error {
	return x.ClientStream.SendMsg(m)
}

func (x *balanceLoadClientStream) Recv() (*lbpb.LoadBalanceResponse, error) {
	m := new(lbpb.LoadBalanceResponse)
	if err := x.ClientStream.RecvMsg(m); err != nil {
		return nil, err
	}
	return m, nil
}
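// As a rough sketch (not part of the original file; queryBalancer is a
// hypothetical helper, and the message shapes follow the grpc.lb.v1 protocol
// as generated in the grpc_lb_v1 package), the stream above is driven by
// sending one initial request naming the target, then receiving server lists
// in a loop:
//
//	func queryBalancer(ctx context.Context, cc *grpc.ClientConn, target string) error {
//		lbClient := &loadBalancerClient{cc: cc}
//		stream, err := lbClient.BalanceLoad(ctx)
//		if err != nil {
//			return err
//		}
//		// The first message on the stream must be an InitialLoadBalanceRequest.
//		initReq := &lbpb.LoadBalanceRequest{
//			LoadBalanceRequestType: &lbpb.LoadBalanceRequest_InitialRequest{
//				InitialRequest: &lbpb.InitialLoadBalanceRequest{Name: target},
//			},
//		}
//		if err := stream.Send(initReq); err != nil {
//			return err
//		}
//		// Subsequent responses carry server lists (and load reporting intervals).
//		for {
//			reply, err := stream.Recv()
//			if err != nil {
//				return err
//			}
//			_ = reply.GetServerList() // apply the new server list
//		}
//	}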
func init() {
	balancer.Register(newLBBuilder())
	dns.EnableSRVLookups = true
}

// newLBBuilder creates a builder for grpclb.
func newLBBuilder() balancer.Builder {
	return newLBBuilderWithFallbackTimeout(defaultFallbackTimeout)
}

// newLBBuilderWithFallbackTimeout creates a grpclb builder with the given
// fallbackTimeout. If no response is received from the remote balancer within
// fallbackTimeout, the backend addresses from the resolved address list will
// be used.
//
// Only call this function when a non-default fallback timeout is needed.
func newLBBuilderWithFallbackTimeout(fallbackTimeout time.Duration) balancer.Builder {
	return &lbBuilder{
		fallbackTimeout: fallbackTimeout,
	}
}

type lbBuilder struct {
	fallbackTimeout time.Duration
}

func (b *lbBuilder) Name() string {
	return grpclbName
}

func (b *lbBuilder) Build(cc balancer.ClientConn, opt balancer.BuildOptions) balancer.Balancer {
	// This generates a manual resolver builder with a fixed scheme. This
	// scheme will be used to dial the remote LB, so we can send filtered
	// address updates to the remote LB ClientConn using this manual resolver.
	r := &lbManualResolver{scheme: "grpclb-internal", ccb: cc}

	lb := &lbBalancer{
		cc: newLBCacheClientConn(cc),
		// Endpoint is deprecated, use GetEndpoint() instead.
		//dialTarget: opt.Target.Endpoint,
		//target:     opt.Target.Endpoint,
		dialTarget:      opt.Target.GetEndpoint(),
		target:          opt.Target.GetEndpoint(),
		opt:             opt,
		fallbackTimeout: b.fallbackTimeout,
		doneCh:          make(chan struct{}),

		manualResolver: r,
		subConns:       make(map[resolver.Address]balancer.SubConn),
		scStates:       make(map[balancer.SubConn]connectivity.State),
		picker:         &errPicker{err: balancer.ErrNoSubConnAvailable},
		clientStats:    newRPCStats(),
		backoff:        backoff.DefaultExponential, // TODO: make backoff configurable.
	}

	var err error
	if opt.CredsBundle != nil {
		lb.grpclbClientConnCreds, err = opt.CredsBundle.NewWithMode(internal.CredsBundleModeBalancer)
		if err != nil {
			logger.Warningf("lbBalancer: client connection creds NewWithMode failed: %v", err)
		}
		lb.grpclbBackendCreds, err = opt.CredsBundle.NewWithMode(internal.CredsBundleModeBackendFromBalancer)
		if err != nil {
			logger.Warningf("lbBalancer: backend creds NewWithMode failed: %v", err)
		}
	}

	return lb
}
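// lbManualResolver (defined elsewhere in this package) follows the common
// manual-resolver pattern: a resolver.Builder with a fixed scheme whose only
// job is to let the balancer push address updates into the remote-LB
// ClientConn. A minimal sketch of that pattern, with illustrative names that
// are not the package's actual implementation:
//
//	type manualResolver struct {
//		scheme string
//		cc     resolver.ClientConn // set when the ClientConn calls Build
//	}
//
//	func (r *manualResolver) Build(_ resolver.Target, cc resolver.ClientConn, _ resolver.BuildOptions) (resolver.Resolver, error) {
//		r.cc = cc
//		return r, nil
//	}
//
//	func (r *manualResolver) Scheme() string { return r.scheme }
//
//	// UpdateState is called by the balancer to push filtered addresses.
//	func (r *manualResolver) UpdateState(s resolver.State) {
//		r.cc.UpdateState(s) // some gRPC versions return an error here; it can be ignored
//	}
//
//	func (r *manualResolver) ResolveNow(resolver.ResolveNowOptions) {}
//	func (r *manualResolver) Close()                                {}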
type lbBalancer struct {
	cc         *lbCacheClientConn
	dialTarget string // user's dial target
	target     string // same as dialTarget unless overridden in service config
	opt        balancer.BuildOptions

	usePickFirst bool

	// grpclbClientConnCreds is the creds bundle to be used to connect to grpclb
	// servers. If it's nil, use the TransportCredentials from BuildOptions
	// instead.
	grpclbClientConnCreds credentials.Bundle
	// grpclbBackendCreds is the creds bundle to be used for addresses that are
	// returned by grpclb server. If it's nil, don't set anything when creating
	// SubConns.
	grpclbBackendCreds credentials.Bundle

	fallbackTimeout time.Duration
	doneCh          chan struct{}

	// manualResolver is used in the remote LB ClientConn inside grpclb. When
	// resolved address updates are received by grpclb, filtered updates will
	// be sent to the remote LB ClientConn through this resolver.
	manualResolver *lbManualResolver
	// The ClientConn to talk to the remote balancer.
	ccRemoteLB *remoteBalancerCCWrapper
	// backoff for calling remote balancer.
	backoff backoff.Strategy

	// Support client side load reporting. Each picker gets a reference to this,
	// and will update its content.
	clientStats *rpcStats

	mu sync.Mutex // guards everything following.
	// The full server list including drops, used to check if the newly received
	// serverList contains anything new. Each generated picker will also have a
	// reference to this list to do the first layer pick.
	fullServerList []*lbpb.Server
	// Backend addresses. It's kept so the addresses are available when
	// switching between round_robin and pickfirst.
	backendAddrs []resolver.Address
	// All backend addresses, with metadata set to nil. This list contains all
	// backend addresses in the same order and with the same duplicates as in
	// serverlist. When generating the picker, a SubConn slice with the same
	// order but with only READY SCs will be generated.
	backendAddrsWithoutMetadata []resolver.Address
	// Roundrobin functionalities.
	state    connectivity.State
	subConns map[resolver.Address]balancer.SubConn   // Used to create/remove SubConns.
	scStates map[balancer.SubConn]connectivity.State // Used to filter READY SubConns.
	picker   balancer.Picker
	// Support fallback to resolved backend addresses if there's no response
	// from remote balancer within fallbackTimeout.
	remoteBalancerConnected bool
	serverListReceived      bool
	inFallback              bool
	// resolvedBackendAddrs is resolvedAddrs minus remote balancers. It's set
	// when resolved address updates are received, and read in the goroutine
	// handling fallback.
	resolvedBackendAddrs []resolver.Address
	connErr              error // the last connection error
}

// regeneratePicker takes a snapshot of the balancer, and generates a picker
// from it. The picker
//   - always returns ErrTransientFailure if the balancer is in TransientFailure,
//   - does a two-layer round-robin pick otherwise.
//
// Caller must hold lb.mu.
func (lb *lbBalancer) regeneratePicker(resetDrop bool) {
	if lb.state == connectivity.TransientFailure {
		lb.picker = &errPicker{err: fmt.Errorf("all SubConns are in TransientFailure, last connection error: %v", lb.connErr)}
		return
	}

	if lb.state == connectivity.Connecting {
		lb.picker = &errPicker{err: balancer.ErrNoSubConnAvailable}
		return
	}

	var readySCs []balancer.SubConn
	if lb.usePickFirst {
		for _, sc := range lb.subConns {
			readySCs = append(readySCs, sc)
			break
		}
	} else {
		for _, a := range lb.backendAddrsWithoutMetadata {
			if sc, ok := lb.subConns[a]; ok {
				if st, ok := lb.scStates[sc]; ok && st == connectivity.Ready {
					readySCs = append(readySCs, sc)
				}
			}
		}
	}

	if len(readySCs) == 0 {
		// If there are no ready SubConns, always re-pick. This is to avoid
		// drops unless at least one SubConn is ready. Otherwise we may drop
		// more often than intended because of drops + re-picks (which become
		// re-drops).
		//
		// This doesn't seem to be necessary after the connecting check above.
		// Kept for safety.
		lb.picker = &errPicker{err: balancer.ErrNoSubConnAvailable}
		return
	}
	if lb.inFallback {
		lb.picker = newRRPicker(readySCs)
		return
	}
	if resetDrop {
		lb.picker = newLBPicker(lb.fullServerList, readySCs, lb.clientStats)
		return
	}
	prevLBPicker, ok := lb.picker.(*lbPicker)
	if !ok {
		lb.picker = newLBPicker(lb.fullServerList, readySCs, lb.clientStats)
		return
	}
	prevLBPicker.updateReadySCs(readySCs)
}
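// The two-layer pick mentioned above works roughly as follows. This is an
// illustrative sketch, not the actual lbPicker implementation; the type and
// field names (lbPickerSketch, serverList, subConns, stats) are hypothetical,
// and status/codes refer to gRPC's status and codes packages:
//
//	func (p *lbPickerSketch) Pick(balancer.PickInfo) (balancer.PickResult, error) {
//		p.mu.Lock()
//		defer p.mu.Unlock()
//		// Layer 1: walk the full server list from the balancer. A drop entry
//		// fails the RPC without consuming a SubConn.
//		s := p.serverList[p.serverListNext]
//		p.serverListNext = (p.serverListNext + 1) % len(p.serverList)
//		if s.Drop {
//			p.stats.drop(s.LoadBalanceToken)
//			return balancer.PickResult{}, status.Errorf(codes.Unavailable, "request dropped by grpclb")
//		}
//		// Layer 2: plain round robin over the READY SubConns.
//		sc := p.subConns[p.subConnsNext]
//		p.subConnsNext = (p.subConnsNext + 1) % len(p.subConns)
//		return balancer.PickResult{SubConn: sc}, nil
//	}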
// aggregateSubConnStates calculates the aggregated state of SubConns in
// lb.subConns. These SubConns are the subconns in use (when switching between
// fallback and grpclb). lb.scStates contains states for all SubConns,
// including those in cache (SubConns are cached for 10 seconds after being
// removed).
//
// The aggregated state is:
//   - If at least one SubConn is Ready, the aggregated state is Ready;
//   - Else if at least one SubConn is Connecting or IDLE, the aggregated state
//     is Connecting;
//   - It's OK to consider IDLE as Connecting. SubConns never stay in IDLE,
//     they start to connect immediately. But there's a race between when the
//     overall state is reported and when the new SubConn state arrives. And
//     SubConns never go back to IDLE.
//   - Else the aggregated state is TransientFailure.
func (lb *lbBalancer) aggregateSubConnStates() connectivity.State {
	var numConnecting uint64

	for _, sc := range lb.subConns {
		if state, ok := lb.scStates[sc]; ok {
			switch state {
			case connectivity.Ready:
				return connectivity.Ready
			case connectivity.Connecting, connectivity.Idle:
				numConnecting++
			}
		}
	}
	if numConnecting > 0 {
		return connectivity.Connecting
	}
	return connectivity.TransientFailure
}
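// Worked examples of the aggregation rules above (illustrative):
//
//	{Ready, TransientFailure}            -> Ready             // any Ready wins
//	{Connecting, TransientFailure}       -> Connecting
//	{Idle, TransientFailure}             -> Connecting        // Idle counts as Connecting
//	{TransientFailure, TransientFailure} -> TransientFailure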
func (lb *lbBalancer) UpdateSubConnState(sc balancer.SubConn, scs balancer.SubConnState) {
	s := scs.ConnectivityState
	if logger.V(2) {
		logger.Infof("lbBalancer: handle SubConn state change: %p, %v", sc, s)
	}
	lb.mu.Lock()
	defer lb.mu.Unlock()

	oldS, ok := lb.scStates[sc]
	if !ok {
		if logger.V(2) {
			logger.Infof("lbBalancer: got state changes for an unknown SubConn: %p, %v", sc, s)
		}
		return
	}
	lb.scStates[sc] = s
	switch s {
	case connectivity.Idle:
		sc.Connect()
	case connectivity.Shutdown:
		// When an address was removed by the resolver, the balancer called
		// RemoveSubConn but kept the sc's state in scStates. Remove state for
		// this sc here.
		delete(lb.scStates, sc)
	case connectivity.TransientFailure:
		lb.connErr = scs.ConnectionError
	}
	// Force regenerate picker if
	//  - this sc became ready from not-ready
	//  - this sc became not-ready from ready
	lb.updateStateAndPicker((oldS == connectivity.Ready) != (s == connectivity.Ready), false)

	// Enter fallback when the aggregated state is not Ready and the connection
	// to the remote balancer is lost.
	if lb.state != connectivity.Ready {
		if !lb.inFallback && !lb.remoteBalancerConnected {
			// Enter fallback.
			lb.refreshSubConns(lb.resolvedBackendAddrs, true, lb.usePickFirst)
		}
	}
}

// updateStateAndPicker re-calculates the aggregated state, and regenerates
// the picker if the overall state has changed.
//
// If forceRegeneratePicker is true, the picker is always regenerated.
func (lb *lbBalancer) updateStateAndPicker(forceRegeneratePicker bool, resetDrop bool) {
	oldAggrState := lb.state
	lb.state = lb.aggregateSubConnStates()
	// Regenerate picker when one of the following happens:
	//  - caller wants to regenerate
	//  - the aggregated state changed
	if forceRegeneratePicker || (lb.state != oldAggrState) {
		lb.regeneratePicker(resetDrop)
	}

	lb.cc.UpdateState(balancer.State{ConnectivityState: lb.state, Picker: lb.picker})
}

// fallbackToBackendsAfter blocks for fallbackTimeout and falls back to using
// the resolved backends (backends received from the resolver, not from the
// remote balancer) if no connection to a remote balancer was successful.
func (lb *lbBalancer) fallbackToBackendsAfter(fallbackTimeout time.Duration) {
	timer := time.NewTimer(fallbackTimeout)
	defer timer.Stop()
	select {
	case <-timer.C:
	case <-lb.doneCh:
		return
	}
	lb.mu.Lock()
	if lb.inFallback || lb.serverListReceived {
		lb.mu.Unlock()
		return
	}
	// Enter fallback.
	lb.refreshSubConns(lb.resolvedBackendAddrs, true, lb.usePickFirst)
	lb.mu.Unlock()
}

func (lb *lbBalancer) handleServiceConfig(gc *grpclbServiceConfig) {
	lb.mu.Lock()
	defer lb.mu.Unlock()

	// grpclb uses the user's dial target to populate the `Name` field of the
	// `InitialLoadBalanceRequest` message sent to the remote balancer. But when
	// grpclb is used as a child policy in the context of RLS, we want the
	// `Name` field to be populated with the value received from the RLS server.
	// To support this use case, an optional "target_name" field has been added
	// to the grpclb LB policy's config. If specified, it overrides the name of
	// the target to be sent to the remote balancer; if not, the target to be
	// sent to the balancer will continue to be obtained from the target URI
	// passed to the gRPC client channel. Whenever that target to be sent to the
	// balancer is updated, we need to restart the stream to the balancer as
	// this target is sent in the first message on the stream.
	if gc != nil {
		target := lb.dialTarget
		if gc.TargetName != "" {
			target = gc.TargetName
		}
		if target != lb.target {
			lb.target = target
			if lb.ccRemoteLB != nil {
				lb.ccRemoteLB.cancelRemoteBalancerCall()
			}
		}
	}

	newUsePickFirst := childIsPickFirst(gc)
	if lb.usePickFirst == newUsePickFirst {
		return
	}
	if logger.V(2) {
		logger.Infof("lbBalancer: switching mode, new usePickFirst: %+v", newUsePickFirst)
	}
	lb.refreshSubConns(lb.backendAddrs, lb.inFallback, newUsePickFirst)
}
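// As a sketch, a service config that exercises both knobs handled above might
// look like the following JSON. The field spellings are assumptions based on
// this package's grpclbServiceConfig, and the hostname is a placeholder:
//
//	{
//	  "loadBalancingConfig": [{
//	    "grpclb": {
//	      "childPolicy": [{"pick_first": {}}],
//	      "targetName": "lb-override.example.com"
//	    }
//	  }]
//	}
//
// With such a config, childIsPickFirst(gc) reports true and lb.target becomes
// "lb-override.example.com" instead of the user's dial target.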
func (lb *lbBalancer) ResolverError(error) {
	// Ignore resolver errors. grpclb is not selected unless the resolver
	// works at least once.
}

func (lb *lbBalancer) UpdateClientConnState(ccs balancer.ClientConnState) error {
	if logger.V(2) {
		logger.Infof("lbBalancer: UpdateClientConnState: %+v", ccs)
	}
	gc, _ := ccs.BalancerConfig.(*grpclbServiceConfig)
	lb.handleServiceConfig(gc)

	addrs := ccs.ResolverState.Addresses

	var remoteBalancerAddrs, backendAddrs []resolver.Address
	for _, a := range addrs {
		if a.Type == resolver.GRPCLB {
			a.Type = resolver.Backend
			remoteBalancerAddrs = append(remoteBalancerAddrs, a)
		} else {
			backendAddrs = append(backendAddrs, a)
		}
	}
	if sd := grpclbstate.Get(ccs.ResolverState); sd != nil {
		// Override any balancer addresses provided via
		// ccs.ResolverState.Addresses.
		remoteBalancerAddrs = sd.BalancerAddresses
	}

	if len(backendAddrs)+len(remoteBalancerAddrs) == 0 {
		// There should be at least one address, either a grpclb server or a
		// fallback backend. An empty address list is not valid.
		return balancer.ErrBadResolverState
	}

	if len(remoteBalancerAddrs) == 0 {
		if lb.ccRemoteLB != nil {
			lb.ccRemoteLB.close()
			lb.ccRemoteLB = nil
		}
	} else if lb.ccRemoteLB == nil {
		// First time receiving resolved addresses, create a cc to the remote
		// balancers.
		lb.newRemoteBalancerCCWrapper()
		// Start the fallback goroutine.
		go lb.fallbackToBackendsAfter(lb.fallbackTimeout)
	}

	if lb.ccRemoteLB != nil {
		// The cc to the remote balancers uses lb.manualResolver. Send the
		// updated remote balancer addresses to it through manualResolver.
		lb.manualResolver.UpdateState(resolver.State{Addresses: remoteBalancerAddrs})
	}

	lb.mu.Lock()
	lb.resolvedBackendAddrs = backendAddrs
	if len(remoteBalancerAddrs) == 0 || lb.inFallback {
		// If there's no remote balancer address in the ClientConn update,
		// grpclb enters fallback mode immediately.
		//
		// If a new update is received while grpclb is in fallback, update the
		// list of backends being used to the new fallback backends.
		lb.refreshSubConns(lb.resolvedBackendAddrs, true, lb.usePickFirst)
	}
	lb.mu.Unlock()
	return nil
}

func (lb *lbBalancer) Close() {
	select {
	case <-lb.doneCh:
		return
	default:
	}
	close(lb.doneCh)
	if lb.ccRemoteLB != nil {
		lb.ccRemoteLB.close()
	}
	lb.cc.close()
}

func (lb *lbBalancer) ExitIdle() {}
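// Note: UpdateClientConnState above prefers balancer addresses attached via
// the grpclbstate package over addresses typed resolver.GRPCLB. A resolver
// that wants to hand balancer addresses to grpclb would do so roughly like
// this (an illustrative sketch; publish and the addresses are placeholders,
// and grpclbstate.Set is assumed to match the upstream gRPC state package):
//
//	func publish(cc resolver.ClientConn) {
//		s := resolver.State{
//			Addresses: []resolver.Address{{Addr: "backend-1:443"}}, // fallback backends
//		}
//		s = grpclbstate.Set(s, &grpclbstate.State{
//			BalancerAddresses: []resolver.Address{{Addr: "lb-1:443"}},
//		})
//		cc.UpdateState(s)
//	}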