gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/grpc/balancer/grpclb/grpclb_remote_balancer.go

/*
 *
 * Copyright 2017 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package grpclb

import (
	"context"
	"fmt"
	"io"
	"net"
	"sync"
	"time"

	grpc "gitee.com/ks-custle/core-gm/grpc"
	"gitee.com/ks-custle/core-gm/grpc/balancer"
	lbpb "gitee.com/ks-custle/core-gm/grpc/balancer/grpclb/grpc_lb_v1"
	"gitee.com/ks-custle/core-gm/grpc/connectivity"
	"gitee.com/ks-custle/core-gm/grpc/internal/backoff"
	"gitee.com/ks-custle/core-gm/grpc/internal/channelz"
	imetadata "gitee.com/ks-custle/core-gm/grpc/internal/metadata"
	"gitee.com/ks-custle/core-gm/grpc/keepalive"
	"gitee.com/ks-custle/core-gm/grpc/metadata"
	"gitee.com/ks-custle/core-gm/grpc/resolver"
	"github.com/golang/protobuf/proto"
	timestamppb "github.com/golang/protobuf/ptypes/timestamp"
	"github.com/google/go-cmp/cmp"
)

// processServerList updates the balancer's internal state, creates/removes
// SubConns, and regenerates the picker using the received serverList.
func (lb *lbBalancer) processServerList(l *lbpb.ServerList) {
	if logger.V(2) {
		logger.Infof("lbBalancer: processing server list: %+v", l)
	}
	lb.mu.Lock()
	defer lb.mu.Unlock()

	// Set serverListReceived to true so fallback will not take effect if it
	// has not hit timeout.
	lb.serverListReceived = true

	// If the new server list == old server list, do nothing.
	if cmp.Equal(lb.fullServerList, l.Servers, cmp.Comparer(proto.Equal)) {
		if logger.V(2) {
			logger.Infof("lbBalancer: new serverlist same as the previous one, ignoring")
		}
		return
	}
	lb.fullServerList = l.Servers

	var backendAddrs []resolver.Address
	for i, s := range l.Servers {
		if s.Drop {
			continue
		}

		md := metadata.Pairs(lbTokenKey, s.LoadBalanceToken)
		ip := net.IP(s.IpAddress)
		ipStr := ip.String()
		if ip.To4() == nil {
			// Add square brackets to IPv6 addresses; otherwise net.Dial() and
			// net.SplitHostPort() will return a "too many colons" error.
			ipStr = fmt.Sprintf("[%s]", ipStr)
		}
		addr := imetadata.Set(resolver.Address{Addr: fmt.Sprintf("%s:%d", ipStr, s.Port)}, md)
		if logger.V(2) {
			logger.Infof("lbBalancer: server list entry[%d]: ipStr:|%s|, port:|%d|, load balancer token:|%v|",
				i, ipStr, s.Port, s.LoadBalanceToken)
		}
		backendAddrs = append(backendAddrs, addr)
	}

	// Call refreshSubConns to create/remove SubConns. If we are in fallback,
	// this also exits fallback.
	lb.refreshSubConns(backendAddrs, false, lb.usePickFirst)
}

// refreshSubConns creates/removes SubConns with backendAddrs, and refreshes
// balancer state and picker.
//
// Caller must hold lb.mu.
func (lb *lbBalancer) refreshSubConns(backendAddrs []resolver.Address, fallback bool, pickFirst bool) {
	opts := balancer.NewSubConnOptions{}
	if !fallback {
		opts.CredsBundle = lb.grpclbBackendCreds
	}

	lb.backendAddrs = backendAddrs
	lb.backendAddrsWithoutMetadata = nil

	fallbackModeChanged := lb.inFallback != fallback
	lb.inFallback = fallback
	if fallbackModeChanged && lb.inFallback {
		// Clear the previously received list when entering fallback, so that
		// if the server comes back and sends the same list again, the new
		// addresses will be used.
		lb.fullServerList = nil
	}

	balancingPolicyChanged := lb.usePickFirst != pickFirst
	oldUsePickFirst := lb.usePickFirst
	lb.usePickFirst = pickFirst

	if fallbackModeChanged || balancingPolicyChanged {
		// Remove all SubConns when switching balancing policy or switching
		// fallback mode.
		//
		// For fallback mode switching with pickfirst, we want to recreate the
		// SubConn because the creds could be different.
		for a, sc := range lb.subConns {
			if oldUsePickFirst {
				// If the old SubConn was created for pickfirst, bypass the
				// cache and remove it directly.
				lb.cc.cc.RemoveSubConn(sc)
			} else {
				lb.cc.RemoveSubConn(sc)
			}
			delete(lb.subConns, a)
		}
	}

	if lb.usePickFirst {
		var (
			scKey resolver.Address
			sc    balancer.SubConn
		)
		for scKey, sc = range lb.subConns {
			break
		}
		if sc != nil {
			if len(backendAddrs) == 0 {
				lb.cc.cc.RemoveSubConn(sc)
				delete(lb.subConns, scKey)
				return
			}
			lb.cc.cc.UpdateAddresses(sc, backendAddrs)
			sc.Connect()
			return
		}
		// This bypasses the cc wrapper with SubConn cache.
		sc, err := lb.cc.cc.NewSubConn(backendAddrs, opts)
		if err != nil {
			logger.Warningf("grpclb: failed to create new SubConn: %v", err)
			return
		}
		sc.Connect()
		lb.subConns[backendAddrs[0]] = sc
		lb.scStates[sc] = connectivity.Idle
		return
	}

	// addrsSet is the set converted from backendAddrsWithoutMetadata; it's
	// used for quick address lookups.
	addrsSet := make(map[resolver.Address]struct{})
	// Create new SubConns.
	for _, addr := range backendAddrs {
		addrWithoutAttrs := addr
		addrWithoutAttrs.Attributes = nil
		addrsSet[addrWithoutAttrs] = struct{}{}
		lb.backendAddrsWithoutMetadata = append(lb.backendAddrsWithoutMetadata, addrWithoutAttrs)

		if _, ok := lb.subConns[addrWithoutAttrs]; !ok {
			// Use the address with metadata to create the SubConn.
			sc, err := lb.cc.NewSubConn([]resolver.Address{addr}, opts)
			if err != nil {
				logger.Warningf("grpclb: failed to create new SubConn: %v", err)
				continue
			}
			lb.subConns[addrWithoutAttrs] = sc // Use the addr without MD as key for the map.
			if _, ok := lb.scStates[sc]; !ok {
				// Only set the state of a new sc to IDLE. The state could
				// already be READY for cached SubConns.
				lb.scStates[sc] = connectivity.Idle
			}
			sc.Connect()
		}
	}

	for a, sc := range lb.subConns {
		// a was removed by the resolver.
		if _, ok := addrsSet[a]; !ok {
			lb.cc.RemoveSubConn(sc)
			delete(lb.subConns, a)
			// Keep the state of this sc in lb.scStates until its state becomes
			// Shutdown. The entry will be deleted in UpdateSubConnState.
		}
	}

	// Regenerate and update the picker after refreshing the SubConns because,
	// with the cache, even if a SubConn was created/removed there might be no
	// state changes (the SubConn is kept in the cache, not actually
	// created/removed).
	lb.updateStateAndPicker(true, true)
}

// remoteBalancerCCWrapper wraps the ClientConn to the remote balancer and the
// goroutines that maintain the stream to it.
type remoteBalancerCCWrapper struct {
	cc      *grpc.ClientConn
	lb      *lbBalancer
	backoff backoff.Strategy
	done    chan struct{}

	streamMu     sync.Mutex
	streamCancel func()

	// wg waits for all goroutines to exit.
	wg sync.WaitGroup
}

// newRemoteBalancerCCWrapper dials the remote balancer (via the manual
// resolver) and starts the goroutine that watches it.
func (lb *lbBalancer) newRemoteBalancerCCWrapper() {
	var dopts []grpc.DialOption
	if creds := lb.opt.DialCreds; creds != nil {
		dopts = append(dopts, grpc.WithTransportCredentials(creds))
	} else if bundle := lb.grpclbClientConnCreds; bundle != nil {
		dopts = append(dopts, grpc.WithCredentialsBundle(bundle))
	} else {
		dopts = append(dopts, grpc.WithInsecure())
	}
	if lb.opt.Dialer != nil {
		dopts = append(dopts, grpc.WithContextDialer(lb.opt.Dialer))
	}
	if lb.opt.CustomUserAgent != "" {
		dopts = append(dopts, grpc.WithUserAgent(lb.opt.CustomUserAgent))
	}
	// Explicitly set pickfirst as the balancer.
	dopts = append(dopts, grpc.WithDefaultServiceConfig(`{"loadBalancingPolicy":"pick_first"}`))
	dopts = append(dopts, grpc.WithResolvers(lb.manualResolver))
	if channelz.IsOn() {
		dopts = append(dopts, grpc.WithChannelzParentID(lb.opt.ChannelzParentID))
	}

	// Enable keepalive for the grpclb client.
	dopts = append(dopts, grpc.WithKeepaliveParams(keepalive.ClientParameters{
		Time:                20 * time.Second,
		Timeout:             10 * time.Second,
		PermitWithoutStream: true,
	}))

	// The dial target is not important.
	//
	// The resolved grpclb server addresses will set the ServerName field, and
	// creds will receive that ServerName as the authority.
	cc, err := grpc.DialContext(context.Background(), lb.manualResolver.Scheme()+":///grpclb.subClientConn", dopts...)
	if err != nil {
		logger.Fatalf("failed to dial: %v", err)
	}
	ccw := &remoteBalancerCCWrapper{
		cc:      cc,
		lb:      lb,
		backoff: lb.backoff,
		done:    make(chan struct{}),
	}
	lb.ccRemoteLB = ccw
	ccw.wg.Add(1)
	go ccw.watchRemoteBalancer()
}

// close closes the ClientConn to the remote balancer and waits until all
// goroutines have finished.
func (ccw *remoteBalancerCCWrapper) close() {
	close(ccw.done)
	ccw.cc.Close()
	ccw.wg.Wait()
}

// readServerList receives replies from the stream, applies the received
// server lists, and eagerly enters fallback when the balancer requests it.
func (ccw *remoteBalancerCCWrapper) readServerList(s *balanceLoadClientStream) error {
	for {
		reply, err := s.Recv()
		if err != nil {
			if err == io.EOF {
				return errServerTerminatedConnection
			}
			return fmt.Errorf("grpclb: failed to recv server list: %v", err)
		}
		if serverList := reply.GetServerList(); serverList != nil {
			ccw.lb.processServerList(serverList)
		}
		if reply.GetFallbackResponse() != nil {
			// Eagerly enter fallback.
			ccw.lb.mu.Lock()
			ccw.lb.refreshSubConns(ccw.lb.resolvedBackendAddrs, true, ccw.lb.usePickFirst)
			ccw.lb.mu.Unlock()
		}
	}
}

// sendLoadReport periodically sends client load stats on the stream,
// suppressing consecutive all-zero reports.
func (ccw *remoteBalancerCCWrapper) sendLoadReport(s *balanceLoadClientStream, interval time.Duration) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	lastZero := false
	for {
		select {
		case <-ticker.C:
		case <-s.Context().Done():
			return
		}
		stats := ccw.lb.clientStats.toClientStats()
		zero := isZeroStats(stats)
		if zero && lastZero {
			// Quash redundant empty load reports.
			continue
		}
		lastZero = zero
		t := time.Now()
		stats.Timestamp = &timestamppb.Timestamp{
			Seconds: t.Unix(),
			Nanos:   int32(t.Nanosecond()),
		}
		if err := s.Send(&lbpb.LoadBalanceRequest{
			LoadBalanceRequestType: &lbpb.LoadBalanceRequest_ClientStats{
				ClientStats: stats,
			},
		}); err != nil {
			return
		}
	}
}

// callRemoteBalancer opens a BalanceLoad stream, performs the initial
// handshake, starts load reporting if requested, and then blocks reading
// server lists. The returned bool tells the caller whether to back off
// before retrying.
func (ccw *remoteBalancerCCWrapper) callRemoteBalancer(ctx context.Context) (backoff bool, _ error) {
	lbClient := &loadBalancerClient{cc: ccw.cc}
	stream, err := lbClient.BalanceLoad(ctx, grpc.WaitForReady(true))
	if err != nil {
		return true, fmt.Errorf("grpclb: failed to perform RPC to the remote balancer %v", err)
	}
	ccw.lb.mu.Lock()
	ccw.lb.remoteBalancerConnected = true
	ccw.lb.mu.Unlock()

	// grpclb handshake on the stream.
	initReq := &lbpb.LoadBalanceRequest{
		LoadBalanceRequestType: &lbpb.LoadBalanceRequest_InitialRequest{
			InitialRequest: &lbpb.InitialLoadBalanceRequest{
				Name: ccw.lb.target,
			},
		},
	}
	if err := stream.Send(initReq); err != nil {
		return true, fmt.Errorf("grpclb: failed to send init request: %v", err)
	}
	reply, err := stream.Recv()
	if err != nil {
		return true, fmt.Errorf("grpclb: failed to recv init response: %v", err)
	}
	initResp := reply.GetInitialResponse()
	if initResp == nil {
		return true, fmt.Errorf("grpclb: reply from remote balancer did not include initial response")
	}

	ccw.wg.Add(1)
	go func() {
		defer ccw.wg.Done()
		if d := convertDuration(initResp.ClientStatsReportInterval); d > 0 {
			ccw.sendLoadReport(stream, d)
		}
	}()
	// No backoff if the init req/resp handshake was successful.
	return false, ccw.readServerList(stream)
}

// cancelRemoteBalancerCall cancels the context used by the stream to the
// remote balancer. watchRemoteBalancer() takes care of restarting this call
// after the stream fails.
func (ccw *remoteBalancerCCWrapper) cancelRemoteBalancerCall() {
	ccw.streamMu.Lock()
	if ccw.streamCancel != nil {
		ccw.streamCancel()
		ccw.streamCancel = nil
	}
	ccw.streamMu.Unlock()
}

// watchRemoteBalancer repeatedly calls the remote balancer. When the stream
// breaks it triggers a re-resolve, enters fallback if needed, and backs off
// between failed attempts.
func (ccw *remoteBalancerCCWrapper) watchRemoteBalancer() {
	defer func() {
		ccw.wg.Done()
		ccw.streamMu.Lock()
		if ccw.streamCancel != nil {
			// This makes sure we don't leak the context when returning
			// directly from inside the `for` loop below.
			ccw.streamCancel()
			ccw.streamCancel = nil
		}
		ccw.streamMu.Unlock()
	}()

	var retryCount int
	var ctx context.Context
	for {
		ccw.streamMu.Lock()
		if ccw.streamCancel != nil {
			ccw.streamCancel()
			ccw.streamCancel = nil
		}
		ctx, ccw.streamCancel = context.WithCancel(context.Background())
		ccw.streamMu.Unlock()

		doBackoff, err := ccw.callRemoteBalancer(ctx)
		select {
		case <-ccw.done:
			return
		default:
			if err != nil {
				if err == errServerTerminatedConnection {
					logger.Info(err)
				} else {
					logger.Warning(err)
				}
			}
		}
		// Trigger a re-resolve when the stream errors.
		ccw.lb.cc.cc.ResolveNow(resolver.ResolveNowOptions{})

		ccw.lb.mu.Lock()
		ccw.lb.remoteBalancerConnected = false
		ccw.lb.fullServerList = nil
		// Enter fallback when the connection to the remote balancer is lost,
		// and the aggregated state is not Ready.
		if !ccw.lb.inFallback && ccw.lb.state != connectivity.Ready {
			// Entering fallback.
			ccw.lb.refreshSubConns(ccw.lb.resolvedBackendAddrs, true, ccw.lb.usePickFirst)
		}
		ccw.lb.mu.Unlock()

		if !doBackoff {
			retryCount = 0
			continue
		}

		timer := time.NewTimer(ccw.backoff.Backoff(retryCount)) // Copy backoff
		select {
		case <-timer.C:
		case <-ccw.done:
			timer.Stop()
			return
		}
		retryCount++
	}
}